diff options
author | Andrej Shadura <andrewsh@debian.org> | 2019-03-09 22:30:28 +0000 |
---|---|---|
committer | Andrej Shadura <andrewsh@debian.org> | 2019-03-09 22:30:28 +0000 |
commit | cde76aeb42240f7270bc6605c606ae07d2dc5a7d (patch) | |
tree | c3ebf1d7745224f524da31dbabc5d76b9ea75916 /src/de/lmu/ifi/dbs/elki/math/linearalgebra |
Import Upstream version 0.4.0~beta1
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/math/linearalgebra')
58 files changed, 11447 insertions, 0 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/AffineTransformation.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/AffineTransformation.java new file mode 100644 index 00000000..9deea281 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/AffineTransformation.java @@ -0,0 +1,402 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * Affine transformations implemented using homogeneous coordinates. + * + * The use of homogeneous coordinates allows the combination of multiple affine + * transformations (rotations, translations, scaling) into a single matrix + * operation (of dimensionality dim+1), and also the construction of an inverse + * transformation. 
+ * + * @author Erich Schubert + * + * @apiviz.composedOf Matrix + * @apiviz.uses Matrix + * @apiviz.uses Vector + */ +public class AffineTransformation { + /** + * the dimensionality of the transformation + */ + private int dim; + + /** + * The transformation matrix of dim+1 x dim+1 for homogeneous coordinates + */ + private Matrix trans; + + /** + * the inverse transformation + */ + private Matrix inv = null; + + /** + * Constructor for an identity transformation. + * + * @param dim dimensionality + */ + public AffineTransformation(int dim) { + super(); + this.dim = dim; + this.trans = Matrix.unitMatrix(dim + 1); + } + + /** + * Trivial constructor with all fields, mostly for cloning + * + * @param dim dimensionality + * @param trans transformation matrix + * @param inv inverse matrix + */ + public AffineTransformation(int dim, Matrix trans, Matrix inv) { + super(); + this.dim = dim; + this.trans = trans; + this.inv = inv; + } + + /** + * Generate a transformation that reorders axes in the given way. + * + * The list of axes to be used should not contain duplicates, or the resulting + * matrix will not be invertible. It does not have to be complete however, in + * particular an empty list will result in the identity transform: unmentioned + * axes will be appended in their original order. + * + * @param dim Dimensionality of vector space (resulting Matrix will be dim+1 x + * dim+1) + * @param axes (Partial) list of axes + * @return new transformation to do the requested reordering + */ + public static AffineTransformation reorderAxesTransformation(int dim, int[] axes) { + Matrix m = Matrix.zeroMatrix(dim + 1); + // insert ones appropriately: + for(int i = 0; i < axes.length; i++) { + assert (0 < axes[i] && axes[i] <= dim); + m.set(i, axes[i] - 1, 1.0); + } + int useddim = 1; + for(int i = axes.length; i < dim + 1; i++) { + // find next "unused" dimension. 
+ { + boolean search = true; + while(search) { + search = false; + for(int a : axes) { + if(a == useddim) { + search = true; + useddim++; + break; + } + } + } + } + m.set(i, useddim - 1, 1.0); + useddim++; + } + assert (useddim - 2 == dim); + return new AffineTransformation(dim, m, null); + } + + /** + * Return a clone of the affine transformation + * + * @return cloned affine transformation + */ + @Override + public AffineTransformation clone() { + // Note that we're NOT using copied matrices here, since this class + // supposedly never modifies it's matrixes but replaces them with new + // ones. Thus it is safe to re-use it for a cloned copy. + return new AffineTransformation(this.dim, this.trans, this.inv); + } + + /** + * Query dimensionality of the transformation. + * + * @return dimensionality + */ + public int getDimensionality() { + return dim; + } + + /** + * Add a translation operation to the matrix + * + * @param v translation vector + */ + public void addTranslation(Vector v) { + assert (v.getRowDimensionality() == dim); + + // reset inverse transformation - needs recomputation. + inv = null; + + Matrix homTrans = Matrix.unitMatrix(dim + 1); + for(int i = 0; i < dim; i++) { + homTrans.set(i, dim, v.get(i)); + } + trans = homTrans.times(trans); + } + + /** + * Add a matrix operation to the matrix. + * + * Be careful to use only invertible matrices if you want an invertible affine + * transformation. + * + * @param m matrix (should be invertible) + */ + public void addMatrix(Matrix m) { + assert (m.getRowDimensionality() == dim); + assert (m.getColumnDimensionality() == dim); + + // reset inverse transformation - needs recomputation. + inv = null; + + // extend the matrix with an extra row and column + double[][] ht = new double[dim + 1][dim + 1]; + for(int i = 0; i < dim; i++) { + for(int j = 0; j < dim; j++) { + ht[i][j] = m.get(i, j); + } + } + // the other cells default to identity matrix + ht[dim][dim] = 1.0; + // Multiply from left. 
+ trans = new Matrix(ht).times(trans); + } + + /** + * Convenience function to apply a rotation in 2 dimensions. + * + * @param axis1 first dimension + * @param axis2 second dimension + * @param angle rotation angle in radians. + */ + public void addRotation(int axis1, int axis2, double angle) { + // TODO: throw an exception instead of using assert + assert (axis1 >= 0); + assert (axis1 < dim); + assert (axis1 >= 0); + assert (axis2 < dim); + assert (axis1 != axis2); + + // reset inverse transformation - needs recomputation. + inv = null; + + double[][] ht = new double[dim + 1][dim + 1]; + // identity matrix + for(int i = 0; i < dim + 1; i++) { + ht[i][i] = 1.0; + } + // insert rotation values + ht[axis1][axis1] = +Math.cos(angle); + ht[axis1][axis2] = -Math.sin(angle); + ht[axis2][axis1] = +Math.sin(angle); + ht[axis2][axis2] = +Math.cos(angle); + // Multiply from left + trans = new Matrix(ht).times(trans); + } + + /** + * Add a reflection along the given axis. + * + * @param axis Axis number to do the reflection at. + */ + public void addAxisReflection(int axis) { + assert (0 < axis && axis <= dim); + // reset inverse transformation - needs recomputation. + inv = null; + + // Formal: + // Matrix homTrans = Matrix.unitMatrix(dim + 1); + // homTrans.set(axis - 1, axis - 1, -1); + // trans = homTrans.times(trans); + // Faster: + for(int i = 0; i <= dim; i++) { + trans.set(axis - 1, i, -trans.get(axis - 1, i)); + } + } + + /** + * Simple linear (symmetric) scaling. + * + * @param scale Scaling factor + */ + public void addScaling(double scale) { + // invalidate inverse + inv = null; + // Note: last ROW is not included. 
+ for(int i = 0; i < dim; i++) { + for(int j = 0; j <= dim; j++) { + trans.set(i, j, trans.get(i, j) * scale); + } + } + // As long as relative vectors aren't used, this would also work: + // trans.set(dim, dim, trans.get(dim, dim) / scale); + } + + /** + * Get a copy of the transformation matrix + * + * @return copy of the transformation matrix + */ + public Matrix getTransformation() { + return trans.copy(); + } + + /** + * Get a copy of the inverse matrix + * + * @return a copy of the inverse transformation matrix + */ + public Matrix getInverse() { + if(inv == null) { + updateInverse(); + } + return inv.copy(); + } + + /** + * Compute the inverse transformation matrix + */ + private void updateInverse() { + inv = trans.inverse(); + } + + /** + * Transform an absolute vector into homogeneous coordinates. + * + * @param v initial vector + * @return vector of dim+1, with new column having the value 1.0 + */ + public Vector homogeneVector(Vector v) { + assert (v.getRowDimensionality() == dim); + double[] dv = new double[dim + 1]; + for(int i = 0; i < dim; i++) { + dv[i] = v.get(i); + } + dv[dim] = 1.0; + return new Vector(dv); + } + + /** + * Transform a relative vector into homogeneous coordinates. + * + * @param v initial vector + * @return vector of dim+1, with new column having the value 0.0 + */ + public Vector homogeneRelativeVector(Vector v) { + assert (v.getRowDimensionality() == dim); + // TODO: this only works properly when trans[dim][dim] == 1.0, right? + double[] dv = new double[dim + 1]; + for(int i = 0; i < dim; i++) { + dv[i] = v.get(i); + } + dv[dim] = 0.0; + return new Vector(dv); + } + + /** + * Project an homogeneous vector back into the original space. + * + * @param v Matrix of 1 x dim+1 containing the homogeneous vector + * @return vector of dimension dim + */ + public Vector unhomogeneVector(Vector v) { + assert (v.getRowDimensionality() == dim + 1); + // TODO: this only works properly when trans[dim][dim] == 1.0, right? 
+ double[] dv = new double[dim]; + double scale = v.get(dim); + assert (Math.abs(scale) > 0.0); + for(int i = 0; i < dim; i++) { + dv[i] = v.get(i) / scale; + } + return new Vector(dv); + } + + /** + * Project an homogeneous vector back into the original space. + * + * @param v Matrix of 1 x dim+1 containing the homogeneous vector + * @return vector of dimension dim + */ + public Vector unhomogeneRelativeVector(Vector v) { + assert (v.getRowDimensionality() == dim + 1); + double[] dv = new double[dim]; + double scale = v.get(dim); + assert (Math.abs(scale) == 0.0); + for(int i = 0; i < dim; i++) { + dv[i] = v.get(i); + } + return new Vector(dv); + } + + /** + * Apply the transformation onto a vector + * + * @param v vector of dimensionality dim + * @return transformed vector of dimensionality dim + */ + public Vector apply(Vector v) { + return unhomogeneVector(trans.times(homogeneVector(v))); + } + + /** + * Apply the inverse transformation onto a vector + * + * @param v vector of dimensionality dim + * @return transformed vector of dimensionality dim + */ + public Vector applyInverse(Vector v) { + if(inv == null) { + updateInverse(); + } + return unhomogeneVector(inv.times(homogeneVector(v))); + } + + /** + * Apply the transformation onto a vector + * + * @param v vector of dimensionality dim + * @return transformed vector of dimensionality dim + */ + public Vector applyRelative(Vector v) { + return unhomogeneRelativeVector(trans.times(homogeneRelativeVector(v))); + } + + /** + * Apply the inverse transformation onto a vector + * + * @param v vector of dimensionality dim + * @return transformed vector of dimensionality dim + */ + public Vector applyRelativeInverse(Vector v) { + if(inv == null) { + updateInverse(); + } + return unhomogeneRelativeVector(inv.times(homogeneRelativeVector(v))); + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Centroid.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Centroid.java new file mode 100644 index 00000000..76fe15d1 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Centroid.java @@ -0,0 +1,194 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +import de.lmu.ifi.dbs.elki.data.NumberVector; +import de.lmu.ifi.dbs.elki.database.ids.DBID; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; + +/** + * Class to compute the centroid of some data. + * + * Note: this class abstracts the efficient and numerical stable computation of + * centroids. + * + * See {@link de.lmu.ifi.dbs.elki.utilities.DatabaseUtil DatabaseUtil} for + * easier to use APIs. + * + * @author Erich Schubert + */ +public class Centroid extends Vector { + /** + * Serial version + */ + private static final long serialVersionUID = 1L; + + /** + * The current weight + */ + protected double wsum; + + /** + * Constructor. 
+ * + * @param dim Dimensionality + */ + public Centroid(int dim) { + super(dim); + this.wsum = 0; + } + + /** + * Add a single value with weight 1.0 + * + * @param val Value + */ + public void put(double[] val) { + assert (val.length == elements.length); + wsum += 1.0; + for(int i = 0; i < elements.length; i++) { + final double delta = val[i] - elements[i]; + elements[i] += delta / wsum; + } + } + + /** + * Add data with a given weight. + * + * @param val data + * @param weight weight + */ + public void put(double val[], double weight) { + assert (val.length == elements.length); + final double nwsum = weight + wsum; + for(int i = 0; i < elements.length; i++) { + final double delta = val[i] - elements[i]; + final double rval = delta * weight / nwsum; + elements[i] += rval; + } + wsum = nwsum; + } + + /** + * Add a single value with weight 1.0 + * + * @param val Value + */ + public final void put(Vector val) { + put(val.getArrayRef()); + } + + /** + * Add data with a given weight. + * + * @param val data + * @param weight weight + */ + public final void put(Vector val, double weight) { + put(val.getArrayRef(), weight); + } + + /** + * Add a single value with weight 1.0 + * + * @param val Value + */ + public void put(NumberVector<?, ?> val) { + assert (val.getDimensionality() == elements.length); + wsum += 1.0; + for(int i = 0; i < elements.length; i++) { + final double delta = val.doubleValue(i + 1) - elements[i]; + elements[i] += delta / wsum; + } + } + + /** + * Add data with a given weight. 
+ * + * @param val data + * @param weight weight + */ + public void put(NumberVector<?, ?> val, double weight) { + assert (val.getDimensionality() == elements.length); + final double nwsum = weight + wsum; + for(int i = 0; i < elements.length; i++) { + final double delta = val.doubleValue(i + 1) - elements[i]; + final double rval = delta * weight / nwsum; + elements[i] += rval; + } + wsum = nwsum; + } + + /** + * Get the data as vector + * + * @return the data + */ + public <F extends NumberVector<? extends F, ?>> F toVector(Relation<? extends F> relation) { + return DatabaseUtil.assumeVectorField(relation).getFactory().newInstance(elements); + } + + /** + * Static Constructor from an existing matrix columns. + * + * @param mat Matrix to use the columns from. + */ + public static Centroid make(Matrix mat) { + Centroid c = new Centroid(mat.getRowDimensionality()); + int n = mat.getColumnDimensionality(); + for(int i = 0; i < n; i++) { + // TODO: avoid constructing the vector objects? + c.put(mat.getColumnVector(i)); + } + return c; + } + + /** + * Static constructor from an existing relation. + * + * @param relation Relation to use + * @return Centroid of relation + */ + public static Centroid make(Relation<? extends NumberVector<?, ?>> relation) { + Centroid c = new Centroid(DatabaseUtil.dimensionality(relation)); + for(DBID id : relation.iterDBIDs()) { + c.put(relation.get(id)); + } + return c; + } + + /** + * Static constructor from an existing relation. + * + * @param relation Relation to use + * @param ids IDs to use + */ + public static Centroid make(Relation<? extends NumberVector<?, ?>> relation, Iterable<DBID> ids) { + Centroid c = new Centroid(DatabaseUtil.dimensionality(relation)); + for(DBID id : ids) { + c.put(relation.get(id)); + } + return c; + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/CholeskyDecomposition.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/CholeskyDecomposition.java new file mode 100644 index 00000000..c72b1245 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/CholeskyDecomposition.java @@ -0,0 +1,166 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * Cholesky Decomposition. + * <P> + * For a symmetric, positive definite matrix A, the Cholesky decomposition is an + * lower triangular matrix L so that A = L*L'. + * <P> + * If the matrix is not symmetric or positive definite, the constructor returns + * a partial decomposition and sets an internal flag that may be queried by the + * isSPD() method. + * + * @apiviz.uses Matrix - - transforms + */ +@SuppressWarnings("serial") +public class CholeskyDecomposition implements java.io.Serializable { + /** + * Array for internal storage of decomposition. + * + * @serial internal array storage. + */ + private double[][] L; + + /** + * Row and column dimension (square matrix). + * + * @serial matrix dimension. 
+ */ + private int n; + + /** + * Symmetric and positive definite flag. + * + * @serial is symmetric and positive definite flag. + */ + private boolean isspd; + + /* + * ------------------------ Constructor ------------------------ + */ + + /** + * Cholesky algorithm for symmetric and positive definite matrix. + * + * @param Arg Square, symmetric matrix. + * + */ + public CholeskyDecomposition(Matrix Arg) { + // Initialize. + double[][] A = Arg.getArrayRef(); + n = Arg.getRowDimensionality(); + L = new double[n][n]; + isspd = (Arg.getColumnDimensionality() == n); + // Main loop. + for(int j = 0; j < n; j++) { + double[] Lrowj = L[j]; + double d = 0.0; + for(int k = 0; k < j; k++) { + double[] Lrowk = L[k]; + double s = 0.0; + for(int i = 0; i < k; i++) { + s += Lrowk[i] * Lrowj[i]; + } + Lrowj[k] = s = (A[j][k] - s) / L[k][k]; + d = d + s * s; + isspd = isspd & (A[k][j] == A[j][k]); + } + d = A[j][j] - d; + isspd = isspd & (d > 0.0); + L[j][j] = Math.sqrt(Math.max(d, 0.0)); + for(int k = j + 1; k < n; k++) { + L[j][k] = 0.0; + } + } + } + + /* + * ------------------------ Public Methods ------------------------ + */ + + /** + * Is the matrix symmetric and positive definite? + * + * @return true if A is symmetric and positive definite. + */ + public boolean isSPD() { + return isspd; + } + + /** + * Return triangular factor. + * + * @return L + */ + public Matrix getL() { + return new Matrix(L); + } + + /** + * Solve A*X = B + * + * @param B A Matrix with as many rows as A and any number of columns. + * @return X so that L*L'*X = B + * @exception IllegalArgumentException Matrix row dimensions must agree. + * @exception RuntimeException Matrix is not symmetric positive definite. + */ + public Matrix solve(Matrix B) { + if(B.getRowDimensionality() != n) { + throw new IllegalArgumentException("Matrix row dimensions must agree."); + } + if(!isspd) { + throw new RuntimeException("Matrix is not symmetric positive definite."); + } + + // Copy right hand side. 
+ double[][] X = B.getArrayCopy(); + int nx = B.getColumnDimensionality(); + + // Solve L*Y = B; + for(int k = 0; k < n; k++) { + for(int i = k + 1; i < n; i++) { + for(int j = 0; j < nx; j++) { + X[i][j] -= X[k][j] * L[i][k]; + } + } + for(int j = 0; j < nx; j++) { + X[k][j] /= L[k][k]; + } + } + + // Solve L'*X = Y; + for(int k = n - 1; k >= 0; k--) { + for(int j = 0; j < nx; j++) { + X[k][j] /= L[k][k]; + } + for(int i = 0; i < k; i++) { + for(int j = 0; j < nx; j++) { + X[i][j] -= X[k][j] * L[k][i]; + } + } + } + return new Matrix(X); + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/CovarianceMatrix.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/CovarianceMatrix.java new file mode 100644 index 00000000..03b3c18d --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/CovarianceMatrix.java @@ -0,0 +1,365 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +import de.lmu.ifi.dbs.elki.data.NumberVector; +import de.lmu.ifi.dbs.elki.database.ids.DBID; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; + +/** + * Class for computing covariance matrixes using stable mean and variance + * computations. + * + * This class encapsulates the mathematical aspects of computing this matrix. + * + * See {@link de.lmu.ifi.dbs.elki.utilities.DatabaseUtil DatabaseUtil} for + * easier to use APIs. 
+ * + * For use in algorithms, it is more appropriate to use + * {@link de.lmu.ifi.dbs.elki.math.linearalgebra.pca.StandardCovarianceMatrixBuilder StandardCovarianceMatrixBuilder} + * since this class can be overriden with a stabilized covariance matrix builder! + * + * @author Erich Schubert + * + * @apiviz.uses Vector oneway + * @apiviz.uses NumberVector oneway + * @apiviz.has Matrix oneway - - «produces» + */ +public class CovarianceMatrix { + /** + * The means + */ + double[] mean; + + /** + * The covariance matrix + */ + double[][] elements; + + /** + * Temporary storage, to avoid reallocations + */ + double[] nmea; + + /** + * The current weight + */ + protected double wsum; + + /** + * Constructor. + * + * @param dim Dimensionality + */ + public CovarianceMatrix(int dim) { + super(); + this.mean = new double[dim]; + this.nmea = new double[dim]; + this.elements = new double[dim][dim]; + this.wsum = 0.0; + } + + /** + * Add a single value with weight 1.0 + * + * @param val Value + */ + public void put(double[] val) { + assert (val.length == mean.length); + final double nwsum = wsum + 1.0; + // Compute new means + for(int i = 0; i < mean.length; i++) { + final double delta = val[i] - mean[i]; + nmea[i] = mean[i] + delta / nwsum; + } + // Update covariance matrix + for(int i = 0; i < mean.length; i++) { + for(int j = i; j < mean.length; j++) { + // We DO want to use the new mean once and the old mean once! + // It does not matter which one is which. + double delta = (val[i] - nmea[i]) * (val[j] - mean[j]); + elements[i][j] = elements[i][j] + delta; + // Optimize via symmetry + if(i != j) { + elements[j][i] = elements[j][i] + delta; + } + } + } + + // Use new values. + wsum = nwsum; + System.arraycopy(nmea, 0, mean, 0, nmea.length); + } + + /** + * Add data with a given weight. 
+ * + * @param val data + * @param weight weight + */ + public void put(double val[], double weight) { + assert (val.length == mean.length); + final double nwsum = wsum + weight; + // Compute new means + for(int i = 0; i < mean.length; i++) { + final double delta = val[i] - mean[i]; + final double rval = delta * weight / nwsum; + nmea[i] = mean[i] + rval; + } + // Update covariance matrix + for(int i = 0; i < mean.length; i++) { + for(int j = i; j < mean.length; j++) { + // We DO want to use the new mean once and the old mean once! + // It does not matter which one is which. + double delta = (val[i] - nmea[i]) * (val[j] - mean[j]) * weight; + elements[i][j] = elements[i][j] + delta; + // Optimize via symmetry + if(i != j) { + elements[j][i] = elements[j][i] + delta; + } + } + } + + // Use new values. + wsum = nwsum; + System.arraycopy(nmea, 0, mean, 0, nmea.length); + } + + /** + * Add a single value with weight 1.0 + * + * @param val Value + */ + public final void put(Vector val) { + put(val.getArrayRef()); + } + + /** + * Add data with a given weight. + * + * @param val data + * @param weight weight + */ + public final void put(Vector val, double weight) { + put(val.getArrayRef(), weight); + } + + /** + * Add a single value with weight 1.0 + * + * @param val Value + */ + public void put(NumberVector<?, ?> val) { + assert (val.getDimensionality() == mean.length); + final double nwsum = wsum + 1.0; + // Compute new means + for(int i = 0; i < mean.length; i++) { + final double delta = val.doubleValue(i + 1) - mean[i]; + nmea[i] = mean[i] + delta / nwsum; + } + // Update covariance matrix + for(int i = 0; i < mean.length; i++) { + for(int j = i; j < mean.length; j++) { + // We DO want to use the new mean once and the old mean once! + // It does not matter which one is which. 
+ double delta = (val.doubleValue(i + 1) - nmea[i]) * (val.doubleValue(j + 1) - mean[j]); + elements[i][j] = elements[i][j] + delta; + // Optimize via symmetry + if(i != j) { + elements[j][i] = elements[j][i] + delta; + } + } + } + // Use new values. + wsum = nwsum; + System.arraycopy(nmea, 0, mean, 0, nmea.length); + } + + /** + * Add data with a given weight. + * + * @param val data + * @param weight weight + */ + public void put(NumberVector<?, ?> val, double weight) { + assert (val.getDimensionality() == mean.length); + final double nwsum = wsum + weight; + // Compute new means + for(int i = 0; i < mean.length; i++) { + final double delta = val.doubleValue(i + 1) - mean[i]; + final double rval = delta * weight / nwsum; + nmea[i] = mean[i] + rval; + } + // Update covariance matrix + for(int i = 0; i < mean.length; i++) { + for(int j = i; j < mean.length; j++) { + // We DO want to use the new mean once and the old mean once! + // It does not matter which one is which. + double delta = (val.doubleValue(i + 1) - nmea[i]) * (val.doubleValue(j + 1) - mean[j]) * weight; + elements[i][j] = elements[i][j] + delta; + // Optimize via symmetry + if(i != j) { + elements[j][i] = elements[j][i] + delta; + } + } + } + // Use new values. + wsum = nwsum; + System.arraycopy(nmea, 0, mean, 0, nmea.length); + } + + /** + * Get the mean as vector. + * + * @return Mean vector + */ + public Vector getMeanVector() { + return new Vector(mean); + } + + /** + * Get the mean as vector. + * + * @return Mean vector + */ + public <F extends NumberVector<? extends F, ?>> F getMeanVector(Relation<? extends F> relation) { + return DatabaseUtil.assumeVectorField(relation).getFactory().newInstance(mean); + } + + /** + * Obtain the covariance matrix according to the sample statistics: (n-1) + * degrees of freedom. + * + * This method duplicates the matrix contents, so it does allow further + * updates. Use {@link #destroyToSampleMatrix()} if you do not need further + * updates. 
+ * + * @return New matrix + */ + public Matrix makeSampleMatrix() { + if(wsum <= 1.0) { + throw new IllegalStateException("Too few elements used to obtain a valid covariance matrix."); + } + Matrix mat = new Matrix(elements); + return mat.times(1.0 / (wsum - 1)); + } + + /** + * Obtain the covariance matrix according to the population statistics: n + * degrees of freedom. + * + * This method duplicates the matrix contents, so it does allow further + * updates. Use {@link #destroyToNaiveMatrix()} if you do not need further + * updates. + * + * @return New matrix + */ + public Matrix makeNaiveMatrix() { + if(wsum <= 0.0) { + throw new IllegalStateException("Too few elements used to obtain a valid covariance matrix."); + } + Matrix mat = new Matrix(elements); + return mat.times(1.0 / wsum); + } + + /** + * Obtain the covariance matrix according to the sample statistics: (n-1) + * degrees of freedom. + * + * This method doesn't require matrix duplication, but will not allow further + * updates, the object should be discarded. Use {@link #makeSampleMatrix()} if + * you want to perform further updates. + * + * @return New matrix + */ + public Matrix destroyToSampleMatrix() { + if(wsum <= 1.0) { + throw new IllegalStateException("Too few elements used to obtain a valid covariance matrix."); + } + Matrix mat = new Matrix(elements).timesEquals(1.0 / (wsum - 1)); + this.elements = null; + return mat; + } + + /** + * Obtain the covariance matrix according to the population statistics: n + * degrees of freedom. + * + * This method doesn't require matrix duplication, but will not allow further + * updates, the object should be discarded. Use {@link #makeNaiveMatrix()} if + * you want to perform further updates. 
+ * + * @return New matrix + */ + public Matrix destroyToNaiveMatrix() { + if(wsum <= 0.0) { + throw new IllegalStateException("Too few elements used to obtain a valid covariance matrix."); + } + Matrix mat = new Matrix(elements).timesEquals(1.0 / wsum); + this.elements = null; + return mat; + } + + /** + * Static Constructor. + * + * @param mat Matrix to use the columns of + */ + public static CovarianceMatrix make(Matrix mat) { + CovarianceMatrix c = new CovarianceMatrix(mat.getRowDimensionality()); + int n = mat.getColumnDimensionality(); + for(int i = 0; i < n; i++) { + // TODO: avoid constructing the vector objects? + c.put(mat.getColumnVector(i)); + } + return c; + } + + /** + * Static Constructor from a full relation. + * + * @param relation Relation to use. + */ + public static CovarianceMatrix make(Relation<? extends NumberVector<?, ?>> relation) { + CovarianceMatrix c = new CovarianceMatrix(DatabaseUtil.dimensionality(relation)); + for(DBID id : relation.iterDBIDs()) { + c.put(relation.get(id)); + } + return c; + } + + /** + * Static Constructor from a full relation. + * + * @param relation Relation to use. + * @param ids IDs to add + */ + public static CovarianceMatrix make(Relation<? extends NumberVector<?, ?>> relation, Iterable<DBID> ids) { + CovarianceMatrix c = new CovarianceMatrix(DatabaseUtil.dimensionality(relation)); + for(DBID id : ids) { + c.put(relation.get(id)); + } + return c; + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/EigenPair.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/EigenPair.java new file mode 100644 index 00000000..6ee04aa5 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/EigenPair.java @@ -0,0 +1,105 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +import de.lmu.ifi.dbs.elki.utilities.FormatUtil; + +/** + * Helper class which encapsulates an eigenvector and its corresponding + * eigenvalue. This class is used to sort eigenpairs. + * + * @author Elke Achtert + * + * @apiviz.composedOf Matrix + */ +public class EigenPair implements Comparable<EigenPair> { + /** + * The eigenvector as a matrix. + */ + private Vector eigenvector; + + /** + * The corresponding eigenvalue. + */ + private double eigenvalue; + + /** + * Creates a new EigenPair object. 
+ * + * @param eigenvector the eigenvector as a matrix + * @param eigenvalue the corresponding eigenvalue + */ + public EigenPair(Vector eigenvector, double eigenvalue) { + this.eigenvalue = eigenvalue; + this.eigenvector = eigenvector; + } + + /** + * Compares this object with the specified object for order. Returns a + * negative integer, zero, or a positive integer as this object's eigenvalue + * is greater than, equal to, or less than the specified object's eigenvalue. + * + * @param o the Eigenvector to be compared. + * @return a negative integer, zero, or a positive integer as this object's + * eigenvalue is greater than, equal to, or less than the specified + * object's eigenvalue. + */ + @Override + public int compareTo(EigenPair o) { + if(this.eigenvalue < o.eigenvalue) { + return -1; + } + if(this.eigenvalue > o.eigenvalue) { + return +1; + } + return 0; + } + + /** + * Returns the eigenvector. + * + * @return the eigenvector + */ + public Vector getEigenvector() { + return eigenvector; + } + + /** + * Returns the eigenvalue. + * + * @return the eigenvalue + */ + public double getEigenvalue() { + return eigenvalue; + } + + /** + * Returns a string representation of this EigenPair. + * + * @return a string representation of this EigenPair + */ + @Override + public String toString() { + return "(ew = " + FormatUtil.format(eigenvalue) + ", ev = [" + FormatUtil.format(eigenvector) + "])"; + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/EigenvalueDecomposition.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/EigenvalueDecomposition.java new file mode 100644 index 00000000..7c018ea0 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/EigenvalueDecomposition.java @@ -0,0 +1,1018 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Eigenvalues and eigenvectors of a real matrix. + * <P> + * If A is symmetric, then A = V*D*V' where the eigenvalue matrix D is diagonal + * and the eigenvector matrix V is orthogonal. I.e. A = + * V.times(D.timesTranspose(V)) and V.timesTranspose(V) equals the identity + * matrix. + * <P> + * If A is not symmetric, then the eigenvalue matrix D is block diagonal with + * the real eigenvalues in 1-by-1 blocks and any complex eigenvalues, lambda + + * i*mu, in 2-by-2 blocks, [lambda, mu; -mu, lambda]. The columns of V represent + * the eigenvectors in the sense that A*V = V*D, i.e. A.times(V) equals + * V.times(D). 
The matrix V may be badly conditioned, or even singular, so the + * validity of the equation A = V*D*inverse(V) depends upon V.cond(). + * + * @apiviz.uses Matrix - - transforms + */ +public class EigenvalueDecomposition implements java.io.Serializable { + /** + * Serial version + */ + private static final long serialVersionUID = 1L; + + /** + * Row and column dimension (square matrix). + * + * @serial matrix dimension. + */ + private int n; + + /** + * Symmetry flag. + * + * @serial internal symmetry flag. + */ + private boolean issymmetric; + + /** + * Arrays for internal storage of eigenvalues. + * + * @serial internal storage of eigenvalues. + */ + private double[] d, e; + + /** + * Array for internal storage of eigenvectors. + * + * @serial internal storage of eigenvectors. + */ + private double[][] V; + + /** + * Array for internal storage of nonsymmetric Hessenberg form. + * + * @serial internal storage of nonsymmetric Hessenberg form. + */ + private double[][] H; + + /** + * Working storage for nonsymmetric algorithm. + * + * @serial working storage for nonsymmetric algorithm. + */ + private double[] ort; + + /* + * ------------------------ Private Methods ------------------------ + */ + + // Symmetric Householder reduction to tridiagonal form. + private void tred2() { + + // This is derived from the Algol procedures tred2 by + // Bowdler, Martin, Reinsch, and Wilkinson, Handbook for + // Auto. Comp., Vol.ii-Linear Algebra, and the corresponding + // Fortran subroutine in EISPACK. + + for(int j = 0; j < n; j++) { + d[j] = V[n - 1][j]; + } + + // Householder reduction to tridiagonal form. + + for(int i = n - 1; i > 0; i--) { + + // Scale to avoid under/overflow. + + double scale = 0.0; + double h = 0.0; + for(int k = 0; k < i; k++) { + scale = scale + Math.abs(d[k]); + } + if(scale == 0.0) { + e[i] = d[i - 1]; + for(int j = 0; j < i; j++) { + d[j] = V[i - 1][j]; + V[i][j] = 0.0; + V[j][i] = 0.0; + } + } + else { + + // Generate Householder vector. 
+ + for(int k = 0; k < i; k++) { + d[k] /= scale; + h += d[k] * d[k]; + } + double f = d[i - 1]; + double g = Math.sqrt(h); + if(f > 0) { + g = -g; + } + e[i] = scale * g; + h = h - f * g; + d[i - 1] = f - g; + for(int j = 0; j < i; j++) { + e[j] = 0.0; + } + + // Apply similarity transformation to remaining columns. + + for(int j = 0; j < i; j++) { + f = d[j]; + V[j][i] = f; + g = e[j] + V[j][j] * f; + for(int k = j + 1; k <= i - 1; k++) { + g += V[k][j] * d[k]; + e[k] += V[k][j] * f; + } + e[j] = g; + } + f = 0.0; + for(int j = 0; j < i; j++) { + e[j] /= h; + f += e[j] * d[j]; + } + double hh = f / (h + h); + for(int j = 0; j < i; j++) { + e[j] -= hh * d[j]; + } + for(int j = 0; j < i; j++) { + f = d[j]; + g = e[j]; + for(int k = j; k <= i - 1; k++) { + V[k][j] -= (f * e[k] + g * d[k]); + } + d[j] = V[i - 1][j]; + V[i][j] = 0.0; + } + } + d[i] = h; + } + + // Accumulate transformations. + + for(int i = 0; i < n - 1; i++) { + V[n - 1][i] = V[i][i]; + V[i][i] = 1.0; + double h = d[i + 1]; + if(h != 0.0) { + for(int k = 0; k <= i; k++) { + d[k] = V[k][i + 1] / h; + } + for(int j = 0; j <= i; j++) { + double g = 0.0; + for(int k = 0; k <= i; k++) { + g += V[k][i + 1] * V[k][j]; + } + for(int k = 0; k <= i; k++) { + V[k][j] -= g * d[k]; + } + } + } + for(int k = 0; k <= i; k++) { + V[k][i + 1] = 0.0; + } + } + for(int j = 0; j < n; j++) { + d[j] = V[n - 1][j]; + V[n - 1][j] = 0.0; + } + V[n - 1][n - 1] = 1.0; + e[0] = 0.0; + } + + // Symmetric tridiagonal QL algorithm. + + private void tql2() { + + // This is derived from the Algol procedures tql2, by + // Bowdler, Martin, Reinsch, and Wilkinson, Handbook for + // Auto. Comp., Vol.ii-Linear Algebra, and the corresponding + // Fortran subroutine in EISPACK. 
+ + for(int i = 1; i < n; i++) { + e[i - 1] = e[i]; + } + e[n - 1] = 0.0; + + double f = 0.0; + double tst1 = 0.0; + double eps = Math.pow(2.0, -52.0); + for(int l = 0; l < n; l++) { + + // Find small subdiagonal element + + tst1 = Math.max(tst1, Math.abs(d[l]) + Math.abs(e[l])); + int m = l; + while(m < n) { + if(Math.abs(e[m]) <= eps * tst1) { + break; + } + m++; + } + + // If m == l, d[l] is an eigenvalue, + // otherwise, iterate. + + if(m > l) { + int iter = 0; + do { + iter = iter + 1; // (Could check iteration count here.) + + // Compute implicit shift + + double g = d[l]; + double p = (d[l + 1] - g) / (2.0 * e[l]); + double r = MathUtil.hypotenuse(p, 1.0); + if(p < 0) { + r = -r; + } + d[l] = e[l] / (p + r); + d[l + 1] = e[l] * (p + r); + double dl1 = d[l + 1]; + double h = g - d[l]; + for(int i = l + 2; i < n; i++) { + d[i] -= h; + } + f = f + h; + + // Implicit QL transformation. + + p = d[m]; + double c = 1.0; + double c2 = c; + double c3 = c; + double el1 = e[l + 1]; + double s = 0.0; + double s2 = 0.0; + for(int i = m - 1; i >= l; i--) { + c3 = c2; + c2 = c; + s2 = s; + g = c * e[i]; + h = c * p; + r = MathUtil.hypotenuse(p, e[i]); + e[i + 1] = s * r; + s = e[i] / r; + c = p / r; + p = c * d[i] - s * g; + d[i + 1] = h + s * (c * g + s * d[i]); + + // Accumulate transformation. + + for(int k = 0; k < n; k++) { + h = V[k][i + 1]; + V[k][i + 1] = s * V[k][i] + c * h; + V[k][i] = c * V[k][i] - s * h; + } + } + p = -s * s2 * c3 * el1 * e[l] / dl1; + e[l] = s * p; + d[l] = c * p; + + // Check for convergence. + + } + while(Math.abs(e[l]) > eps * tst1); + } + d[l] = d[l] + f; + e[l] = 0.0; + + } + + // Sort eigenvalues and corresponding vectors. 
+ + for(int i = 0; i < n - 1; i++) { + int k = i; + double p = d[i]; + for(int j = i + 1; j < n; j++) { + if(d[j] < p) { + k = j; + p = d[j]; + } + } + if(k != i) { + d[k] = d[i]; + d[i] = p; + for(int j = 0; j < n; j++) { + p = V[j][i]; + V[j][i] = V[j][k]; + V[j][k] = p; + } + } + } + } + + // Nonsymmetric reduction to Hessenberg form. + + private void orthes() { + // FIXME: does this fail on NaN/inf values? + + // This is derived from the Algol procedures orthes and ortran, + // by Martin and Wilkinson, Handbook for Auto. Comp., + // Vol.ii-Linear Algebra, and the corresponding + // Fortran subroutines in EISPACK. + + int low = 0; + int high = n - 1; + + for(int m = low + 1; m <= high - 1; m++) { + + // Scale column. + + double scale = 0.0; + for(int i = m; i <= high; i++) { + scale = scale + Math.abs(H[i][m - 1]); + } + if(scale != 0.0) { + + // Compute Householder transformation. + + double h = 0.0; + for(int i = high; i >= m; i--) { + ort[i] = H[i][m - 1] / scale; + h += ort[i] * ort[i]; + } + double g = Math.sqrt(h); + if(ort[m] > 0) { + g = -g; + } + h = h - ort[m] * g; + ort[m] = ort[m] - g; + + // Apply Householder similarity transformation + // H = (I-u*u'/h)*H*(I-u*u')/h) + + for(int j = m; j < n; j++) { + double f = 0.0; + for(int i = high; i >= m; i--) { + f += ort[i] * H[i][j]; + } + f = f / h; + for(int i = m; i <= high; i++) { + H[i][j] -= f * ort[i]; + } + } + + for(int i = 0; i <= high; i++) { + double f = 0.0; + for(int j = high; j >= m; j--) { + f += ort[j] * H[i][j]; + } + f = f / h; + for(int j = m; j <= high; j++) { + H[i][j] -= f * ort[j]; + } + } + ort[m] = scale * ort[m]; + H[m][m - 1] = scale * g; + } + } + + // Accumulate transformations (Algol's ortran). + + for(int i = 0; i < n; i++) { + for(int j = 0; j < n; j++) { + V[i][j] = (i == j ? 
1.0 : 0.0); + } + } + + for(int m = high - 1; m >= low + 1; m--) { + if(H[m][m - 1] != 0.0) { + for(int i = m + 1; i <= high; i++) { + ort[i] = H[i][m - 1]; + } + for(int j = m; j <= high; j++) { + double g = 0.0; + for(int i = m; i <= high; i++) { + g += ort[i] * V[i][j]; + } + // Double division avoids possible underflow + g = (g / ort[m]) / H[m][m - 1]; + for(int i = m; i <= high; i++) { + V[i][j] += g * ort[i]; + } + } + } + } + } + + // Complex scalar division. + + private transient double cdivr, cdivi; + + private void cdiv(double xr, double xi, double yr, double yi) { + double r, d; + if(Math.abs(yr) > Math.abs(yi)) { + r = yi / yr; + d = yr + r * yi; + cdivr = (xr + r * xi) / d; + cdivi = (xi - r * xr) / d; + } + else { + r = yr / yi; + d = yi + r * yr; + cdivr = (r * xr + xi) / d; + cdivi = (r * xi - xr) / d; + } + } + + // Nonsymmetric reduction from Hessenberg to real Schur form. + + private void hqr2() { + // FIXME: does this fail on NaN/inf values? + + // This is derived from the Algol procedure hqr2, + // by Martin and Wilkinson, Handbook for Auto. Comp., + // Vol.ii-Linear Algebra, and the corresponding + // Fortran subroutine in EISPACK. 
+ + // Initialize + + int nn = this.n; + int n = nn - 1; + int low = 0; + int high = nn - 1; + double eps = Math.pow(2.0, -52.0); + double exshift = 0.0; + double p = 0, q = 0, r = 0, s = 0, z = 0, t, w, x, y; + + // Store roots isolated by balanc and compute matrix norm + + double norm = 0.0; + for(int i = 0; i < nn; i++) { + if(i < low | i > high) { + d[i] = H[i][i]; + e[i] = 0.0; + } + for(int j = Math.max(i - 1, 0); j < nn; j++) { + norm = norm + Math.abs(H[i][j]); + } + } + + // Outer loop over eigenvalue index + + int iter = 0; + while(n >= low) { + + // Look for single small sub-diagonal element + + int l = n; + while(l > low) { + s = Math.abs(H[l - 1][l - 1]) + Math.abs(H[l][l]); + if(s == 0.0) { + s = norm; + } + if(Math.abs(H[l][l - 1]) < eps * s) { + break; + } + l--; + } + + // Check for convergence + // One root found + + if(l == n) { + H[n][n] = H[n][n] + exshift; + d[n] = H[n][n]; + e[n] = 0.0; + n--; + iter = 0; + + // Two roots found + + } + else if(l == n - 1) { + w = H[n][n - 1] * H[n - 1][n]; + p = (H[n - 1][n - 1] - H[n][n]) / 2.0; + q = p * p + w; + z = Math.sqrt(Math.abs(q)); + H[n][n] = H[n][n] + exshift; + H[n - 1][n - 1] = H[n - 1][n - 1] + exshift; + x = H[n][n]; + + // Real pair + + if(q >= 0) { + if(p >= 0) { + z = p + z; + } + else { + z = p - z; + } + d[n - 1] = x + z; + d[n] = d[n - 1]; + if(z != 0.0) { + d[n] = x - w / z; + } + e[n - 1] = 0.0; + e[n] = 0.0; + x = H[n][n - 1]; + s = Math.abs(x) + Math.abs(z); + p = x / s; + q = z / s; + r = Math.sqrt(p * p + q * q); + p = p / r; + q = q / r; + + // Row modification + + for(int j = n - 1; j < nn; j++) { + z = H[n - 1][j]; + H[n - 1][j] = q * z + p * H[n][j]; + H[n][j] = q * H[n][j] - p * z; + } + + // Column modification + + for(int i = 0; i <= n; i++) { + z = H[i][n - 1]; + H[i][n - 1] = q * z + p * H[i][n]; + H[i][n] = q * H[i][n] - p * z; + } + + // Accumulate transformations + + for(int i = low; i <= high; i++) { + z = V[i][n - 1]; + V[i][n - 1] = q * z + p * V[i][n]; + V[i][n] = 
q * V[i][n] - p * z; + } + + // Complex pair + + } + else { + d[n - 1] = x + p; + d[n] = x + p; + e[n - 1] = z; + e[n] = -z; + } + n = n - 2; + iter = 0; + + // No convergence yet + + } + else { + + // Form shift + + x = H[n][n]; + y = 0.0; + w = 0.0; + if(l < n) { + y = H[n - 1][n - 1]; + w = H[n][n - 1] * H[n - 1][n]; + } + + // Wilkinson's original ad hoc shift + + if(iter == 10) { + exshift += x; + for(int i = low; i <= n; i++) { + H[i][i] -= x; + } + s = Math.abs(H[n][n - 1]) + Math.abs(H[n - 1][n - 2]); + x = y = 0.75 * s; + w = -0.4375 * s * s; + } + + // MATLAB's new ad hoc shift + + if(iter == 30) { + s = (y - x) / 2.0; + s = s * s + w; + if(s > 0) { + s = Math.sqrt(s); + if(y < x) { + s = -s; + } + s = x - w / ((y - x) / 2.0 + s); + for(int i = low; i <= n; i++) { + H[i][i] -= s; + } + exshift += s; + x = y = w = 0.964; + } + } + + iter = iter + 1; // (Could check iteration count here.) + + // Look for two consecutive small sub-diagonal elements + + int m = n - 2; + while(m >= l) { + z = H[m][m]; + r = x - z; + s = y - z; + p = (r * s - w) / H[m + 1][m] + H[m][m + 1]; + q = H[m + 1][m + 1] - z - r - s; + r = H[m + 2][m + 1]; + s = Math.abs(p) + Math.abs(q) + Math.abs(r); + p = p / s; + q = q / s; + r = r / s; + if(m == l) { + break; + } + if(Math.abs(H[m][m - 1]) * (Math.abs(q) + Math.abs(r)) < eps * (Math.abs(p) * (Math.abs(H[m - 1][m - 1]) + Math.abs(z) + Math.abs(H[m + 1][m + 1])))) { + break; + } + m--; + } + + for(int i = m + 2; i <= n; i++) { + H[i][i - 2] = 0.0; + if(i > m + 2) { + H[i][i - 3] = 0.0; + } + } + + // Double QR step involving rows l:n and columns m:n + + for(int k = m; k <= n - 1; k++) { + boolean notlast = (k != n - 1); + if(k != m) { + p = H[k][k - 1]; + q = H[k + 1][k - 1]; + r = (notlast ? 
H[k + 2][k - 1] : 0.0); + x = Math.abs(p) + Math.abs(q) + Math.abs(r); + if(x != 0.0) { + p = p / x; + q = q / x; + r = r / x; + } + } + if(x == 0.0) { + break; + } + s = Math.sqrt(p * p + q * q + r * r); + if(p < 0) { + s = -s; + } + if(s != 0) { + if(k != m) { + H[k][k - 1] = -s * x; + } + else if(l != m) { + H[k][k - 1] = -H[k][k - 1]; + } + p = p + s; + x = p / s; + y = q / s; + z = r / s; + q = q / p; + r = r / p; + + // Row modification + + for(int j = k; j < nn; j++) { + p = H[k][j] + q * H[k + 1][j]; + if(notlast) { + p = p + r * H[k + 2][j]; + H[k + 2][j] = H[k + 2][j] - p * z; + } + H[k][j] = H[k][j] - p * x; + H[k + 1][j] = H[k + 1][j] - p * y; + } + + // Column modification + + for(int i = 0; i <= Math.min(n, k + 3); i++) { + p = x * H[i][k] + y * H[i][k + 1]; + if(notlast) { + p = p + z * H[i][k + 2]; + H[i][k + 2] = H[i][k + 2] - p * r; + } + H[i][k] = H[i][k] - p; + H[i][k + 1] = H[i][k + 1] - p * q; + } + + // Accumulate transformations + + for(int i = low; i <= high; i++) { + p = x * V[i][k] + y * V[i][k + 1]; + if(notlast) { + p = p + z * V[i][k + 2]; + V[i][k + 2] = V[i][k + 2] - p * r; + } + V[i][k] = V[i][k] - p; + V[i][k + 1] = V[i][k + 1] - p * q; + } + } // (s != 0) + } // k loop + } // check convergence + } // while (n >= low) + + // Backsubstitute to find vectors of upper triangular form + + if(norm == 0.0) { + return; + } + + for(n = nn - 1; n >= 0; n--) { + p = d[n]; + q = e[n]; + + // Real vector + + if(q == 0) { + int l = n; + H[n][n] = 1.0; + for(int i = n - 1; i >= 0; i--) { + w = H[i][i] - p; + r = 0.0; + for(int j = l; j <= n; j++) { + r = r + H[i][j] * H[j][n]; + } + if(e[i] < 0.0) { + z = w; + s = r; + } + else { + l = i; + if(e[i] == 0.0) { + if(w != 0.0) { + H[i][n] = -r / w; + } + else { + H[i][n] = -r / (eps * norm); + } + + // Solve real equations + + } + else { + x = H[i][i + 1]; + y = H[i + 1][i]; + q = (d[i] - p) * (d[i] - p) + e[i] * e[i]; + t = (x * s - z * r) / q; + H[i][n] = t; + if(Math.abs(x) > Math.abs(z)) { + H[i 
+ 1][n] = (-r - w * t) / x; + } + else { + H[i + 1][n] = (-s - y * t) / z; + } + } + + // Overflow control + + t = Math.abs(H[i][n]); + if((eps * t) * t > 1) { + for(int j = i; j <= n; j++) { + H[j][n] = H[j][n] / t; + } + } + } + } + + // Complex vector + + } + else if(q < 0) { + int l = n - 1; + + // Last vector component imaginary so matrix is triangular + + if(Math.abs(H[n][n - 1]) > Math.abs(H[n - 1][n])) { + H[n - 1][n - 1] = q / H[n][n - 1]; + H[n - 1][n] = -(H[n][n] - p) / H[n][n - 1]; + } + else { + cdiv(0.0, -H[n - 1][n], H[n - 1][n - 1] - p, q); + H[n - 1][n - 1] = cdivr; + H[n - 1][n] = cdivi; + } + H[n][n - 1] = 0.0; + H[n][n] = 1.0; + for(int i = n - 2; i >= 0; i--) { + double ra, sa, vr, vi; + ra = 0.0; + sa = 0.0; + for(int j = l; j <= n; j++) { + ra = ra + H[i][j] * H[j][n - 1]; + sa = sa + H[i][j] * H[j][n]; + } + w = H[i][i] - p; + + if(e[i] < 0.0) { + z = w; + r = ra; + s = sa; + } + else { + l = i; + if(e[i] == 0) { + cdiv(-ra, -sa, w, q); + H[i][n - 1] = cdivr; + H[i][n] = cdivi; + } + else { + + // Solve complex equations + + x = H[i][i + 1]; + y = H[i + 1][i]; + vr = (d[i] - p) * (d[i] - p) + e[i] * e[i] - q * q; + vi = (d[i] - p) * 2.0 * q; + if(vr == 0.0 & vi == 0.0) { + vr = eps * norm * (Math.abs(w) + Math.abs(q) + Math.abs(x) + Math.abs(y) + Math.abs(z)); + } + cdiv(x * r - z * ra + q * sa, x * s - z * sa - q * ra, vr, vi); + H[i][n - 1] = cdivr; + H[i][n] = cdivi; + if(Math.abs(x) > (Math.abs(z) + Math.abs(q))) { + H[i + 1][n - 1] = (-ra - w * H[i][n - 1] + q * H[i][n]) / x; + H[i + 1][n] = (-sa - w * H[i][n] - q * H[i][n - 1]) / x; + } + else { + cdiv(-r - y * H[i][n - 1], -s - y * H[i][n], z, q); + H[i + 1][n - 1] = cdivr; + H[i + 1][n] = cdivi; + } + } + + // Overflow control + + t = Math.max(Math.abs(H[i][n - 1]), Math.abs(H[i][n])); + if((eps * t) * t > 1) { + for(int j = i; j <= n; j++) { + H[j][n - 1] = H[j][n - 1] / t; + H[j][n] = H[j][n] / t; + } + } + } + } + } + } + + // Vectors of isolated roots + + for(int i = 0; i < nn; 
i++) { + if(i < low | i > high) { + for(int j = i; j < nn; j++) { + V[i][j] = H[i][j]; + } + } + } + + // Back transformation to get eigenvectors of original matrix + + for(int j = nn - 1; j >= low; j--) { + for(int i = low; i <= high; i++) { + z = 0.0; + for(int k = low; k <= Math.min(j, high); k++) { + z = z + V[i][k] * H[k][j]; + } + V[i][j] = z; + } + } + } + + /* + * ------------------------ Constructor ------------------------ + */ + + /** + * Check for symmetry, then construct the eigenvalue decomposition + * + * @param Arg Square matrix + */ + + public EigenvalueDecomposition(Matrix Arg) { + double[][] A = Arg.getArrayRef(); + n = Arg.getColumnDimensionality(); + V = new double[n][n]; + d = new double[n]; + e = new double[n]; + + issymmetric = true; + for(int j = 0; (j < n) & issymmetric; j++) { + for(int i = 0; (i < n) & issymmetric; i++) { + issymmetric = (A[i][j] == A[j][i]); + if(Double.isNaN(A[i][j])) { + throw new IllegalArgumentException("NaN in EigenvalueDecomposition!"); + } + if(Double.isInfinite(A[i][j])) { + throw new IllegalArgumentException("+-inf in EigenvalueDecomposition!"); + } + } + } + + if(issymmetric) { + for(int i = 0; i < n; i++) { + for(int j = 0; j < n; j++) { + V[i][j] = A[i][j]; + } + } + + // Tridiagonalize. + tred2(); + + // Diagonalize. + tql2(); + + } + else { + H = new double[n][n]; + ort = new double[n]; + + for(int j = 0; j < n; j++) { + for(int i = 0; i < n; i++) { + H[i][j] = A[i][j]; + } + } + + // Reduce to Hessenberg form. + orthes(); + + // Reduce Hessenberg to real Schur form. 
+ hqr2(); + } + } + + /* + * ------------------------ Public Methods ------------------------ + */ + + /** + * Return the eigenvector matrix + * + * @return V + */ + public Matrix getV() { + return new Matrix(V); + } + + /** + * Return the real parts of the eigenvalues + * + * @return real(diag(D)) + */ + public double[] getRealEigenvalues() { + return d; + } + + /** + * Return the imaginary parts of the eigenvalues + * + * @return imag(diag(D)) + */ + public double[] getImagEigenvalues() { + return e; + } + + /** + * Return the block diagonal eigenvalue matrix + * + * @return D + */ + public Matrix getD() { + Matrix X = new Matrix(n, n); + double[][] D = X.getArrayRef(); + for(int i = 0; i < n; i++) { + for(int j = 0; j < n; j++) { + D[i][j] = 0.0; + } + D[i][i] = d[i]; + if(e[i] > 0) { + D[i][i + 1] = e[i]; + } + else if(e[i] < 0) { + D[i][i - 1] = e[i]; + } + } + return X; + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/LUDecomposition.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/LUDecomposition.java new file mode 100644 index 00000000..07faa89d --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/LUDecomposition.java @@ -0,0 +1,295 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * LU Decomposition. + * <P> + * For an m-by-n matrix A with m >= n, the LU decomposition is an m-by-n unit + * lower triangular matrix L, an n-by-n upper triangular matrix U, and a + * permutation vector piv of length m so that A(piv,:) = L*U. If m < n, then + * L is m-by-m and U is m-by-n. + * <P> + * The LU decompostion with pivoting always exists, even if the matrix is + * singular, so the constructor will never fail. The primary use of the LU + * decomposition is in the solution of square systems of simultaneous linear + * equations. This will fail if isNonsingular() returns false. 
+ * + * @apiviz.uses Matrix - - transforms + */ +public class LUDecomposition implements java.io.Serializable { + /** + * Serial version + */ + private static final long serialVersionUID = 1L; + + /** + * Array for internal storage of decomposition. + * + * @serial internal array storage. + */ + private double[][] LU; + + /** + * Row and column dimensions, and pivot sign. + * + * @serial column dimension. + * @serial row dimension. + * @serial pivot sign. + */ + private int m, n, pivsign; + + /** + * Internal storage of pivot vector. + * + * @serial pivot vector. + */ + private int[] piv; + + /* + * ------------------------ Constructor ------------------------ + */ + + /** + * LU Decomposition + * + * @param A Rectangular matrix + */ + public LUDecomposition(Matrix A) { + // Use a "left-looking", dot-product, Crout/Doolittle algorithm. + + LU = A.getArrayCopy(); + m = A.getRowDimensionality(); + n = A.getColumnDimensionality(); + piv = new int[m]; + for(int i = 0; i < m; i++) { + piv[i] = i; + } + pivsign = 1; + double[] LUrowi; + double[] LUcolj = new double[m]; + + // Outer loop. + + for(int j = 0; j < n; j++) { + // Make a copy of the j-th column to localize references. + + for(int i = 0; i < m; i++) { + LUcolj[i] = LU[i][j]; + } + + // Apply previous transformations. + + for(int i = 0; i < m; i++) { + LUrowi = LU[i]; + + // Most of the time is spent in the following dot product. + + int kmax = Math.min(i, j); + double s = 0.0; + for(int k = 0; k < kmax; k++) { + s += LUrowi[k] * LUcolj[k]; + } + + LUrowi[j] = LUcolj[i] -= s; + } + + // Find pivot and exchange if necessary. + + int p = j; + for(int i = j + 1; i < m; i++) { + if(Math.abs(LUcolj[i]) > Math.abs(LUcolj[p])) { + p = i; + } + } + if(p != j) { + for(int k = 0; k < n; k++) { + double t = LU[p][k]; + LU[p][k] = LU[j][k]; + LU[j][k] = t; + } + int k = piv[p]; + piv[p] = piv[j]; + piv[j] = k; + pivsign = -pivsign; + } + + // Compute multipliers. 
+ + if(j < m & LU[j][j] != 0.0) { + for(int i = j + 1; i < m; i++) { + LU[i][j] /= LU[j][j]; + } + } + } + } + + /* + * ------------------------ Public Methods ------------------------ + */ + + /** + * Is the matrix nonsingular? + * + * @return true if U, and hence A, is nonsingular. + */ + public boolean isNonsingular() { + for(int j = 0; j < n; j++) { + if(LU[j][j] == 0) { + return false; + } + } + return true; + } + + /** + * Return lower triangular factor + * + * @return L + */ + public Matrix getL() { + Matrix X = new Matrix(m, n); + double[][] L = X.getArrayRef(); + for(int i = 0; i < m; i++) { + for(int j = 0; j < n; j++) { + if(i > j) { + L[i][j] = LU[i][j]; + } + else if(i == j) { + L[i][j] = 1.0; + } + else { + L[i][j] = 0.0; + } + } + } + return X; + } + + /** + * Return upper triangular factor + * + * @return U + */ + public Matrix getU() { + Matrix X = new Matrix(n, n); + double[][] U = X.getArrayRef(); + for(int i = 0; i < n; i++) { + for(int j = 0; j < n; j++) { + if(i <= j) { + U[i][j] = LU[i][j]; + } + else { + U[i][j] = 0.0; + } + } + } + return X; + } + + /** + * Return pivot permutation vector + * + * @return piv + */ + public int[] getPivot() { + int[] p = new int[m]; + for(int i = 0; i < m; i++) { + p[i] = piv[i]; + } + return p; + } + + /** + * Return pivot permutation vector as a one-dimensional double array + * + * @return (double) piv + */ + public double[] getDoublePivot() { + double[] vals = new double[m]; + for(int i = 0; i < m; i++) { + vals[i] = piv[i]; + } + return vals; + } + + /** + * Determinant + * + * @return det(A) + * @exception IllegalArgumentException Matrix must be square + */ + public double det() { + if(m != n) { + throw new IllegalArgumentException("Matrix must be square."); + } + double d = pivsign; + for(int j = 0; j < n; j++) { + d *= LU[j][j]; + } + return d; + } + + /** + * Solve A*X = B + * + * @param B A Matrix with as many rows as A and any number of columns. 
+ * @return X so that L*U*X = B(piv,:) + * @exception IllegalArgumentException Matrix row dimensions must agree. + * @exception RuntimeException Matrix is singular. + */ + public Matrix solve(Matrix B) { + if(B.getRowDimensionality() != m) { + throw new IllegalArgumentException("Matrix row dimensions must agree."); + } + if(!this.isNonsingular()) { + throw new RuntimeException("Matrix is singular."); + } + + // Copy right hand side with pivoting + int nx = B.getColumnDimensionality(); + Matrix Xmat = B.getMatrix(piv, 0, nx - 1); + double[][] X = Xmat.getArrayRef(); + + // Solve L*Y = B(piv,:) + for(int k = 0; k < n; k++) { + for(int i = k + 1; i < n; i++) { + for(int j = 0; j < nx; j++) { + X[i][j] -= X[k][j] * LU[i][k]; + } + } + } + // Solve U*X = Y; + for(int k = n - 1; k >= 0; k--) { + for(int j = 0; j < nx; j++) { + X[k][j] /= LU[k][k]; + } + for(int i = 0; i < k; i++) { + for(int j = 0; j < nx; j++) { + X[i][j] -= X[k][j] * LU[i][k]; + } + } + } + return Xmat; + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/LinearEquationSystem.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/LinearEquationSystem.java new file mode 100644 index 00000000..fb3c7a00 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/LinearEquationSystem.java @@ -0,0 +1,770 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +import java.text.DecimalFormat; +import java.text.DecimalFormatSymbols; +import java.text.NumberFormat; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Locale; + +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.utilities.FormatUtil; +import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair; + +/** + * Class for systems of linear equations. + * + * @author Elke Achtert + */ +public class LinearEquationSystem { + /** + * Logger. + */ + private static final Logging logger = Logging.getLogger(LinearEquationSystem.class); + + /** + * Indicates trivial pivot search strategy. 
+ */ + private static final int TRIVAL_PIVOT_SEARCH = 0; + + /** + * Indicates total pivot search strategy. + */ + private static final int TOTAL_PIVOT_SEARCH = 1; + + /** + * Indicates if linear equation system is solvable. + */ + private boolean solvable; + + /** + * Indicates if solvability has been checked. + */ + private boolean solved; + + /** + * The rank of the coefficient matrix. + */ + private int rank; + + /** + * The matrix of coefficients. + */ + private double[][] coeff; + + /** + * The right hand side of the equation system. + */ + private double[] rhs; + + /** + * Encodes row permutations, row i is at position row[i]. + */ + private int[] row; + + /** + * Encodes column permutations, column j is at position col[j]. + */ + private int[] col; + + /** + * Holds the special solution vector. + */ + private double[] x_0; + + /** + * Holds the space of solutions of the homogeneous linear equation system. + */ + private double[][] u; + + /** + * Indicates if linear equation system is in reduced row echelon form. + */ + private boolean reducedRowEchelonForm; + + /** + * Constructs a linear equation system with given coefficient matrix + * <code>a</code> and right hand side <code>b</code>. 
+ * + * @param a the matrix of the coefficients of the linear equation system + * @param b the right hand side of the linear equation system + */ + public LinearEquationSystem(double[][] a, double[] b) { + if(a == null) { + throw new IllegalArgumentException("Coefficient array is null!"); + } + if(b == null) { + throw new IllegalArgumentException("Right hand side is null!"); + } + if(a.length != b.length) { + throw new IllegalArgumentException("Coefficient matrix and right hand side " + "differ in row dimensionality!"); + } + + coeff = a; + rhs = b; + row = new int[coeff.length]; + for(int i = 0; i < coeff.length; i++) { + row[i] = i; + } + col = new int[coeff[0].length]; + for(int j = 0; j < coeff[0].length; j++) { + col[j] = j; + } + rank = 0; + x_0 = null; + solved = false; + solvable = false; + reducedRowEchelonForm = false; + } + + /** + * Constructs a linear equation system with given coefficient matrix + * <code>a</code> and right hand side <code>b</code>. + * + * @param a the matrix of the coefficients of the linear equation system + * @param b the right hand side of the linear equation system + * @param rowPermutations the row permutations, row i is at position row[i] + * @param columnPermutations the column permutations, column i is at position + * column[i] + */ + public LinearEquationSystem(double[][] a, double[] b, int[] rowPermutations, int[] columnPermutations) { + if(a == null) { + throw new IllegalArgumentException("Coefficient array is null!"); + } + if(b == null) { + throw new IllegalArgumentException("Right hand side is null!"); + } + if(a.length != b.length) { + throw new IllegalArgumentException("Coefficient matrix and right hand side " + "differ in row dimensionality!"); + } + if(rowPermutations.length != a.length) { + throw new IllegalArgumentException("Coefficient matrix and row permutation array " + "differ in row dimensionality!"); + } + if(columnPermutations.length != a[0].length) { + throw new IllegalArgumentException("Coefficient 
matrix and column permutation array " + "differ in column dimensionality!"); + } + + coeff = a; + rhs = b; + this.row = rowPermutations; + this.col = columnPermutations; + rank = 0; + x_0 = null; + solved = false; + solvable = false; + reducedRowEchelonForm = false; + } + + /** + * Returns a copy of the coefficient array of this linear equation system. + * + * @return a copy of the coefficient array of this linear equation system + */ + public double[][] getCoefficents() { + return coeff.clone(); + } + + /** + * Returns a copy of the right hand side of this linear equation system. + * + * @return a copy of the right hand side of this linear equation system + */ + public double[] getRHS() { + return rhs.clone(); + } + + /** + * Returns a copy of the row permutations, row i is at position row[i]. + * + * @return a copy of the row permutations + */ + public int[] getRowPermutations() { + return row.clone(); + } + + /** + * Returns a copy of the column permutations, column i is at position + * column[i]. + * + * @return a copy of the column permutations + */ + public int[] getColumnPermutations() { + return col.clone(); + } + + /** + * Tests if system has already been tested for solvability. + * + * @return true if a solution has already been computed, false otherwise. + */ + public boolean isSolved() { + return solved; + } + + /** + * Solves this linear equation system by total pivot search. + * "Total pivot search" takes as pivot element the element in the current + * column having the biggest value. If we have: <br> + * <code> + * ( a_11 ... a_1n ) <br> + * ( 0 ... a_2n ) <br> + * ( 0 ... a_ii ... a_in )<br> + * ( 0 ... a_(i+1)i ... a_(i+1)n ) <br> + * ( 0 ... a_ni ... a_nn ) <br> + * </code> Then we search for x,y in {i,...n}, so that |a_xy| > |a_ij| + */ + public void solveByTotalPivotSearch() { + solve(TOTAL_PIVOT_SEARCH); + } + + /** + * Solves this linear equation system by trivial pivot search. 
+ * "Trivial pivot search" takes as pivot element the next element in the + * current column beeing non zero. + */ + public void solveByTrivialPivotSearch() { + solve(TRIVAL_PIVOT_SEARCH); + } + + /** + * Checks if a solved system is solvable. + * + * @return true if this linear equation system is solved and solvable + */ + public boolean isSolvable() { + return solvable && solved; + } + + /** + * Returns a string representation of this equation system. + * + * @param prefix the prefix of each line + * @param fractionDigits the number of fraction digits for output accuracy + * @return a string representation of this equation system + */ + public String equationsToString(String prefix, int fractionDigits) { + DecimalFormat nf = new DecimalFormat(); + nf.setMinimumFractionDigits(fractionDigits); + nf.setMaximumFractionDigits(fractionDigits); + nf.setDecimalFormatSymbols(new DecimalFormatSymbols(Locale.US)); + nf.setNegativePrefix(""); + nf.setPositivePrefix(""); + return equationsToString(prefix, nf); + } + + /** + * Returns a string representation of this equation system. 
+ * + * @param prefix the prefix of each line + * @param nf the number format + * @return a string representation of this equation system + */ + public String equationsToString(String prefix, NumberFormat nf) { + if((coeff == null) || (rhs == null) || (row == null) || (col == null)) { + throw new NullPointerException(); + } + + int[] coeffDigits = maxIntegerDigits(coeff); + int rhsDigits = maxIntegerDigits(rhs); + + StringBuffer buffer = new StringBuffer(); + buffer.append(prefix).append("\n").append(prefix); + for(int i = 0; i < coeff.length; i++) { + for(int j = 0; j < coeff[row[0]].length; j++) { + format(nf, buffer, coeff[row[i]][col[j]], coeffDigits[col[j]]); + buffer.append(" * x_" + col[j]); + } + buffer.append(" ="); + format(nf, buffer, rhs[row[i]], rhsDigits); + + if(i < coeff.length - 1) { + buffer.append("\n").append(prefix); + } + else { + buffer.append("\n").append(prefix); + } + } + return buffer.toString(); + } + + /** + * Returns a string representation of this equation system. + * + * @param nf the number format + * @return a string representation of this equation system + */ + public String equationsToString(NumberFormat nf) { + return equationsToString("", nf); + } + + /** + * Returns a string representation of this equation system. + * + * @param fractionDigits the number of fraction digits for output accuracy + * @return a string representation of this equation system + */ + public String equationsToString(int fractionDigits) { + return equationsToString("", fractionDigits); + } + + /** + * Returns a string representation of the solution of this equation system. 
+ * + * @param fractionDigits precision + * + * @return a string representation of the solution of this equation system + */ + public String solutionToString(int fractionDigits) { + if(!isSolvable()) { + throw new IllegalStateException("System is not solvable!"); + } + + DecimalFormat nf = new DecimalFormat(); + nf.setMinimumFractionDigits(fractionDigits); + nf.setMaximumFractionDigits(fractionDigits); + nf.setDecimalFormatSymbols(new DecimalFormatSymbols(Locale.US)); + nf.setNegativePrefix(""); + nf.setPositivePrefix(""); + + int row = coeff[0].length / 2; + int params = u.length; + int paramsDigits = integerDigits(params); + + int x0Digits = maxIntegerDigits(x_0); + int[] uDigits = maxIntegerDigits(u); + StringBuffer buffer = new StringBuffer(); + for(int i = 0; i < x_0.length; i++) { + double value = x_0[i]; + format(nf, buffer, value, x0Digits); + for(int j = 0; j < u[0].length; j++) { + if(i == row) { + buffer.append(" + a_" + j + " * "); + } + else { + buffer.append(" "); + for(int d = 0; d < paramsDigits; d++) { + buffer.append(" "); + } + } + format(nf, buffer, u[i][j], uDigits[j]); + } + buffer.append("\n"); + } + return buffer.toString(); + } + + /** + * Brings this linear equation system into reduced row echelon form with + * choice of pivot method. + * + * @param method the pivot search method to use + */ + private void reducedRowEchelonForm(int method) { + final int rows = coeff.length; + final int cols = coeff[0].length; + + int k = -1; // denotes current position on diagonal + int pivotRow; // row index of pivot element + int pivotCol; // column index of pivot element + double pivot; // value of pivot element + + // main loop, transformation to reduced row echelon form + boolean exitLoop = false; + + while(!exitLoop) { + k++; + + // pivot search for entry in remaining matrix + // (depends on chosen method in switch) + // store position in pivotRow, pivotCol + + // TODO: Note that we're using "row, col", whereas "col, row" would be + // more common? 
+ IntIntPair pivotPos = new IntIntPair(0, 0); + IntIntPair currPos = new IntIntPair(k, k); + + switch(method){ + case TRIVAL_PIVOT_SEARCH: + pivotPos = nonZeroPivotSearch(k); + break; + case TOTAL_PIVOT_SEARCH: + pivotPos = totalPivotSearch(k); + break; + } + pivotRow = pivotPos.first; + pivotCol = pivotPos.second; + pivot = coeff[this.row[pivotRow]][col[pivotCol]]; + + if(logger.isDebugging()) { + StringBuffer msg = new StringBuffer(); + msg.append("equations ").append(equationsToString(4)); + msg.append(" *** pivot at (").append(pivotRow).append(",").append(pivotCol).append(") = ").append(pivot).append("\n"); + logger.debugFine(msg.toString()); + } + + // permute rows and columns to get this entry onto + // the diagonal + permutePivot(pivotPos, currPos); + + // test conditions for exiting loop + // after this iteration + // reasons are: Math.abs(pivot) == 0 + if((Math.abs(pivot) <= Matrix.DELTA)) { + exitLoop = true; + } + + // pivoting only if Math.abs(pivot) > 0 + // and k <= m - 1 + if((Math.abs(pivot) > Matrix.DELTA)) { + rank++; + pivotOperation(k); + } + + // test conditions for exiting loop + // after this iteration + // reasons are: k == rows-1 : no more rows + // k == cols-1 : no more columns + if(k == rows - 1 || k == cols - 1) { + exitLoop = true; + } + }// end while + + reducedRowEchelonForm = true; + } + + /** + * Method for total pivot search, searches for x,y in {k,...n}, so that |a_xy| + * > |a_ij| + * + * @param k search starts at entry (k,k) + * @return the position of the found pivot element + */ + private IntIntPair totalPivotSearch(int k) { + double max = 0; + int i, j, pivotRow = k, pivotCol = k; + double absValue; + for(i = k; i < coeff.length; i++) { + for(j = k; j < coeff[0].length; j++) { + // compute absolute value of + // current entry in absValue + absValue = Math.abs(coeff[row[i]][col[j]]); + + // compare absValue with value max + // found so far + if(max < absValue) { + // remember new value and position + max = absValue; + pivotRow 
= i; + pivotCol = j; + }// end if + }// end for j + }// end for k + return new IntIntPair(pivotRow, pivotCol); + } + + /** + * Method for trivial pivot search, searches for non-zero entry. + * + * @param k search starts at entry (k,k) + * @return the position of the found pivot element + */ + private IntIntPair nonZeroPivotSearch(int k) { + + int i, j; + double absValue; + for(i = k; i < coeff.length; i++) { + for(j = k; j < coeff[0].length; j++) { + // compute absolute value of + // current entry in absValue + absValue = Math.abs(coeff[row[i]][col[j]]); + + // check if absValue is non-zero + if(absValue > 0) { // found a pivot element + return new IntIntPair(i, j); + }// end if + }// end for j + }// end for k + return new IntIntPair(k, k); + } + + /** + * permutes two matrix rows and two matrix columns + * + * @param pos1 the fist position for the permutation + * @param pos2 the second position for the permutation + */ + private void permutePivot(IntIntPair pos1, IntIntPair pos2) { + int r1 = pos1.first; + int c1 = pos1.second; + int r2 = pos2.first; + int c2 = pos2.second; + int index; + index = row[r2]; + row[r2] = row[r1]; + row[r1] = index; + index = col[c2]; + col[c2] = col[c1]; + col[c1] = index; + } + + /** + * performs a pivot operation + * + * @param k pivoting takes place below (k,k) + */ + private void pivotOperation(int k) { + double pivot = coeff[row[k]][col[k]]; + + // pivot row: set pivot to 1 + coeff[row[k]][col[k]] = 1; + for(int i = k + 1; i < coeff[k].length; i++) { + coeff[row[k]][col[i]] /= pivot; + } + rhs[row[k]] /= pivot; + + if(logger.isDebugging()) { + StringBuffer msg = new StringBuffer(); + msg.append("set pivot element to 1 ").append(equationsToString(4)); + logger.debugFine(msg.toString()); + } + + // for (int i = k + 1; i < coeff.length; i++) { + for(int i = 0; i < coeff.length; i++) { + if(i == k) { + continue; + } + + // compute factor + double q = coeff[row[i]][col[k]]; + + // modify entry a[i,k], i <> k + coeff[row[i]][col[k]] = 
0; + + // modify entries a[i,j], i > k fixed, j = k+1...n-1 + for(int j = k + 1; j < coeff[0].length; j++) { + coeff[row[i]][col[j]] = coeff[row[i]][col[j]] - coeff[row[k]][col[j]] * q; + }// end for j + + // modify right-hand-side + rhs[row[i]] = rhs[row[i]] - rhs[row[k]] * q; + }// end for k + + if(logger.isDebugging()) { + StringBuffer msg = new StringBuffer(); + msg.append("after pivot operation ").append(equationsToString(4)); + logger.debugFine(msg.toString()); + } + } + + /** + * solves linear system with the chosen method + * + * @param method the pivot search method + */ + private void solve(int method) throws NullPointerException { + // solution exists + if(solved) { + return; + } + + // bring in reduced row echelon form + if(!reducedRowEchelonForm) { + reducedRowEchelonForm(method); + } + + if(!isSolvable(method)) { + if(logger.isDebugging()) { + logger.debugFine("Equation system is not solvable!"); + } + return; + } + + // compute one special solution + int cols = coeff[0].length; + List<Integer> boundIndices = new ArrayList<Integer>(); + x_0 = new double[cols]; + for(int i = 0; i < coeff.length; i++) { + for(int j = i; j < coeff[row[i]].length; j++) { + if(coeff[row[i]][col[j]] == 1) { + x_0[col[i]] = rhs[row[i]]; + boundIndices.add(col[i]); + break; + } + } + } + List<Integer> freeIndices = new ArrayList<Integer>(); + for(int i = 0; i < coeff[0].length; i++) { + if(boundIndices.contains(i)) { + continue; + } + freeIndices.add(i); + } + + StringBuffer msg = new StringBuffer(); + if(logger.isDebugging()) { + msg.append("\nSpecial solution x_0 = [").append(FormatUtil.format(x_0, ",", 4)).append("]"); + msg.append("\nbound Indices ").append(boundIndices); + msg.append("\nfree Indices ").append(freeIndices); + } + + // compute solution space of homogeneous linear equation system + Integer[] freeParameters = freeIndices.toArray(new Integer[freeIndices.size()]); + Integer[] boundParameters = boundIndices.toArray(new Integer[boundIndices.size()]); + 
Arrays.sort(boundParameters); + int freeIndex = 0; + int boundIndex = 0; + u = new double[cols][freeIndices.size()]; + + for(int j = 0; j < u[0].length; j++) { + for(int i = 0; i < u.length; i++) { + if(freeIndex < freeParameters.length && i == freeParameters[freeIndex]) { + u[i][j] = 1; + } + else if(boundIndex < boundParameters.length && i == boundParameters[boundIndex]) { + u[i][j] = -coeff[row[boundIndex]][freeParameters[freeIndex]]; + boundIndex++; + } + } + freeIndex++; + boundIndex = 0; + + } + + if(logger.isDebugging()) { + msg.append("\nU"); + for(double[] anU : u) { + msg.append("\n").append(FormatUtil.format(anU, ",", 4)); + } + logger.debugFine(msg.toString()); + } + + solved = true; + } + + /** + * Checks solvability of this linear equation system with the chosen method. + * + * @param method the pivot search method + * @return true if linear system in solvable + */ + private boolean isSolvable(int method) throws NullPointerException { + if(solved) { + return solvable; + } + + if(!reducedRowEchelonForm) { + reducedRowEchelonForm(method); + } + + // test if rank(coeff) == rank(coeff|rhs) + for(int i = rank; i < rhs.length; i++) { + if(Math.abs(rhs[row[i]]) > Matrix.DELTA) { + solvable = false; + return false; // not solvable + } + } + + solvable = true; + return true; + } + + /** + * Returns the maximum integer digits in each column of the specified values. + * + * @param values the values array + * @return the maximum integer digits in each column of the specified values + */ + private int[] maxIntegerDigits(double[][] values) { + int[] digits = new int[values[0].length]; + for(int j = 0; j < values[0].length; j++) { + for(double[] value : values) { + digits[j] = Math.max(digits[j], integerDigits(value[j])); + } + } + return digits; + } + + /** + * Returns the maximum integer digits of the specified values. 
+ * + * @param values the values array + * @return the maximum integer digits of the specified values + */ + private int maxIntegerDigits(double[] values) { + int digits = 0; + for(double value : values) { + digits = Math.max(digits, integerDigits(value)); + } + return digits; + } + + /** + * Returns the integer digits of the specified double value. + * + * @param d the double value + * @return the integer digits of the specified double value + */ + private int integerDigits(double d) { + double value = Math.abs(d); + if(value < 10) { + return 1; + } + return (int) (Math.log(value) / Math.log(10) + 1); + } + + /** + * Helper method for output of equations and solution. Appends the specified + * double value to the given string buffer according the number format and the + * maximum number of integer digits. + * + * @param nf the number format + * @param buffer the string buffer to append the value to + * @param value the value to append + * @param maxIntegerDigits the maximum number of integer digits + */ + private void format(NumberFormat nf, StringBuffer buffer, double value, int maxIntegerDigits) { + if(value >= 0) { + buffer.append(" + "); + } + else { + buffer.append(" - "); + } + int digits = maxIntegerDigits - integerDigits(value); + for(int d = 0; d < digits; d++) { + buffer.append(" "); + } + buffer.append(nf.format(Math.abs(value))); + } + + /** + * Return dimensionality of spanned subspace. + * + * @return dim + */ + public int subspacedim() { + return coeff[0].length - coeff.length; + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Matrix.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Matrix.java new file mode 100644 index 00000000..77556332 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Matrix.java @@ -0,0 +1,1824 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +import java.io.BufferedReader; +import java.io.Serializable; +import java.io.StreamTokenizer; +import java.util.Arrays; +import java.util.logging.Logger; + +import de.lmu.ifi.dbs.elki.data.RationalNumber; +import de.lmu.ifi.dbs.elki.logging.LoggingConfiguration; +import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.utilities.FormatUtil; + +/** + * A two-dimensional matrix class, where the data is stored as two-dimensional + * array. + * + * Implementation note: this class contains various optimizations that + * theoretically the java hotspot compiler should optimize on its own. However, + * they do show up a hotspots in the profiler (in cpu=times mode), so it does + * make a difference at least when optimizing other parts of ELKI. 
+ * + * @author Elke Achtert + * @author Erich Schubert + * + * @apiviz.uses MatrixLike oneway - - reads + * @apiviz.uses Vector + * @apiviz.landmark + */ +public class Matrix implements MatrixLike<Matrix>, Serializable { + /** + * Serial version + */ + private static final long serialVersionUID = 1L; + + /** + * A small number to handle numbers near 0 as 0. + */ + public static final double DELTA = 1E-3; + + /** + * Array for internal storage of elements. + * + * @serial internal array storage. + */ + protected final double[][] elements; + + // row dimensionality == elements.length! + + /** + * Column dimension. + */ + final int columndimension; + + /** + * Constructs an m-by-n matrix of zeros. + * + * @param m number of rows + * @param n number of columns + */ + public Matrix(final int m, final int n) { + this.columndimension = n; + elements = new double[m][n]; + } + + /** + * Constructs an m-by-n constant matrix. + * + * @param m number of rows + * @param n number of columns + * @param s A scalar value defining the constant value in the matrix + */ + public Matrix(final int m, final int n, final double s) { + this.columndimension = n; + elements = new double[m][n]; + for(int i = 0; i < m; i++) { + for(int j = 0; j < n; j++) { + elements[i][j] = s; + } + } + } + + /** + * Constructs a matrix from a 2-D array. + * + * @param elements an array of arrays of doubles defining the values of the + * matrix + * @throws IllegalArgumentException if not all rows conform in the same length + */ + public Matrix(final double[][] elements) { + columndimension = elements[0].length; + for(int i = 0; i < elements.length; i++) { + if(elements[i].length != columndimension) { + throw new IllegalArgumentException("All rows must have the same length."); + } + } + this.elements = elements; + } + + /** + * Constructs a Matrix for a given array of arrays of {@link RationalNumber}s. + * + * @param q an array of arrays of RationalNumbers. q is not checked for + * consistency (i.e. 
whether all rows are of equal length) + */ + public Matrix(final RationalNumber[][] q) { + columndimension = q[0].length; + elements = new double[q.length][columndimension]; + for(int row = 0; row < q.length; row++) { + for(int col = 0; col < q[row].length; col++) { + elements[row][col] = q[row][col].doubleValue(); + } + } + } + + /** + * Construct a matrix from a one-dimensional packed array + * + * @param values One-dimensional array of doubles, packed by columns (ala + * Fortran). + * @param m Number of rows. + * @throws IllegalArgumentException Array length must be a multiple of m. + */ + public Matrix(final double values[], final int m) { + columndimension = (m != 0 ? values.length / m : 0); + if(m * columndimension != values.length) { + throw new IllegalArgumentException("Array length must be a multiple of m."); + } + elements = new double[m][columndimension]; + for(int i = 0; i < m; i++) { + for(int j = 0; j < columndimension; j++) { + elements[i][j] = values[i + j * m]; + } + } + } + + /** + * Constructor, cloning an existing matrix. + * + * @param mat Matrix to clone + */ + public Matrix(Matrix mat) { + this(mat.getArrayCopy()); + } + + /** + * Construct a matrix from a copy of a 2-D array. + * + * @param A Two-dimensional array of doubles. + * @return new matrix + * @throws IllegalArgumentException All rows must have the same length + */ + public final static Matrix constructWithCopy(final double[][] A) { + final int m = A.length; + final int n = A[0].length; + final Matrix X = new Matrix(m, n); + for(int i = 0; i < m; i++) { + if(A[i].length != n) { + throw new IllegalArgumentException("All rows must have the same length."); + } + System.arraycopy(A[i], 0, X.elements[i], 0, n); + } + return X; + } + + /** + * Returns the unit matrix of the specified dimension. 
+ * + * @param dim the dimensionality of the unit matrix + * @return the unit matrix of the specified dimension + */ + public static final Matrix unitMatrix(final int dim) { + final double[][] e = new double[dim][dim]; + for(int i = 0; i < dim; i++) { + e[i][i] = 1; + } + return new Matrix(e); + } + + /** + * Returns the zero matrix of the specified dimension. + * + * @param dim the dimensionality of the unit matrix + * @return the zero matrix of the specified dimension + */ + public static final Matrix zeroMatrix(final int dim) { + final double[][] z = new double[dim][dim]; + return new Matrix(z); + } + + /** + * Generate matrix with random elements + * + * @param m Number of rows. + * @param n Number of columns. + * @return An m-by-n matrix with uniformly distributed random elements. + */ + public static final Matrix random(final int m, final int n) { + final Matrix A = new Matrix(m, n); + for(int i = 0; i < m; i++) { + for(int j = 0; j < n; j++) { + A.elements[i][j] = Math.random(); + } + } + return A; + } + + /** + * Generate identity matrix + * + * @param m Number of rows. + * @param n Number of columns. + * @return An m-by-n matrix with ones on the diagonal and zeros elsewhere. + */ + public static final Matrix identity(final int m, final int n) { + final Matrix A = new Matrix(m, n); + for(int i = 0; i < Math.min(m, n); i++) { + A.elements[i][i] = 1.0; + } + return A; + } + + /** + * Returns a quadratic Matrix consisting of zeros and of the given values on + * the diagonal. + * + * @param diagonal the values on the diagonal + * @return the resulting matrix + */ + public static final Matrix diagonal(final double[] diagonal) { + final Matrix result = new Matrix(diagonal.length, diagonal.length); + for(int i = 0; i < diagonal.length; i++) { + result.elements[i][i] = diagonal[i]; + } + return result; + } + + /** + * Returns a quadratic Matrix consisting of zeros and of the given values on + * the diagonal. 
+ * + * @param diagonal the values on the diagonal + * @return the resulting matrix + */ + public static final Matrix diagonal(final Vector diagonal) { + final Matrix result = new Matrix(diagonal.elements.length, diagonal.elements.length); + for(int i = 0; i < diagonal.elements.length; i++) { + result.elements[i][i] = diagonal.elements[i]; + } + return result; + } + + /** + * Make a deep copy of a matrix. + * + * @return a new matrix containing the same values as this matrix + */ + @Override + public final Matrix copy() { + final Matrix X = new Matrix(elements.length, columndimension); + for(int i = 0; i < elements.length; i++) { + System.arraycopy(elements[i], 0, X.elements[i], 0, columndimension); + } + return X; + } + + /** + * Clone the Matrix object. + */ + @Override + public Matrix clone() { + return this.copy(); + } + + /** + * Access the internal two-dimensional array. + * + * @return Pointer to the two-dimensional array of matrix elements. + */ + public final double[][] getArrayRef() { + return elements; + } + + /** + * Copy the internal two-dimensional array. + * + * @return Two-dimensional array copy of matrix elements. + */ + public final double[][] getArrayCopy() { + final double[][] C = new double[elements.length][columndimension]; + for(int i = 0; i < elements.length; i++) { + for(int j = 0; j < columndimension; j++) { + C[i][j] = elements[i][j]; + } + } + return C; + } + + /** + * Returns the dimensionality of the rows of this matrix. + * + * @return m, the number of rows. + */ + @Override + public final int getRowDimensionality() { + return elements.length; + } + + /** + * Returns the dimensionality of the columns of this matrix. + * + * @return n, the number of columns. + */ + @Override + public final int getColumnDimensionality() { + return columndimension; + } + + /** + * Get a single element. + * + * @param i Row index. + * @param j Column index. 
+ * @return A(i,j) + * @throws ArrayIndexOutOfBoundsException on bounds error + */ + @Override + public final double get(final int i, final int j) { + return elements[i][j]; + } + + /** + * Set a single element. + * + * @param i Row index. + * @param j Column index. + * @param s A(i,j). + * @return modified matrix + * @throws ArrayIndexOutOfBoundsException on bounds error + */ + @Override + public final Matrix set(final int i, final int j, final double s) { + elements[i][j] = s; + return this; + } + + /** + * Increments a single element. + * + * @param i the row index + * @param j the column index + * @param s the increment value: A(i,j) = A(i.j) + s. + * @return modified matrix + * @throws ArrayIndexOutOfBoundsException on bounds error + */ + @Override + public final Matrix increment(final int i, final int j, final double s) { + elements[i][j] += s; + return this; +} + + /** + * Make a one-dimensional row packed copy of the internal array. + * + * @return Matrix elements packed in a one-dimensional array by rows. + */ + public final double[] getRowPackedCopy() { + double[] vals = new double[elements.length * columndimension]; + for(int i = 0; i < elements.length; i++) { + for(int j = 0; j < columndimension; j++) { + vals[i * columndimension + j] = elements[i][j]; + } + } + return vals; + } + + /** + * Make a one-dimensional column packed copy of the internal array. + * + * @return Matrix elements packed in a one-dimensional array by columns. + */ + public final double[] getColumnPackedCopy() { + final double[] vals = new double[elements.length * columndimension]; + for(int i = 0; i < elements.length; i++) { + for(int j = 0; j < columndimension; j++) { + vals[i + j * elements.length] = elements[i][j]; + } + } + return vals; + } + + /** + * Get a submatrix. 
+ * + * @param i0 Initial row index + * @param i1 Final row index + * @param j0 Initial column index + * @param j1 Final column index + * @return A(i0:i1,j0:j1) + * @throws ArrayIndexOutOfBoundsException Submatrix indices + */ + public final Matrix getMatrix(final int i0, final int i1, final int j0, final int j1) { + final Matrix X = new Matrix(i1 - i0 + 1, j1 - j0 + 1); + try { + for(int i = i0; i <= i1; i++) { + for(int j = j0; j <= j1; j++) { + X.elements[i - i0][j - j0] = elements[i][j]; + } + } + } + catch(ArrayIndexOutOfBoundsException e) { + throw new ArrayIndexOutOfBoundsException("Submatrix indices"); + } + return X; + } + + /** + * Get a submatrix. + * + * @param r Array of row indices. + * @param c Array of column indices. + * @return A(r(:),c(:)) + * @throws ArrayIndexOutOfBoundsException Submatrix indices + */ + public final Matrix getMatrix(final int[] r, final int[] c) { + final Matrix X = new Matrix(r.length, c.length); + try { + for(int i = 0; i < r.length; i++) { + for(int j = 0; j < c.length; j++) { + X.elements[i][j] = elements[r[i]][c[j]]; + } + } + } + catch(ArrayIndexOutOfBoundsException e) { + throw new ArrayIndexOutOfBoundsException("Submatrix indices"); + } + return X; + } + + /** + * Get a submatrix. + * + * @param r Array of row indices. + * @param j0 Initial column index + * @param j1 Final column index + * @return A(r(:),j0:j1) + * @throws ArrayIndexOutOfBoundsException Submatrix indices + */ + public final Matrix getMatrix(final int[] r, final int j0, final int j1) { + final Matrix X = new Matrix(r.length, j1 - j0 + 1); + try { + for(int i = 0; i < r.length; i++) { + for(int j = j0; j <= j1; j++) { + X.elements[i][j - j0] = elements[r[i]][j]; + } + } + } + catch(ArrayIndexOutOfBoundsException e) { + throw new ArrayIndexOutOfBoundsException("Submatrix indices"); + } + return X; + } + + /** + * Get a submatrix. + * + * @param i0 Initial row index + * @param i1 Final row index + * @param c Array of column indices. 
+ * @return A(i0:i1,c(:)) + * @throws ArrayIndexOutOfBoundsException Submatrix indices + */ + public final Matrix getMatrix(final int i0, final int i1, final int[] c) { + final Matrix X = new Matrix(i1 - i0 + 1, c.length); + try { + for(int i = i0; i <= i1; i++) { + for(int j = 0; j < c.length; j++) { + X.elements[i - i0][j] = elements[i][c[j]]; + } + } + } + catch(ArrayIndexOutOfBoundsException e) { + throw new ArrayIndexOutOfBoundsException("Submatrix indices"); + } + return X; + } + + /** + * Set a submatrix. + * + * @param i0 Initial row index + * @param i1 Final row index + * @param j0 Initial column index + * @param j1 Final column index + * @param X A(i0:i1,j0:j1) + * @throws ArrayIndexOutOfBoundsException Submatrix indices + */ + public final void setMatrix(final int i0, final int i1, final int j0, final int j1, final Matrix X) { + try { + for(int i = i0; i <= i1; i++) { + for(int j = j0; j <= j1; j++) { + elements[i][j] = X.elements[i - i0][j - j0]; + } + } + } + catch(ArrayIndexOutOfBoundsException e) { + throw new ArrayIndexOutOfBoundsException("Submatrix indices: " + e); + } + } + + /** + * Set a submatrix. + * + * @param r Array of row indices. + * @param c Array of column indices. + * @param X A(r(:),c(:)) + * @throws ArrayIndexOutOfBoundsException Submatrix indices + */ + public final void setMatrix(final int[] r, final int[] c, final Matrix X) { + try { + for(int i = 0; i < r.length; i++) { + for(int j = 0; j < c.length; j++) { + elements[r[i]][c[j]] = X.elements[i][j]; + } + } + } + catch(ArrayIndexOutOfBoundsException e) { + throw new ArrayIndexOutOfBoundsException("Submatrix indices"); + } + } + + /** + * Set a submatrix. + * + * @param r Array of row indices. 
+ * @param j0 Initial column index + * @param j1 Final column index + * @param X A(r(:),j0:j1) + * @throws ArrayIndexOutOfBoundsException Submatrix indices + */ + public final void setMatrix(final int[] r, final int j0, final int j1, final Matrix X) { + try { + for(int i = 0; i < r.length; i++) { + for(int j = j0; j <= j1; j++) { + elements[r[i]][j] = X.elements[i][j - j0]; + } + } + } + catch(ArrayIndexOutOfBoundsException e) { + throw new ArrayIndexOutOfBoundsException("Submatrix indices"); + } + } + + /** + * Set a submatrix. + * + * @param i0 Initial row index + * @param i1 Final row index + * @param c Array of column indices. + * @param X A(i0:i1,c(:)) + * @throws ArrayIndexOutOfBoundsException Submatrix indices + */ + public final void setMatrix(final int i0, final int i1, final int[] c, final Matrix X) { + try { + for(int i = i0; i <= i1; i++) { + for(int j = 0; j < c.length; j++) { + elements[i][c[j]] = X.elements[i - i0][j]; + } + } + } + catch(ArrayIndexOutOfBoundsException e) { + throw new ArrayIndexOutOfBoundsException("Submatrix indices"); + } + } + + /** + * Returns the <code>i</code>th row of this matrix. + * + * @param i the index of the row to be returned + * @return the <code>i</code>th row of this matrix + */ + public final Matrix getRow(final int i) { + return getMatrix(i, i, 0, columndimension - 1); + } + + /** + * Returns the <code>i</code>th row of this matrix as vector. + * + * @param i the index of the row to be returned + * @return the <code>i</code>th row of this matrix + */ + public final Vector getRowVector(final int i) { + double[] row = elements[i].clone(); + return new Vector(row); + } + + /** + * Sets the <code>j</code>th row of this matrix to the specified vector. 
+ * + * @param j the index of the row to be set + * @param row the value of the row to be set + */ + public final void setRow(final int j, final Matrix row) { + if(row.columndimension != columndimension) { + throw new IllegalArgumentException("Matrix must consist of the same no of columns!"); + } + if(row.elements.length != 1) { + throw new IllegalArgumentException("Matrix must consist of one row!"); + } + setMatrix(elements.length - 1, 0, j, j, row); + } + + /** + * Sets the <code>j</code>th row of this matrix to the specified vector. + * + * @param j the index of the column to be set + * @param row the value of the column to be set + */ + public final void setRowVector(final int j, final Vector row) { + if(row.elements.length != columndimension) { + throw new IllegalArgumentException("Matrix must consist of the same no of columns!"); + } + for(int i = 0; i < columndimension; i++) { + elements[j][i] = row.elements[i]; + } + } + + /** + * Returns the <code>j</code>th column of this matrix. + * + * @param j the index of the column to be returned + * @return the <code>j</code>th column of this matrix + */ + public final Matrix getColumn(final int j) { + return getMatrix(0, elements.length - 1, j, j); + } + + /** + * Returns the <code>j</code>th column of this matrix as vector. + * + * @param j the index of the column to be returned + * @return the <code>j</code>th column of this matrix + */ + @Override + public final Vector getColumnVector(final int j) { + final Vector v = new Vector(elements.length); + for(int i = 0; i < elements.length; i++) { + v.elements[i] = elements[i][j]; + } + return v; + } + + /** + * Sets the <code>j</code>th column of this matrix to the specified column. 
+ * + * @param j the index of the column to be set + * @param column the value of the column to be set + */ + public final void setColumn(final int j, final Matrix column) { + if(column.elements.length != elements.length) { + throw new IllegalArgumentException("Matrix must consist of the same no of rows!"); + } + if(column.columndimension != 1) { + throw new IllegalArgumentException("Matrix must consist of one column!"); + } + setMatrix(0, elements.length - 1, j, j, column); + } + + /** + * Sets the <code>j</code>th column of this matrix to the specified column. + * + * @param j the index of the column to be set + * @param column the value of the column to be set + */ + public final void setColumnVector(final int j, final Vector column) { + if(column.elements.length != elements.length) { + throw new IllegalArgumentException("Matrix must consist of the same no of rows!"); + } + for(int i = 0; i < elements.length; i++) { + elements[i][j] = column.elements[i]; + } + } + + /** + * Matrix transpose. 
+ * + * @return A<sup>T</sup> + */ + @Override + public final Matrix transpose() { + final Matrix X = new Matrix(columndimension, elements.length); + for(int i = 0; i < elements.length; i++) { + for(int j = 0; j < columndimension; j++) { + X.elements[j][i] = elements[i][j]; + } + } + return X; + } + + /** + * C = A + B + * + * @param B another matrix + * @return A + B in a new Matrix + */ + @Override + public final Matrix plus(final Matrix B) { + return copy().plusEquals(B); + } + + /** + * C = A + s * B + * + * @param B another matrix + * @param s scalar + * @return A + s * B in a new Matrix + */ + @Override + public final Matrix plusTimes(final Matrix B, final double s) { + return copy().plusTimesEquals(B, s); + } + + /** + * A = A + B + * + * @param B another matrix + * @return A + B in this Matrix + */ + @Override + public final Matrix plusEquals(final Matrix B) { + checkMatrixDimensions(B); + for(int i = 0; i < elements.length; i++) { + for(int j = 0; j < columndimension; j++) { + elements[i][j] += B.elements[i][j]; + } + } + return this; + } + + /** + * A = A + s * B + * + * @param B another matrix + * @param s Scalar + * @return A + s * B in this Matrix + */ + @Override + public final Matrix plusTimesEquals(final Matrix B, final double s) { + checkMatrixDimensions(B); + for(int i = 0; i < elements.length; i++) { + for(int j = 0; j < columndimension; j++) { + elements[i][j] += s * B.elements[i][j]; + } + } + return this; + } + + /** + * C = A - B + * + * @param B another matrix + * @return A - B in a new Matrix + */ + @Override + public final Matrix minus(final Matrix B) { + return copy().minusEquals(B); + } + + /** + * C = A - s * B + * + * @param B another matrix + * @param s Scalar + * @return A - s * B in a new Matrix + */ + @Override + public final Matrix minusTimes(final Matrix B, final double s) { + return copy().minusTimesEquals(B, s); + } + + /** + * A = A - B + * + * @param B another matrix + * @return A - B in this Matrix + */ + @Override + public 
final Matrix minusEquals(final Matrix B) { + checkMatrixDimensions(B); + for(int i = 0; i < elements.length; i++) { + for(int j = 0; j < columndimension; j++) { + elements[i][j] -= B.elements[i][j]; + } + } + return this; + } + + /** + * A = A - s * B + * + * @param B another matrix + * @param s Scalar + * @return A - s * B in this Matrix + */ + @Override + public final Matrix minusTimesEquals(final Matrix B, final double s) { + checkMatrixDimensions(B); + for(int i = 0; i < elements.length; i++) { + for(int j = 0; j < columndimension; j++) { + elements[i][j] -= s * B.elements[i][j]; + } + } + return this; + } + + /** + * Multiply a matrix by a scalar, C = s*A + * + * @param s scalar + * @return s*A + */ + @Override + public final Matrix times(final double s) { + return copy().timesEquals(s); + } + + /** + * Multiply a matrix by a scalar in place, A = s*A + * + * @param s scalar + * @return replace A by s*A + */ + @Override + public final Matrix timesEquals(final double s) { + for(int i = 0; i < elements.length; i++) { + for(int j = 0; j < columndimension; j++) { + elements[i][j] *= s; + } + } + return this; + } + + /** + * Linear algebraic matrix multiplication, A * B + * + * @param B another matrix + * @return Matrix product, A * B + * @throws IllegalArgumentException Matrix inner dimensions must agree. + */ + public final Matrix times(final Matrix B) { + // Optimized implementation, exploiting the storage layout + if(B.elements.length != this.columndimension) { + throw new IllegalArgumentException("Matrix inner dimensions must agree: "+getRowDimensionality()+","+getColumnDimensionality()+" * "+B.getRowDimensionality()+","+B.getColumnDimensionality()); + } + final Matrix X = new Matrix(this.elements.length, B.columndimension); + // Optimized ala Jama. jik order. + final double[] Bcolj = new double[this.columndimension]; + for(int j = 0; j < X.columndimension; j++) { + // Make a linear copy of column j from B + // TODO: use column getter from B? 
+ for(int k = 0; k < this.columndimension; k++) { + Bcolj[k] = B.elements[k][j]; + } + // multiply it with each row from A + for(int i = 0; i < this.elements.length; i++) { + final double[] Arowi = this.elements[i]; + double s = 0; + for(int k = 0; k < this.columndimension; k++) { + s += Arowi[k] * Bcolj[k]; + } + X.elements[i][j] = s; + } + } + return X; + } + + /** + * Linear algebraic matrix multiplication, A * B + * + * @param B a vector + * @return Matrix product, A * B + * @throws IllegalArgumentException Matrix inner dimensions must agree. + */ + public final Vector times(final Vector B) { + if(B.elements.length != this.columndimension) { + throw new IllegalArgumentException("Matrix inner dimensions must agree."); + } + final Vector X = new Vector(this.elements.length); + // multiply it with each row from A + for(int i = 0; i < this.elements.length; i++) { + final double[] Arowi = this.elements[i]; + double s = 0; + for(int k = 0; k < this.columndimension; k++) { + s += Arowi[k] * B.elements[k]; + } + X.elements[i] = s; + } + return X; + } + + /** + * Linear algebraic matrix multiplication, A<sup>T</sup> * B + * + * @param B another matrix + * @return Matrix product, A<sup>T</sup> * B + * @throws IllegalArgumentException Matrix inner dimensions must agree. + */ + public final Vector transposeTimes(final Vector B) { + if(B.elements.length != elements.length) { + throw new IllegalArgumentException("Matrix inner dimensions must agree."); + } + final Vector X = new Vector(this.columndimension); + // multiply it with each row from A + for(int i = 0; i < this.columndimension; i++) { + double s = 0; + for(int k = 0; k < elements.length; k++) { + s += elements[k][i] * B.elements[k]; + } + X.elements[i] = s; + } + return X; + } + + /** + * Linear algebraic matrix multiplication, A<sup>T</sup> * B + * + * @param B another matrix + * @return Matrix product, A<sup>T</sup> * B + * @throws IllegalArgumentException Matrix inner dimensions must agree. 
+ */ + public final Matrix transposeTimes(final Matrix B) { + if(B.elements.length != elements.length) { + throw new IllegalArgumentException("Matrix inner dimensions must agree."); + } + final Matrix X = new Matrix(this.columndimension, B.columndimension); + final double[] Bcolj = new double[elements.length]; + for(int j = 0; j < X.columndimension; j++) { + // Make a linear copy of column j from B + for(int k = 0; k < elements.length; k++) { + Bcolj[k] = B.elements[k][j]; + } + // multiply it with each row from A + for(int i = 0; i < this.columndimension; i++) { + double s = 0; + for(int k = 0; k < elements.length; k++) { + s += elements[k][i] * Bcolj[k]; + } + X.elements[i][j] = s; + } + } + return X; + } + + /** + * Linear algebraic matrix multiplication, A * B^T + * + * @param B another matrix + * @return Matrix product, A * B^T + * @throws IllegalArgumentException Matrix inner dimensions must agree. + */ + public final Matrix timesTranspose(final Matrix B) { + if(B.columndimension != this.columndimension) { + throw new IllegalArgumentException("Matrix inner dimensions must agree."); + } + final Matrix X = new Matrix(this.elements.length, B.elements.length); + for(int j = 0; j < X.elements.length; j++) { + final double[] Browj = B.elements[j]; + // multiply it with each row from A + for(int i = 0; i < this.elements.length; i++) { + final double[] Arowi = this.elements[i]; + double s = 0; + for(int k = 0; k < this.columndimension; k++) { + s += Arowi[k] * Browj[k]; + } + X.elements[i][j] = s; + } + } + return X; + } + + /** + * Linear algebraic matrix multiplication, A^T * B^T. Computed as (B*A)^T + * + * @param B another matrix + * @return Matrix product, A^T * B^T + * @throws IllegalArgumentException Matrix inner dimensions must agree. 
+ */ + public final Matrix transposeTimesTranspose(Matrix B) { + // Optimized implementation, exploiting the storage layout + if(this.elements.length != B.columndimension) { + throw new IllegalArgumentException("Matrix inner dimensions must agree: "+getRowDimensionality()+","+getColumnDimensionality()+" * "+B.getRowDimensionality()+","+B.getColumnDimensionality()); + } + final Matrix X = new Matrix(this.columndimension, B.elements.length); + // Optimized ala Jama. jik order. + final double[] Acolj = new double[this.elements.length]; + for(int j = 0; j < X.elements.length; j++) { + // Make a linear copy of column j from B + for(int k = 0; k < this.elements.length; k++) { + Acolj[k] = this.elements[k][j]; + } + final double[] Xrow = X.elements[j]; + // multiply it with each row from A + for(int i = 0; i < B.elements.length; i++) { + final double[] Browi = B.elements[i]; + double s = 0; + for(int k = 0; k < B.columndimension; k++) { + s += Browi[k] * Acolj[k]; + } + Xrow[i] = s; + } + } + return X; + } + + /** + * Returns the scalar product of the colA column of this and the colB column + * of B. + * + * @param colA The column of A to compute scalar product for + * @param B second Matrix + * @param colB The column of B to compute scalar product for + * @return double The scalar product of the first column of this and B + */ + public double scalarProduct(int colA, Matrix B, int colB) { + double scalarProduct = 0.0; + for(int row = 0; row < getRowDimensionality(); row++) { + double prod = elements[row][colA] * B.elements[row][colB]; + scalarProduct += prod; + } + return scalarProduct; + } + + /** + * Returns the scalar product of the colA column of this and the colB column + * of B. 
+ * + * @param colA The column of A to compute scalar product for + * @param B Vector + * @return double The scalar product of the first column of this and B + */ + public double scalarProduct(int colA, Vector B) { + double scalarProduct = 0.0; + for(int row = 0; row < getRowDimensionality(); row++) { + double prod = elements[row][colA] * B.elements[row]; + scalarProduct += prod; + } + return scalarProduct; + } + + /** + * LU Decomposition + * + * @return LUDecomposition + * @see LUDecomposition + */ + public final LUDecomposition lu() { + return new LUDecomposition(this); + } + + /** + * QR Decomposition + * + * @return QRDecomposition + * @see QRDecomposition + */ + public final QRDecomposition qr() { + return new QRDecomposition(this); + } + + /** + * Cholesky Decomposition + * + * @return CholeskyDecomposition + * @see CholeskyDecomposition + */ + public final CholeskyDecomposition chol() { + return new CholeskyDecomposition(this); + } + + /** + * Singular Value Decomposition + * + * @return SingularValueDecomposition + * @see SingularValueDecomposition + */ + public final SingularValueDecomposition svd() { + return new SingularValueDecomposition(this); + } + + /** + * Eigenvalue Decomposition + * + * @return EigenvalueDecomposition + * @see EigenvalueDecomposition + */ + public final EigenvalueDecomposition eig() { + return new EigenvalueDecomposition(this); + } + + /** + * Solve A*X = B + * + * @param B right hand side + * @return solution if A is square, least squares solution otherwise + */ + public final Matrix solve(final Matrix B) { + return (elements.length == columndimension ? (new LUDecomposition(this)).solve(B) : (new QRDecomposition(this)).solve(B)); + } + + /** + * Solve X*A = B, which is also A'*X' = B' + * + * @param B right hand side + * @return solution if A is square, least squares solution otherwise. 
+ */ + public final Matrix solveTranspose(final Matrix B) { + return transpose().solve(B.transpose()); + } + + /** + * Matrix inverse or pseudoinverse + * + * @return inverse(A) if A is square, pseudoinverse otherwise. + */ + public final Matrix inverse() { + return solve(identity(elements.length, elements.length)); + } + + /** + * Matrix determinant + * + * @return determinant + */ + public final double det() { + return new LUDecomposition(this).det(); + } + + /** + * Matrix rank + * + * @return effective numerical rank, obtained from SVD. + */ + public final int rank() { + return new SingularValueDecomposition(this).rank(); + } + + /** + * Matrix condition (2 norm) + * + * @return ratio of largest to smallest singular value. + */ + public final double cond() { + return new SingularValueDecomposition(this).cond(); + } + + /** + * Matrix trace. + * + * @return sum of the diagonal elements. + */ + public final double trace() { + double t = 0; + for(int i = 0; i < Math.min(elements.length, columndimension); i++) { + t += elements[i][i]; + } + return t; + } + + /** + * One norm + * + * @return maximum column sum. + */ + public double norm1() { + double f = 0; + for(int j = 0; j < columndimension; j++) { + double s = 0; + for(int i = 0; i < elements.length; i++) { + s += Math.abs(elements[i][j]); + } + f = Math.max(f, s); + } + return f; + } + + /** + * Two norm + * + * @return maximum singular value. + */ + public final double norm2() { + return (new SingularValueDecomposition(this).norm2()); + } + + /** + * Infinity norm + * + * @return maximum row sum. + */ + public double normInf() { + double f = 0; + for(int i = 0; i < elements.length; i++) { + double s = 0; + for(int j = 0; j < columndimension; j++) { + s += Math.abs(elements[i][j]); + } + f = Math.max(f, s); + } + return f; + } + + /** + * Frobenius norm + * + * @return sqrt of sum of squares of all elements. 
+ */ + public double normF() { + double f = 0; + for(int i = 0; i < elements.length; i++) { + for(int j = 0; j < columndimension; j++) { + f = MathUtil.hypotenuse(f, elements[i][j]); + } + } + return f; + } + + /** + * distanceCov returns distance of two Matrices A and B, i.e. the root of the + * sum of the squared distances A<sub>ij</sub>-B<sub>ij</sub>. + * + * @param B Matrix to compute distance from this (A) + * @return distance of Matrices + */ + // TODO: unused - remove / move into a MatrixDistance helper? + public final double distanceCov(final Matrix B) { + double distance = 0.0; + double distIJ; + int row; + for(int col = 0; col < columndimension; col++) { + for(row = 0; row < elements.length; row++) { + distIJ = elements[row][col] - B.elements[row][col]; + distance += (distIJ * distIJ); + } + } + distance = Math.sqrt(distance); + return distance; + } + + /** + * getDiagonal returns array of diagonal-elements. + * + * @return double[] the values on the diagonal of the Matrix + */ + public final double[] getDiagonal() { + int n = Math.min(columndimension, elements.length); + final double[] diagonal = new double[n]; + for(int i = 0; i < n; i++) { + diagonal[i] = elements[i][i]; + } + return diagonal; + } + + /** + * Normalizes the columns of this matrix to length of 1.0. + */ + public void normalizeColumns() { + for(int col = 0; col < columndimension; col++) { + double norm = 0.0; + for(int row = 0; row < elements.length; row++) { + norm = norm + (elements[row][col] * elements[row][col]); + } + norm = Math.sqrt(norm); + if(norm != 0) { + for(int row = 0; row < elements.length; row++) { + elements[row][col] /= norm; + } + } + // TODO: else: throw an exception? + } + } + + /** + * Returns true if the specified column matrix <code>a</code> is linearly + * independent to the columns of this matrix. Linearly independence is given, + * if the matrix resulting from appending <code>a</code> to this matrix has + * full rank. 
+ * + * @param columnMatrix the column matrix to be tested for linear independence + * @return true if the specified column matrix is linearly independent to the + * columns of this matrix + */ + public final boolean linearlyIndependent(final Matrix columnMatrix) { + if(columnMatrix.columndimension != 1) { + throw new IllegalArgumentException("a.getColumnDimension() != 1"); + } + if(this.elements.length != columnMatrix.elements.length) { + throw new IllegalArgumentException("a.getRowDimension() != b.getRowDimension()"); + } + if(this.columndimension + columnMatrix.columndimension > this.elements.length) { + return false; + } + final StringBuffer msg = LoggingConfiguration.DEBUG ? new StringBuffer() : null; + + final double[][] a = new double[columndimension + 1][elements.length - 1]; + final double[] b = new double[columndimension + 1]; + + for(int i = 0; i < a.length; i++) { + for(int j = 0; j < a[i].length; j++) { + if(i < columndimension) { + a[i][j] = elements[j][i]; + } + else { + a[i][j] = columnMatrix.elements[j][0]; + } + } + } + + for(int i = 0; i < b.length; i++) { + if(i < columndimension) { + b[i] = elements[elements.length - 1][i]; + } + else { + b[i] = columnMatrix.elements[i][0]; + } + } + + final LinearEquationSystem les = new LinearEquationSystem(a, b); + les.solveByTotalPivotSearch(); + + final double[][] coefficients = les.getCoefficents(); + final double[] rhs = les.getRHS(); + + if(msg != null) { + msg.append("\na' " + FormatUtil.format(this.getArrayRef())); + msg.append("\nb' " + FormatUtil.format(columnMatrix.getColumnPackedCopy())); + + msg.append("\na " + FormatUtil.format(a)); + msg.append("\nb " + FormatUtil.format(b)); + msg.append("\nleq " + les.equationsToString(4)); + } + + for(int i = 0; i < coefficients.length; i++) { + boolean allCoefficientsZero = true; + for(int j = 0; j < coefficients[i].length; j++) { + final double value = coefficients[i][j]; + if(Math.abs(value) > DELTA) { + allCoefficientsZero = false; + break; + } + } + // 
allCoefficients=0 && rhs=0 -> linearly dependent + if(allCoefficientsZero) { + final double value = rhs[i]; + if(Math.abs(value) < DELTA) { + if(msg != null) { + msg.append("\nvalue " + value + "[" + i + "]"); + msg.append("\nlinearly independent " + false); + Logger.getLogger(this.getClass().getName()).fine(msg.toString()); + } + return false; + } + } + } + + if(msg != null) { + msg.append("\nlinearly independent " + true); + Logger.getLogger(this.getClass().getName()).fine(msg.toString()); + } + return true; + } + + /** + * Returns a matrix derived by Gauss-Jordan-elimination using RationalNumbers + * for the transformations. + * + * @return a matrix derived by Gauss-Jordan-elimination using RationalNumbers + * for the transformations + */ + public final Matrix exactGaussJordanElimination() { + final RationalNumber[][] gauss = exactGaussElimination(); + + // reduced form + for(int row = gauss.length - 1; row > 0; row--) { + int firstCol = -1; + for(int col = 0; col < gauss[row].length && firstCol == -1; col++) { + // if(gauss.get(row, col) != 0.0) // i.e. == 1 + if(gauss[row][col].equals(RationalNumber.ONE)) { + firstCol = col; + } + } + if(firstCol > -1) { + for(int currentRow = row - 1; currentRow >= 0; currentRow--) { + RationalNumber multiplier = gauss[currentRow][firstCol].copy(); + for(int col = firstCol; col < gauss[currentRow].length; col++) { + RationalNumber subtrahent = gauss[row][col].times(multiplier); + gauss[currentRow][col] = gauss[currentRow][col].minus(subtrahent); + } + } + } + } + return new Matrix(gauss); + } + + /** + * Perform an exact Gauss-elimination of this Matrix using RationalNumbers to + * yield highest possible accuracy. 
+ * + * @return an array of arrays of RationalNumbers representing the + * Gauss-eliminated form of this Matrix + */ + private final RationalNumber[][] exactGaussElimination() { + final RationalNumber[][] gauss = new RationalNumber[elements.length][this.columndimension]; + for(int row = 0; row < elements.length; row++) { + for(int col = 0; col < this.columndimension; col++) { + gauss[row][col] = new RationalNumber(elements[row][col]); + } + } + return exactGaussElimination(gauss); + } + + /** + * Perform recursive Gauss-elimination on the given matrix of RationalNumbers. + * + * @param gauss an array of arrays of RationalNumber + * @return recursive derived Gauss-elimination-form of the given matrix of + * RationalNumbers + */ + private static final RationalNumber[][] exactGaussElimination(final RationalNumber[][] gauss) { + int firstCol = -1; + int firstRow = -1; + + // 1. find first column unequal to zero + for(int col = 0; col < gauss[0].length && firstCol == -1; col++) { + for(int row = 0; row < gauss.length && firstCol == -1; row++) { + // if(gauss.get(row, col) != 0.0) + if(!gauss[row][col].equals(RationalNumber.ZERO)) { + firstCol = col; + firstRow = row; + } + } + } + + // 2. set row as first row + if(firstCol != -1) { + if(firstRow != 0) { + final RationalNumber[] row = new RationalNumber[gauss[firstRow].length]; + System.arraycopy(gauss[firstRow], 0, row, 0, gauss[firstRow].length); + System.arraycopy(gauss[0], 0, gauss[firstRow], 0, gauss[firstRow].length); + System.arraycopy(row, 0, gauss[0], 0, row.length); + } + + // 3. create leading 1 + if(!gauss[0][firstCol].equals(RationalNumber.ONE)) { + final RationalNumber inverse = gauss[0][firstCol].multiplicativeInverse(); + for(int col = 0; col < gauss[0].length; col++) { + gauss[0][col] = gauss[0][col].times(inverse); + } + } + + // 4. 
eliminate values unequal to zero below leading 1 + for(int row = 1; row < gauss.length; row++) { + final RationalNumber multiplier = gauss[row][firstCol].copy(); + // if(multiplier != 0.0) + if(!multiplier.equals(RationalNumber.ZERO)) { + for(int col = firstCol; col < gauss[row].length; col++) { + final RationalNumber subtrahent = gauss[0][col].times(multiplier); + gauss[row][col] = gauss[row][col].minus(subtrahent); + } + } + } + + // 5. recursion + if(gauss.length > 1) { + final RationalNumber[][] subMatrix = new RationalNumber[gauss.length - 1][gauss[1].length]; + System.arraycopy(gauss, 1, subMatrix, 0, gauss.length - 1); + final RationalNumber[][] eliminatedSubMatrix = exactGaussElimination(subMatrix); + System.arraycopy(eliminatedSubMatrix, 0, gauss, 1, eliminatedSubMatrix.length); + } + } + return gauss; + } + + /** + * Returns true, if this matrix is symmetric, false otherwise. + * + * @return true, if this matrix is symmetric, false otherwise + */ + public final boolean isSymmetric() { + if(elements.length != columndimension) { + return false; + } + for(int i = 0; i < elements.length; i++) { + for(int j = i + 1; j < columndimension; j++) { + if(elements[i][j] != elements[j][i]) { + return false; + } + } + } + return true; + } + + /** + * Completes this d x c basis of a subspace of R^d to a d x d basis of R^d, + * i.e. appends c-d columns to this basis. + * + * @return the appended columns + */ + public final Matrix completeBasis() { + Matrix basis = copy(); + Matrix result = null; + for(int i = 0; i < elements.length; i++) { + final Matrix e_i = new Matrix(elements.length, 1); + e_i.elements[0][i] = 1.0; + final boolean li = basis.linearlyIndependent(e_i); + + if(li) { + if(result == null) { + result = e_i.copy(); + } + else { + result = result.appendColumns(e_i); + } + basis = basis.appendColumns(e_i); + } + } + return result; + } + + /** + * Completes this d x c basis of a subspace of R^d to a d x d basis of R^d, + * i.e. 
appends c-d columns to this basis. + * + * @return the appended columns + */ + public final Matrix completeToOrthonormalBasis() { + Matrix basis = copy(); + Matrix result = null; + for(int i = 0; i < elements.length; i++) { + final Matrix e_i = new Matrix(elements.length, 1); + e_i.elements[i][0] = 1.0; + final boolean li = basis.linearlyIndependent(e_i); + + if(li) { + if(result == null) { + result = e_i.copy(); + } + else { + result = result.appendColumns(e_i); + } + basis = basis.appendColumns(e_i); + } + } + basis = basis.orthonormalize(); + return basis.getMatrix(0, basis.elements.length - 1, columndimension, basis.columndimension - 1); + } + + /** + * Returns a matrix which consists of this matrix and the specified columns. + * + * @param columns the columns to be appended + * @return the new matrix with the appended columns + */ + public final Matrix appendColumns(final Matrix columns) { + if(elements.length != columns.elements.length) { + throw new IllegalArgumentException("m.getRowDimension() != column.getRowDimension()"); + } + + final Matrix result = new Matrix(elements.length, columndimension + columns.columndimension); + for(int i = 0; i < result.columndimension; i++) { + // FIXME: optimize - excess copying! + if(i < columndimension) { + result.setColumn(i, getColumn(i)); + } + else { + result.setColumn(i, columns.getColumn(i - columndimension)); + } + } + return result; + } + + /** + * Returns an orthonormalization of this matrix. + * + * @return the orthonormalized matrix + */ + public final Matrix orthonormalize() { + Matrix v = getColumn(0); + + // FIXME: optimize - excess copying! 
+ for(int i = 1; i < columndimension; i++) { + final Matrix u_i = getColumn(i); + final Matrix sum = new Matrix(elements.length, 1); + for(int j = 0; j < i; j++) { + final Matrix v_j = v.getColumn(j); + double scalar = u_i.scalarProduct(0, v_j, 0) / v_j.scalarProduct(0, v_j, 0); + sum.plusEquals(v_j.times(scalar)); + } + final Matrix v_i = u_i.minus(sum); + v = v.appendColumns(v_i); + } + + v.normalizeColumns(); + return v; + } + + /** + * Adds a given value to the diagonal entries if the entry is smaller than the + * constant. + * + * @param constant value to add to the diagonal entries + * @return a new Matrix differing from this Matrix by the given value added to + * the diagonal entries + */ + public final Matrix cheatToAvoidSingularity(final double constant) { + final Matrix a = this.copy(); + for(int i = 0; i < a.columndimension && i < a.elements.length; i++) { + // if(a.get(i, i) < constant) + { + a.elements[i][i] += constant; + } + } + return a; + } + + /** + * Read a matrix from a stream. The format is the same the print method, so + * printed matrices can be read back in (provided they were printed using US + * Locale). Elements are separated by whitespace, all the elements for each + * row appear on a single line, the last row is followed by a blank line. + * + * @param input the input stream. + * @return New matrix + * @throws java.io.IOException on input error + */ + public static final Matrix read(final BufferedReader input) throws java.io.IOException { + final StreamTokenizer tokenizer = new StreamTokenizer(input); + + // Although StreamTokenizer will parse numbers, it doesn't recognize + // scientific notation (E or D); however, Double.valueOf does. + // The strategy here is to disable StreamTokenizer's number parsing. + // We'll only get whitespace delimited words, EOL's and EOF's. + // These words should all be numbers, for Double.valueOf to parse. 
+ + tokenizer.resetSyntax(); + tokenizer.wordChars(0, 255); + tokenizer.whitespaceChars(0, ' '); + tokenizer.eolIsSignificant(true); + java.util.Vector<Double> v = new java.util.Vector<Double>(); + + // Ignore initial empty lines + while(tokenizer.nextToken() == StreamTokenizer.TT_EOL) { + // ignore initial empty lines + } + if(tokenizer.ttype == StreamTokenizer.TT_EOF) { + throw new java.io.IOException("Unexpected EOF on matrix read."); + } + do { + v.addElement(Double.valueOf(tokenizer.sval)); // Read & store 1st + // row. + } + while(tokenizer.nextToken() == StreamTokenizer.TT_WORD); + + int n = v.size(); // Now we've got the number of columns! + double row[] = new double[n]; + for(int j = 0; j < n; j++) { + // extract the elements of the 1st row. + row[j] = v.elementAt(j); + } + // v.removeAllElements(); + java.util.Vector<double[]> rowV = new java.util.Vector<double[]>(); + rowV.addElement(row); // Start storing rows instead of columns. + while(tokenizer.nextToken() == StreamTokenizer.TT_WORD) { + // While non-empty lines + rowV.addElement(row = new double[n]); + int j = 0; + do { + if(j >= n) { + throw new java.io.IOException("Row " + v.size() + " is too long."); + } + row[j++] = (Double.valueOf(tokenizer.sval)); + } + while(tokenizer.nextToken() == StreamTokenizer.TT_WORD); + if(j < n) { + throw new java.io.IOException("Row " + v.size() + " is too short."); + } + } + int m = rowV.size(); // Now we've got the number of rows. 
+ double[][] A = new double[m][]; + rowV.copyInto(A); // copy the rows out of the vector + return new Matrix(A); + } + + /** + * Check if size(A) == size(B) + */ + protected void checkMatrixDimensions(MatrixLike<?> B) { + if(B.getRowDimensionality() != getRowDimensionality() || B.getColumnDimensionality() != getColumnDimensionality()) { + throw new IllegalArgumentException("Matrix dimensions must agree."); + } + } + + @Override + public int hashCode() { + final int PRIME = 31; + int result = 1; + result = PRIME * result + Arrays.hashCode(this.elements); + result = PRIME * result + this.elements.length; + result = PRIME * result + this.columndimension; + return result; + } + + @Override + public boolean equals(Object obj) { + if(this == obj) { + return true; + } + if(obj == null) { + return false; + } + if(getClass() != obj.getClass()) { + return false; + } + final Matrix other = (Matrix) obj; + if(this.elements.length != other.elements.length) { + return false; + } + if(this.columndimension != other.columndimension) { + return false; + } + for(int i = 0; i < this.elements.length; i++) { + for(int j = 0; j < this.columndimension; j++) { + if(this.elements[i][j] != other.elements[i][j]) { + return false; + } + } + } + return true; + } + + /** + * Compare two matrices with a delta parameter to take numerical errors into + * account. 
+ * + * @param obj other object to compare with + * @param maxdelta maximum delta allowed + * @return true if delta smaller than maximum + */ + public boolean almostEquals(Object obj, double maxdelta) { + if(this == obj) { + return true; + } + if(obj == null) { + return false; + } + if(getClass() != obj.getClass()) { + return false; + } + final Matrix other = (Matrix) obj; + if(this.elements.length != other.elements.length) { + return false; + } + if(this.columndimension != other.columndimension) { + return false; + } + for(int i = 0; i < this.elements.length; i++) { + for(int j = 0; j < this.columndimension; j++) { + if(Math.abs(this.elements[i][j] - other.elements[i][j]) > maxdelta) { + return false; + } + } + } + return true; + } + + /** + * Compare two matrices with a delta parameter to take numerical errors into + * account. + * + * @param obj other object to compare with + * @return almost equals with delta {@link #DELTA} + */ + public boolean almostEquals(Object obj) { + return almostEquals(obj, DELTA); + } + + /** + * Returns the dimensionality of this matrix as a string. + * + * @return the dimensionality of this matrix as a string + */ + public String dimensionInfo() { + return getRowDimensionality() + " x " + getColumnDimensionality(); + } + + /** + * toString returns String-representation of Matrix. + */ + @Override + public String toString() { + return FormatUtil.format(this); + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/MatrixLike.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/MatrixLike.java new file mode 100644 index 00000000..aa783e1c --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/MatrixLike.java @@ -0,0 +1,192 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * Common Interface for Matrix and Vector objects, where M is the actual type. + * + * The type M guarantees type safety for many operations. + * + * @param M the actual type + * + * @apiviz.landmark + * + * @author Elke Achtert + * @author Erich Schubert + */ +public interface MatrixLike<M extends MatrixLike<M>> extends Cloneable { + /** + * Make a deep copy of a matrix. + * + * @return a new matrix containing the same values as this matrix + */ + public M copy(); + + /** + * Clone the Matrix object. + */ + public Object clone(); + + /** + * Returns the dimensionality of the rows of this matrix. + * + * @return m, the number of rows. 
+ */ + public int getRowDimensionality(); + + /** + * Returns the dimensionality of the columns of this matrix. + * + * @return n, the number of columns. + */ + public int getColumnDimensionality(); + + /** + * Get a single element. + * + * @param i Row index. + * @param j Column index. + * @return A(i,j) + * @throws ArrayIndexOutOfBoundsException on bounds error + */ + public double get(int i, int j); + + /** + * Set a single element. + * + * @param i Row index. + * @param j Column index. + * @param s A(i,j). + * @throws ArrayIndexOutOfBoundsException on bounds error + */ + public M set(int i, int j, double s); + + /** + * Increments a single element. + * + * @param i the row index + * @param j the column index + * @param s the increment value: A(i,j) = A(i.j) + s. + * @throws ArrayIndexOutOfBoundsException on bounds error + */ + public M increment(int i, int j, double s); + + /** + * Returns the <code>i</code>th column of this matrix as vector. + * + * @param i the index of the column to be returned + * @return the <code>i</code>th column of this matrix + */ + public Vector getColumnVector(int i); + + /** + * Matrix transpose. 
+ * + * @return A<sup>T</sup> + */ + public Matrix transpose(); + + /** + * C = A + B + * + * @param B another matrix + * @return A + B in a new Matrix + */ + public M plus(M B); + + /** + * C = A + s*B + * + * @param B another matrix + * @param s scalar + * @return A + s*B in a new Matrix + */ + public M plusTimes(M B, double s); + + /** + * A = A + B + * + * @param B another matrix + * @return A + B in this Matrix + */ + public M plusEquals(M B); + + /** + * C = A + s*B + * + * @param B another matrix + * @param s scalar + * @return A + s*B in this Matrix + */ + public M plusTimesEquals(M B, double s); + + /** + * C = A - B + * + * @param B another matrix + * @return A - B in a new Matrix + */ + public M minus(M B); + + /** + * C = A - s*B + * + * @param B another matrix + * @param s Scalar + * @return A - s*B in a new Matrix + */ + public M minusTimes(M B, double s); + + /** + * A = A - B + * + * @param B another matrix + * @return A - B in this Matrix + */ + public M minusEquals(M B); + + /** + * C = A - s*B + * + * @param B another matrix + * @param s Scalar + * @return A - s*B in a new Matrix + */ + public M minusTimesEquals(M B, double s); + + /** + * Multiply a matrix by a scalar, C = s*A + * + * @param s scalar + * @return s*A + */ + public M times(double s); + + /** + * Multiply a matrix by a scalar in place, A = s*A + * + * @param s scalar + * @return replace A by s*A + */ + public M timesEquals(double s); +}
// --- src/de/lmu/ifi/dbs/elki/math/linearalgebra/ProjectedCentroid.java ---
package de.lmu.ifi.dbs.elki.math.linearalgebra;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures

Copyright (C) 2011
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

import java.util.BitSet;

import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;

/**
 * Centroid only using a subset of dimensions.
 *
 * This class abstracts the mathematics of efficient and numerically stable
 * computation of projected centroids: every put() updates the running mean of
 * the selected dimensions incrementally; dimensions that are not selected are
 * never touched.
 *
 * See {@link de.lmu.ifi.dbs.elki.utilities.DatabaseUtil DatabaseUtil} for
 * easier to use APIs.
 *
 * @author Erich Schubert
 */
public class ProjectedCentroid extends Centroid {
  /**
   * Serial version
   */
  private static final long serialVersionUID = 1L;

  /**
   * The selected dimensions. The BitSet passed to the constructor is stored
   * by reference, not copied.
   */
  private final BitSet dims;

  /**
   * Constructor for updating use.
   *
   * @param dims Dimensions to use (indexed with 0)
   * @param dim Full dimensionality
   */
  public ProjectedCentroid(BitSet dims, int dim) {
    super(dim);
    this.dims = dims;
    // BitSet.length() is "highest set bit + 1". BitSet.size() must not be used
    // here: it is the allocated capacity (a multiple of 64) and would make
    // this check fail for any data set with fewer than 64 dimensions.
    assert (dims.length() <= dim) : "Selected dimension exceeds data dimensionality.";
  }

  /**
   * Add a single value with weight 1.0
   *
   * @param val Value
   */
  @Override
  public void put(double[] val) {
    assert (val.length == elements.length);
    wsum += 1.0;
    for(int i = dims.nextSetBit(0); i >= 0; i = dims.nextSetBit(i + 1)) {
      // Incremental mean update: move the mean towards val by 1/wsum.
      final double delta = val[i] - elements[i];
      elements[i] += delta / wsum;
    }
  }

  /**
   * Add data with a given weight.
   *
   * @param val data
   * @param weight weight
   */
  @Override
  public void put(double val[], double weight) {
    assert (val.length == elements.length);
    final double nwsum = weight + wsum;
    for(int i = dims.nextSetBit(0); i >= 0; i = dims.nextSetBit(i + 1)) {
      final double delta = val[i] - elements[i];
      final double rval = delta * weight / nwsum;
      elements[i] += rval;
    }
    wsum = nwsum;
  }

  /**
   * Add a single value with weight 1.0
   *
   * @param val Value
   */
  @Override
  public void put(NumberVector<?, ?> val) {
    assert (val.getDimensionality() == elements.length);
    wsum += 1.0;
    for(int i = dims.nextSetBit(0); i >= 0; i = dims.nextSetBit(i + 1)) {
      // The vector is read at index i + 1, while dims and elements are
      // indexed with 0.
      final double delta = val.doubleValue(i + 1) - elements[i];
      elements[i] += delta / wsum;
    }
  }

  /**
   * Add data with a given weight.
   *
   * @param val data
   * @param weight weight
   */
  @Override
  public void put(NumberVector<?, ?> val, double weight) {
    assert (val.getDimensionality() == elements.length);
    final double nwsum = weight + wsum;
    for(int i = dims.nextSetBit(0); i >= 0; i = dims.nextSetBit(i + 1)) {
      // The vector is read at index i + 1, while dims and elements are
      // indexed with 0.
      final double delta = val.doubleValue(i + 1) - elements[i];
      final double rval = delta * weight / nwsum;
      elements[i] += rval;
    }
    wsum = nwsum;
  }

  /**
   * Static Constructor from a relation.
   *
   * @param dims Dimensions to use (indexed with 0)
   * @param relation Relation to process
   * @return centroid over all objects of the relation, each added with weight
   *         1.0
   */
  public static ProjectedCentroid make(BitSet dims, Relation<? extends NumberVector<?, ?>> relation) {
    // The constructor already checks dims against the dimensionality.
    ProjectedCentroid c = new ProjectedCentroid(dims, DatabaseUtil.dimensionality(relation));
    for(DBID id : relation.iterDBIDs()) {
      c.put(relation.get(id));
    }
    return c;
  }

  /**
   * Static Constructor from a relation.
   *
   * @param dims Dimensions to use (indexed with 0)
   * @param relation Relation to process
   * @param ids IDs to process
   * @return centroid over the objects with the given ids, each added with
   *         weight 1.0
   */
  public static ProjectedCentroid make(BitSet dims, Relation<? extends NumberVector<?, ?>> relation, Iterable<DBID> ids) {
    // The constructor already checks dims against the dimensionality.
    ProjectedCentroid c = new ProjectedCentroid(dims, DatabaseUtil.dimensionality(relation));
    for(DBID id : ids) {
      c.put(relation.get(id));
    }
    return c;
  }
}
// --- src/de/lmu/ifi/dbs/elki/math/linearalgebra/ProjectionResult.java ---
package de.lmu.ifi.dbs.elki.math.linearalgebra;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures

Copyright (C) 2011
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

/**
 * Interface representing a simple projection result.
 *
 * This can either come from a full PCA, or just from an axis-parallel subspace
 * selection.
 *
 * @author Erich Schubert
 */
// TODO: cleanup
public interface ProjectionResult {
  /**
   * Get the number of "strong" dimensions
   *
   * @return number of strong (correlated) dimensions
   */
  public int getCorrelationDimension();

  /**
   * Projection matrix
   *
   * @return projection matrix
   */
  public Matrix similarityMatrix();
}

// --- src/de/lmu/ifi/dbs/elki/math/linearalgebra/QRDecomposition.java ---
package de.lmu.ifi.dbs.elki.math.linearalgebra;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures

Copyright (C) 2011
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

import de.lmu.ifi.dbs.elki.math.MathUtil;

/**
 * QR Decomposition.
 * <P>
 * For an m-by-n matrix A with m &gt;= n, the QR decomposition is an m-by-n
 * orthogonal matrix Q and an n-by-n upper triangular matrix R so that A = Q*R.
 * <P>
 * The QR decomposition always exists, even if the matrix does not have full
 * rank, so the constructor will never fail. The primary use of the QR
 * decomposition is in the least squares solution of nonsquare systems of
 * simultaneous linear equations. This will fail if isFullRank() returns false.
 *
 * @apiviz.uses Matrix - - transforms
 */
public class QRDecomposition implements java.io.Serializable {
  /**
   * Serial version
   */
  private static final long serialVersionUID = 1L;

  /**
   * Array for internal storage of decomposition: the Householder vectors are
   * kept on and below the diagonal, the strictly upper part of R above it.
   *
   * @serial internal array storage.
   */
  private double[][] QR;

  /**
   * Row and column dimensions.
   *
   * @serial column dimension.
   * @serial row dimension.
   */
  private int m, n;

  /**
   * Array for internal storage of diagonal of R.
   *
   * @serial diagonal of R.
   */
  private double[] Rdiag;

  /*
   * ------------------------ Constructor ------------------------
   */

  /**
   * QR Decomposition, computed by Householder reflections.
   *
   * The input matrix is not modified; the computation works on the array
   * returned by A.getArrayCopy().
   *
   * @param A Rectangular matrix
   *
   */
  public QRDecomposition(Matrix A) {
    // Initialize.
    QR = A.getArrayCopy();
    m = A.getRowDimensionality();
    n = A.getColumnDimensionality();
    Rdiag = new double[n];

    // Main loop.
    for(int k = 0; k < n; k++) {
      // Compute 2-norm of k-th column without under/overflow.
      double nrm = 0;
      for(int i = k; i < m; i++) {
        nrm = MathUtil.hypotenuse(nrm, QR[i][k]);
      }

      if(nrm != 0.0) {
        // Form k-th Householder vector.
        // The norm takes the sign of the pivot, so the "+= 1.0" below adds to
        // a non-negative QR[k][k] and cannot cancel.
        if(QR[k][k] < 0) {
          nrm = -nrm;
        }
        for(int i = k; i < m; i++) {
          QR[i][k] /= nrm;
        }
        QR[k][k] += 1.0;

        // Apply transformation to remaining columns.
        for(int j = k + 1; j < n; j++) {
          double s = 0.0;
          for(int i = k; i < m; i++) {
            s += QR[i][k] * QR[i][j];
          }
          s = -s / QR[k][k];
          for(int i = k; i < m; i++) {
            QR[i][j] += s * QR[i][k];
          }
        }
      }
      // A zero column leaves Rdiag[k] == 0, which isFullRank() reports.
      Rdiag[k] = -nrm;
    }
  }

  /*
   * ------------------------ Public Methods ------------------------
   */

  /**
   * Is the matrix full rank?
   *
   * The test is an exact comparison of the diagonal entries of R with zero.
   *
   * @return true if R, and hence A, has full rank.
   */
  public boolean isFullRank() {
    for(int j = 0; j < n; j++) {
      if(Rdiag[j] == 0) {
        return false;
      }
    }
    return true;
  }

  /**
   * Return the Householder vectors
   *
   * @return Lower trapezoidal matrix whose columns define the reflections
   */
  public Matrix getH() {
    Matrix X = new Matrix(m, n);
    double[][] H = X.getArrayRef();
    for(int i = 0; i < m; i++) {
      for(int j = 0; j < n; j++) {
        if(i >= j) {
          H[i][j] = QR[i][j];
        }
        else {
          H[i][j] = 0.0;
        }
      }
    }
    return X;
  }

  /**
   * Return the upper triangular factor
   *
   * The diagonal is taken from Rdiag, the part above the diagonal from the
   * internal QR array.
   *
   * @return R
   */
  public Matrix getR() {
    Matrix X = new Matrix(n, n);
    double[][] R = X.getArrayRef();
    for(int i = 0; i < n; i++) {
      for(int j = 0; j < n; j++) {
        if(i < j) {
          R[i][j] = QR[i][j];
        }
        else if(i == j) {
          R[i][j] = Rdiag[i];
        }
        else {
          R[i][j] = 0.0;
        }
      }
    }
    return X;
  }

  /**
   * Generate and return the (economy-sized) orthogonal factor
   *
   * Q is accumulated column by column, from the last column to the first, by
   * applying the stored Householder vectors.
   *
   * @return Q
   */
  public Matrix getQ() {
    Matrix X = new Matrix(m, n);
    double[][] Q = X.getArrayRef();
    for(int k = n - 1; k >= 0; k--) {
      for(int i = 0; i < m; i++) {
        Q[i][k] = 0.0;
      }
      Q[k][k] = 1.0;
      for(int j = k; j < n; j++) {
        // Reflections with a zero pivot are skipped.
        if(QR[k][k] != 0) {
          double s = 0.0;
          for(int i = k; i < m; i++) {
            s += QR[i][k] * Q[i][j];
          }
          s = -s / QR[k][k];
          for(int i = k; i < m; i++) {
            Q[i][j] += s * QR[i][k];
          }
        }
      }
    }
    return X;
  }

  /**
   * Least squares solution of A*X = B
   *
   * B is not modified; the computation works on the array returned by
   * B.getArrayCopy().
   *
   * @param B A Matrix with as many rows as A and any number of columns.
   * @return X that minimizes the two norm of Q*R*X-B.
   * @exception IllegalArgumentException Matrix row dimensions must agree.
   * @exception RuntimeException Matrix is rank deficient.
   */
  public Matrix solve(Matrix B) {
    if(B.getRowDimensionality() != m) {
      throw new IllegalArgumentException("Matrix row dimensions must agree.");
    }
    if(!this.isFullRank()) {
      throw new RuntimeException("Matrix is rank deficient.");
    }

    // Copy right hand side
    int nx = B.getColumnDimensionality();
    double[][] X = B.getArrayCopy();

    // Compute Y = transpose(Q)*B
    for(int k = 0; k < n; k++) {
      for(int j = 0; j < nx; j++) {
        double s = 0.0;
        for(int i = k; i < m; i++) {
          s += QR[i][k] * X[i][j];
        }
        s = -s / QR[k][k];
        for(int i = k; i < m; i++) {
          X[i][j] += s * QR[i][k];
        }
      }
    }
    // Solve R*X = Y;
    // (back substitution, starting with the last row of R)
    for(int k = n - 1; k >= 0; k--) {
      for(int j = 0; j < nx; j++) {
        X[k][j] /= Rdiag[k];
      }
      for(int i = 0; i < k; i++) {
        for(int j = 0; j < nx; j++) {
          X[i][j] -= X[k][j] * QR[i][k];
        }
      }
    }
    // Only the first n rows hold the solution.
    return (new Matrix(X).getMatrix(0, n - 1, 0, nx - 1));
  }
}
// --- src/de/lmu/ifi/dbs/elki/math/linearalgebra/SingularValueDecomposition.java ---
package de.lmu.ifi.dbs.elki.math.linearalgebra;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures

Copyright (C) 2011
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

import de.lmu.ifi.dbs.elki.math.MathUtil;

/**
 * Singular Value Decomposition.
 * <P>
 * For an m-by-n matrix A with m &gt;= n, the singular value decomposition is an
 * m-by-n orthogonal matrix U, an n-by-n diagonal matrix S, and an n-by-n
 * orthogonal matrix V so that A = U*S*V'.
 * <P>
 * The singular values, sigma[k] = S[k][k], are ordered so that sigma[0] &gt;=
 * sigma[1] &gt;= ... &gt;= sigma[n-1].
 * <P>
 * The singular value decomposition always exists, so the constructor will never
 * fail. The matrix condition number and the effective numerical rank can be
 * computed from this decomposition.
 * <P>
 * NOTE(review): only m &gt;= n is documented as supported. For m &lt; n - 1 the
 * array of singular values has fewer than n entries (length min(m + 1, n)), so
 * {@link #getS()} would read past its end - confirm before using wide
 * matrices.
 *
 * @apiviz.uses Matrix - - transforms
 */
@SuppressWarnings("serial")
public class SingularValueDecomposition implements java.io.Serializable {
  /*
   * ------------------------ Class variables ------------------------
   */

  /**
   * Arrays for internal storage of U and V.
   *
   * @serial internal storage of U.
   * @serial internal storage of V.
   */
  private double[][] U, V;

  /**
   * Array for internal storage of singular values.
   *
   * @serial internal storage of singular values.
   */
  private double[] s;

  /**
   * Row and column dimensions.
   *
   * @serial row dimension.
   * @serial column dimension.
   */
  private int m, n;

  /*
   * ------------------------ Constructor ------------------------
   */

  /**
   * Construct the singular value decomposition
   *
   * The input matrix is not modified; the computation works on the array
   * returned by Arg.getArrayCopy(). The algorithm first reduces the matrix to
   * bidiagonal form, then generates U and V, and finally iterates on the
   * bidiagonal matrix until all super-diagonal elements are negligible.
   *
   * @param Arg Rectangular matrix
   */

  public SingularValueDecomposition(Matrix Arg) {

    // Derived from LINPACK code.
    // Initialize.
    double[][] A = Arg.getArrayCopy();
    m = Arg.getRowDimensionality();
    n = Arg.getColumnDimensionality();
    int nu = Math.min(m, n);
    s = new double[Math.min(m + 1, n)];
    U = new double[m][nu];
    V = new double[n][n];
    double[] e = new double[n];
    double[] work = new double[m];
    boolean wantu = true;
    boolean wantv = true;

    // Reduce A to bidiagonal form, storing the diagonal elements
    // in s and the super-diagonal elements in e.

    int nct = Math.min(m - 1, n);
    int nrt = Math.max(0, Math.min(n - 2, m));
    for(int k = 0; k < Math.max(nct, nrt); k++) {
      if(k < nct) {

        // Compute the transformation for the k-th column and
        // place the k-th diagonal in s[k].
        // Compute 2-norm of k-th column without under/overflow.
        s[k] = 0;
        for(int i = k; i < m; i++) {
          s[k] = MathUtil.hypotenuse(s[k], A[i][k]);
        }
        if(s[k] != 0.0) {
          if(A[k][k] < 0.0) {
            s[k] = -s[k];
          }
          for(int i = k; i < m; i++) {
            A[i][k] /= s[k];
          }
          A[k][k] += 1.0;
        }
        s[k] = -s[k];
      }
      for(int j = k + 1; j < n; j++) {
        if((k < nct) & (s[k] != 0.0)) {

          // Apply the transformation.

          double t = 0;
          for(int i = k; i < m; i++) {
            t += A[i][k] * A[i][j];
          }
          t = -t / A[k][k];
          for(int i = k; i < m; i++) {
            A[i][j] += t * A[i][k];
          }
        }

        // Place the k-th row of A into e for the
        // subsequent calculation of the row transformation.

        e[j] = A[k][j];
      }
      if(wantu & (k < nct)) {

        // Place the transformation in U for subsequent back
        // multiplication.

        for(int i = k; i < m; i++) {
          U[i][k] = A[i][k];
        }
      }
      if(k < nrt) {

        // Compute the k-th row transformation and place the
        // k-th super-diagonal in e[k].
        // Compute 2-norm without under/overflow.
        e[k] = 0;
        for(int i = k + 1; i < n; i++) {
          e[k] = MathUtil.hypotenuse(e[k], e[i]);
        }
        if(e[k] != 0.0) {
          if(e[k + 1] < 0.0) {
            e[k] = -e[k];
          }
          for(int i = k + 1; i < n; i++) {
            e[i] /= e[k];
          }
          e[k + 1] += 1.0;
        }
        e[k] = -e[k];
        if((k + 1 < m) & (e[k] != 0.0)) {

          // Apply the transformation.

          for(int i = k + 1; i < m; i++) {
            work[i] = 0.0;
          }
          for(int j = k + 1; j < n; j++) {
            for(int i = k + 1; i < m; i++) {
              work[i] += e[j] * A[i][j];
            }
          }
          for(int j = k + 1; j < n; j++) {
            double t = -e[j] / e[k + 1];
            for(int i = k + 1; i < m; i++) {
              A[i][j] += t * work[i];
            }
          }
        }
        if(wantv) {

          // Place the transformation in V for subsequent
          // back multiplication.

          for(int i = k + 1; i < n; i++) {
            V[i][k] = e[i];
          }
        }
      }
    }

    // Set up the final bidiagonal matrix of order p.

    int p = Math.min(n, m + 1);
    if(nct < n) {
      s[nct] = A[nct][nct];
    }
    if(m < p) {
      s[p - 1] = 0.0;
    }
    if(nrt + 1 < p) {
      e[nrt] = A[nrt][p - 1];
    }
    e[p - 1] = 0.0;

    // If required, generate U.

    if(wantu) {
      for(int j = nct; j < nu; j++) {
        for(int i = 0; i < m; i++) {
          U[i][j] = 0.0;
        }
        U[j][j] = 1.0;
      }
      for(int k = nct - 1; k >= 0; k--) {
        if(s[k] != 0.0) {
          for(int j = k + 1; j < nu; j++) {
            double t = 0;
            for(int i = k; i < m; i++) {
              t += U[i][k] * U[i][j];
            }
            t = -t / U[k][k];
            for(int i = k; i < m; i++) {
              U[i][j] += t * U[i][k];
            }
          }
          for(int i = k; i < m; i++) {
            U[i][k] = -U[i][k];
          }
          U[k][k] = 1.0 + U[k][k];
          for(int i = 0; i < k - 1; i++) {
            U[i][k] = 0.0;
          }
        }
        else {
          for(int i = 0; i < m; i++) {
            U[i][k] = 0.0;
          }
          U[k][k] = 1.0;
        }
      }
    }

    // If required, generate V.

    if(wantv) {
      for(int k = n - 1; k >= 0; k--) {
        if((k < nrt) & (e[k] != 0.0)) {
          for(int j = k + 1; j < nu; j++) {
            double t = 0;
            for(int i = k + 1; i < n; i++) {
              t += V[i][k] * V[i][j];
            }
            t = -t / V[k + 1][k];
            for(int i = k + 1; i < n; i++) {
              V[i][j] += t * V[i][k];
            }
          }
        }
        for(int i = 0; i < n; i++) {
          V[i][k] = 0.0;
        }
        V[k][k] = 1.0;
      }
    }

    // Main iteration loop for the singular values.

    int pp = p - 1;
    int iter = 0;
    double eps = Math.pow(2.0, -52.0);
    while(p > 0) {
      int k, kase;

      // Here is where a test for too many iterations would go.
      // (iter is maintained below but never checked, so the loop has no
      // iteration limit.)

      // This section of the program inspects for
      // negligible elements in the s and e arrays. On
      // completion the variables kase and k are set as follows.

      // kase = 1 if s(p) and e[k-1] are negligible and k<p
      // kase = 2 if s(k) is negligible and k<p
      // kase = 3 if e[k-1] is negligible, k<p, and
      // s(k), ..., s(p) are not negligible (qr step).
      // kase = 4 if e(p-1) is negligible (convergence).

      for(k = p - 2; k >= -1; k--) {
        if(k == -1) {
          break;
        }
        if(Math.abs(e[k]) <= eps * (Math.abs(s[k]) + Math.abs(s[k + 1]))) {
          e[k] = 0.0;
          break;
        }
      }
      if(k == p - 2) {
        kase = 4;
      }
      else {
        int ks;
        for(ks = p - 1; ks >= k; ks--) {
          if(ks == k) {
            break;
          }
          double t = (ks != p ? Math.abs(e[ks]) : 0.) + (ks != k + 1 ? Math.abs(e[ks - 1]) : 0.);
          if(Math.abs(s[ks]) <= eps * t) {
            s[ks] = 0.0;
            break;
          }
        }
        if(ks == k) {
          kase = 3;
        }
        else if(ks == p - 1) {
          kase = 1;
        }
        else {
          kase = 2;
          k = ks;
        }
      }
      k++;

      // Perform the task indicated by kase.

      switch(kase){

      // Deflate negligible s(p).

      case 1: {
        double f = e[p - 2];
        e[p - 2] = 0.0;
        for(int j = p - 2; j >= k; j--) {
          double t = MathUtil.hypotenuse(s[j], f);
          double cs = s[j] / t;
          double sn = f / t;
          s[j] = t;
          if(j != k) {
            f = -sn * e[j - 1];
            e[j - 1] = cs * e[j - 1];
          }
          if(wantv) {
            for(int i = 0; i < n; i++) {
              t = cs * V[i][j] + sn * V[i][p - 1];
              V[i][p - 1] = -sn * V[i][j] + cs * V[i][p - 1];
              V[i][j] = t;
            }
          }
        }
      }
        break;

      // Split at negligible s(k).

      case 2: {
        double f = e[k - 1];
        e[k - 1] = 0.0;
        for(int j = k; j < p; j++) {
          double t = MathUtil.hypotenuse(s[j], f);
          double cs = s[j] / t;
          double sn = f / t;
          s[j] = t;
          f = -sn * e[j];
          e[j] = cs * e[j];
          if(wantu) {
            for(int i = 0; i < m; i++) {
              t = cs * U[i][j] + sn * U[i][k - 1];
              U[i][k - 1] = -sn * U[i][j] + cs * U[i][k - 1];
              U[i][j] = t;
            }
          }
        }
      }
        break;

      // Perform one qr step.

      case 3: {

        // Calculate the shift.

        double scale = Math.max(Math.max(Math.max(Math.max(Math.abs(s[p - 1]), Math.abs(s[p - 2])), Math.abs(e[p - 2])), Math.abs(s[k])), Math.abs(e[k]));
        double sp = s[p - 1] / scale;
        double spm1 = s[p - 2] / scale;
        double epm1 = e[p - 2] / scale;
        double sk = s[k] / scale;
        double ek = e[k] / scale;
        double b = ((spm1 + sp) * (spm1 - sp) + epm1 * epm1) / 2.0;
        double c = (sp * epm1) * (sp * epm1);
        double shift = 0.0;
        if((b != 0.0) | (c != 0.0)) {
          shift = Math.sqrt(b * b + c);
          if(b < 0.0) {
            shift = -shift;
          }
          shift = c / (b + shift);
        }
        double f = (sk + sp) * (sk - sp) + shift;
        double g = sk * ek;

        // Chase zeros.

        for(int j = k; j < p - 1; j++) {
          double t = MathUtil.hypotenuse(f, g);
          double cs = f / t;
          double sn = g / t;
          if(j != k) {
            e[j - 1] = t;
          }
          f = cs * s[j] + sn * e[j];
          e[j] = cs * e[j] - sn * s[j];
          g = sn * s[j + 1];
          s[j + 1] = cs * s[j + 1];
          if(wantv) {
            for(int i = 0; i < n; i++) {
              t = cs * V[i][j] + sn * V[i][j + 1];
              V[i][j + 1] = -sn * V[i][j] + cs * V[i][j + 1];
              V[i][j] = t;
            }
          }
          t = MathUtil.hypotenuse(f, g);
          cs = f / t;
          sn = g / t;
          s[j] = t;
          f = cs * e[j] + sn * s[j + 1];
          s[j + 1] = -sn * e[j] + cs * s[j + 1];
          g = sn * e[j + 1];
          e[j + 1] = cs * e[j + 1];
          if(wantu && (j < m - 1)) {
            for(int i = 0; i < m; i++) {
              t = cs * U[i][j] + sn * U[i][j + 1];
              U[i][j + 1] = -sn * U[i][j] + cs * U[i][j + 1];
              U[i][j] = t;
            }
          }
        }
        e[p - 2] = f;
        iter = iter + 1;
      }
        break;

      // Convergence.

      case 4: {

        // Make the singular values positive.

        if(s[k] <= 0.0) {
          s[k] = (s[k] < 0.0 ? -s[k] : 0.0);
          if(wantv) {
            // NOTE(review): rows 0..pp of column k are negated; pp equals
            // n - 1 only when m >= n - 1 - confirm for wider matrices.
            for(int i = 0; i <= pp; i++) {
              V[i][k] = -V[i][k];
            }
          }
        }

        // Order the singular values.

        while(k < pp) {
          if(s[k] >= s[k + 1]) {
            break;
          }
          double t = s[k];
          s[k] = s[k + 1];
          s[k + 1] = t;
          if(wantv && (k < n - 1)) {
            for(int i = 0; i < n; i++) {
              t = V[i][k + 1];
              V[i][k + 1] = V[i][k];
              V[i][k] = t;
            }
          }
          if(wantu && (k < m - 1)) {
            for(int i = 0; i < m; i++) {
              t = U[i][k + 1];
              U[i][k + 1] = U[i][k];
              U[i][k] = t;
            }
          }
          k++;
        }
        iter = 0;
        p--;
      }
        break;
      }
    }
  }

  /*
   * ------------------------ Public Methods ------------------------
   */

  /**
   * Return the left singular vectors
   *
   * @return U
   */

  public Matrix getU() {
    return new Matrix(U);
  }

  /**
   * Return the right singular vectors
   *
   * @return V
   */

  public Matrix getV() {
    return new Matrix(V);
  }

  /**
   * Return the one-dimensional array of singular values
   *
   * The internal array is returned directly, not a copy; modifying it changes
   * the state of this decomposition.
   *
   * @return diagonal of S.
   */

  public double[] getSingularValues() {
    return s;
  }

  /**
   * Return the diagonal matrix of singular values
   *
   * @return S, an n-by-n matrix with the singular values on the diagonal
   */

  public Matrix getS() {
    Matrix X = new Matrix(n, n);
    double[][] S = X.getArrayRef();
    for(int i = 0; i < n; i++) {
      for(int j = 0; j < n; j++) {
        S[i][j] = 0.0;
      }
      S[i][i] = this.s[i];
    }
    return X;
  }

  /**
   * Two norm
   *
   * @return max(S)
   */

  public double norm2() {
    return s[0];
  }

  /**
   * Two norm condition number
   *
   * @return max(S)/min(S)
   */

  public double cond() {
    return s[0] / s[Math.min(m, n) - 1];
  }

  /**
   * Effective numerical matrix rank
   *
   * A singular value counts as nonnegligible if it is larger than
   * max(m, n) * s[0] * 2^-52.
   *
   * @return Number of nonnegligible singular values.
   */

  public int rank() {
    double eps = Math.pow(2.0, -52.0);
    double tol = Math.max(m, n) * s[0] * eps;
    int r = 0;
    for(int i = 0; i < s.length; i++) {
      if(s[i] > tol) {
        r++;
      }
    }
    return r;
  }
}

// --- src/de/lmu/ifi/dbs/elki/math/linearalgebra/SortedEigenPairs.java ---
package de.lmu.ifi.dbs.elki.math.linearalgebra;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures

Copyright (C) 2011
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

import java.util.Arrays;
import java.util.Comparator;
import java.util.List;

/**
 * Helper class which encapsulates an array of eigenpairs (i.e. an array of
 * eigenvectors and their corresponding eigenvalues). This class is used to sort
 * eigenvectors (and -values).
 *
 * @author Elke Achtert
 *
 * @apiviz.composedOf de.lmu.ifi.dbs.elki.math.linearalgebra.EigenPair
 */
public class SortedEigenPairs {
  /**
   * The array of eigenpairs.
   */
  private EigenPair[] eigenPairs;

  /**
   * Creates a new empty SortedEigenPairs object. Can only be called from the
   * copy() method.
   */
  private SortedEigenPairs() {
    // nothing to do here.
  }

  /**
   * Creates a new SortedEigenPairs object from the specified eigenvalue
   * decomposition. The eigenvectors are sorted according to the specified
   * order.
   *
   * Note that the absolute value of each real eigenvalue is stored, so the
   * sort order is by magnitude.
   *
   * @param evd the underlying eigenvalue decomposition
   * @param ascending a boolean that indicates ascending order
   */
  public SortedEigenPairs(EigenvalueDecomposition evd, final boolean ascending) {
    double[] eigenvalues = evd.getRealEigenvalues();
    Matrix eigenvectors = evd.getV();

    this.eigenPairs = new EigenPair[eigenvalues.length];
    for(int i = 0; i < eigenvalues.length; i++) {
      double e = java.lang.Math.abs(eigenvalues[i]);
      Vector v = eigenvectors.getColumnVector(i);
      eigenPairs[i] = new EigenPair(v, e);
    }

    Comparator<EigenPair> comp = new Comparator<EigenPair>() {
      @Override
      public int compare(EigenPair o1, EigenPair o2) {
        // Natural order of EigenPair, inverted for descending order.
        final int natural = o1.compareTo(o2);
        return ascending ? natural : -natural;
      }
    };

    Arrays.sort(eigenPairs, comp);
  }

  /**
   * Creates a new SortedEigenPairs object from the specified list. The
   * eigenvectors are sorted in descending order.
   *
   * The list itself is not modified; its elements are copied into a new array
   * before sorting.
   *
   * @param eigenPairs the eigenpairs to be sorted
   */
  public SortedEigenPairs(List<EigenPair> eigenPairs) {
    Comparator<EigenPair> comp = new Comparator<EigenPair>() {
      @Override
      public int compare(EigenPair o1, EigenPair o2) {
        return -1 * o1.compareTo(o2);
      }
    };

    this.eigenPairs = eigenPairs.toArray(new EigenPair[eigenPairs.size()]);
    Arrays.sort(this.eigenPairs, comp);
  }

  /**
   * Returns the sorted eigenvalues.
   *
   * @return the sorted eigenvalues, in a newly allocated array
   */
  public double[] eigenValues() {
    double[] eigenValues = new double[eigenPairs.length];
    for(int i = 0; i < eigenPairs.length; i++) {
      EigenPair eigenPair = eigenPairs[i];
      eigenValues[i] = eigenPair.getEigenvalue();
    }
    return eigenValues;
  }

  /**
   * Returns the sorted eigenvectors.
   *
   * The result matrix is allocated as square, with as many rows as there are
   * eigenpairs.
   *
   * @return the sorted eigenvectors, one per column
   */
  public Matrix eigenVectors() {
    Matrix eigenVectors = new Matrix(eigenPairs.length, eigenPairs.length);
    for(int i = 0; i < eigenPairs.length; i++) {
      EigenPair eigenPair = eigenPairs[i];
      eigenVectors.setColumnVector(i, eigenPair.getEigenvector());
    }
    return eigenVectors;
  }

  /**
   * Returns the first <code>n</code> sorted eigenvectors as a matrix.
   *
   * @param n the number of eigenvectors (columns) to be returned
   * @return the first <code>n</code> sorted eigenvectors
   */
  public Matrix eigenVectors(int n) {
    Matrix eigenVectors = new Matrix(eigenPairs.length, n);
    for(int i = 0; i < n; i++) {
      EigenPair eigenPair = eigenPairs[i];
      eigenVectors.setColumnVector(i, eigenPair.getEigenvector());
    }
    return eigenVectors;
  }

  /**
   * Returns the last <code>n</code> sorted eigenvectors as a matrix.
   *
   * The columns are filled in reverse order: column 0 holds the last
   * eigenvector, column 1 the second to last, and so on.
   *
   * @param n the number of eigenvectors (columns) to be returned
   * @return the last <code>n</code> sorted eigenvectors
   */
  public Matrix reverseEigenVectors(int n) {
    Matrix eigenVectors = new Matrix(eigenPairs.length, n);
    for(int i = 0; i < n; i++) {
      EigenPair eigenPair = eigenPairs[eigenPairs.length - 1 - i];
      eigenVectors.setColumnVector(i, eigenPair.getEigenvector());
    }
    return eigenVectors;
  }

  /**
   * Returns the eigenpair at the specified index.
   *
   * @param index the index of the eigenpair to be returned
   * @return the eigenpair at the specified index
   */
  public EigenPair getEigenPair(int index) {
    return eigenPairs[index];
  }

  /**
   * Returns the number of the eigenpairs.
   *
   * @return the number of the eigenpairs
   */
  public int size() {
    return eigenPairs.length;
  }

  /**
   * Returns a string representation of these eigenpairs: every eigenpair is
   * appended in sorted order, each preceded by a newline.
   *
   * @return a string representation of these eigenpairs
   */
  @Override
  public String toString() {
    // Local, single-threaded use: StringBuilder instead of StringBuffer.
    StringBuilder result = new StringBuilder();
    for(EigenPair eigenPair : eigenPairs) {
      result.append("\n").append(eigenPair);
    }
    return result.toString();
  }

  /**
   * Returns a copy of this object.
   *
   * Only the internal array is cloned; the EigenPair objects themselves are
   * shared between the original and the copy.
   *
   * @return new copy
   */
  public SortedEigenPairs copy() {
    SortedEigenPairs cp = new SortedEigenPairs();
    cp.eigenPairs = this.eigenPairs.clone();
    return cp;
  }
}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/SubspaceProjectionResult.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/SubspaceProjectionResult.java new file mode 100644 index 00000000..6716d8ec --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/SubspaceProjectionResult.java @@ -0,0 +1,64 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * Simple class wrapping the result of a subspace projection. + * + * @author Erich Schubert + * + * @apiviz.composedOf Matrix + */ +public class SubspaceProjectionResult implements ProjectionResult { + /** + * The correlation dimensionality + */ + private int correlationDimensionality; + + /** + * The similarity matrix + */ + private Matrix similarityMat; + + /** + * Constructor. 
+ * + * @param correlationDimensionality dimensionality + * @param similarityMat projection matrix + */ + public SubspaceProjectionResult(int correlationDimensionality, Matrix similarityMat) { + super(); + this.correlationDimensionality = correlationDimensionality; + this.similarityMat = similarityMat; + } + + @Override + public int getCorrelationDimension() { + return correlationDimensionality; + } + + @Override + public Matrix similarityMatrix() { + return similarityMat; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Vector.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Vector.java new file mode 100644 index 00000000..159f91e0 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Vector.java @@ -0,0 +1,652 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +import java.io.Serializable; +import java.util.Arrays; + +import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.utilities.FormatUtil; + +/** + * Provides a vector object that encapsulates an m x 1 - matrix object. 
+ * + * @author Elke Achtert + * + * @apiviz.landmark + */ +public class Vector implements MatrixLike<Vector>, Serializable { + /** + * Serial version + */ + private static final long serialVersionUID = 1L; + + /** + * Array for internal storage of elements. + * + * @serial internal array storage. + */ + protected final double[] elements; + + /** + * Construct a vector from a given array. + * + * @param values array of doubles + */ + public Vector(final double... values) { + elements = values; + } + + /** + * Provides an m x 1 vector. + * + * @param m the number of rows + */ + public Vector(final int m) { + elements = new double[m]; + } + + /** + * Returns a randomly created vector of length 1.0 + * + * @param dimensionality dimensionality + * @return the dimensionality of the vector + */ + // FIXME: may also return null vector by chance. + public static final Vector randomNormalizedVector(final int dimensionality) { + final Vector v = new Vector(dimensionality); + for(int i = 0; i < dimensionality; i++) { + v.elements[i] = Math.random(); + } + v.normalize(); + return v; + } + + /** + * Returns the ith unit vector of the specified dimensionality. + * + * @param dimensionality the dimensionality of the vector + * @param i the index + * @return the ith unit vector of the specified dimensionality + */ + public static final Vector unitVector(final int dimensionality, final int i) { + final Vector v = new Vector(dimensionality); + v.elements[i] = 1; + return v; + } + + /** + * Returns a copy of this vector. + * + * @return a copy of this vector + */ + @Override + public final Vector copy() { + return new Vector(elements.clone()); + } + + /** + * Clone the Vector object. + */ + @Override + public Vector clone() { + return this.copy(); + } + + /** + * Access the internal two-dimensional array. + * + * @return Pointer to the two-dimensional array of matrix elements. 
+ */ + public final double[] getArrayRef() { + return elements; + } + + /** + * Copy the internal two-dimensional array. + * + * @return Two-dimensional array copy of matrix elements. + */ + public final double[] getArrayCopy() { + return elements.clone(); + } + + /** + * Returns the dimensionality of this vector. + * + * @return the dimensionality of this vector + */ + public final int getDimensionality() { + return elements.length; + } + + @Override + public final int getRowDimensionality() { + return elements.length; + } + + @Override + public final int getColumnDimensionality() { + return 1; + } + + /** + * Returns the value at the specified row. + * + * @param i the row index + * @return the value at row i + */ + public final double get(final int i) { + return elements[i]; + } + + @Override + public final double get(final int i, final int j) { + if(j != 0) { + throw new ArrayIndexOutOfBoundsException(); + } + return elements[i]; + } + + /** + * Sets the value at the specified row. + * + * @param i the row index + * @param value the value to be set + * + * @return the modified vector + */ + public final Vector set(final int i, final double value) { + elements[i] = value; + return this; + } + + @Override + public final Vector set(final int i, final int j, final double s) { + if(j != 0) { + throw new ArrayIndexOutOfBoundsException(); + } + elements[i] = s; + return this; + } + + @Override + public final Vector increment(final int i, final int j, final double s) { + if(j != 0) { + throw new ArrayIndexOutOfBoundsException(); + } + elements[i] += s; + return this; + } + + @Override + public final Vector getColumnVector(final int i) { + if(i != 0) { + throw new ArrayIndexOutOfBoundsException(); + } + return this; + } + + @Override + public final Matrix transpose() { + return new Matrix(this.elements, 1); + } + + /** + * Returns a new vector which is the result of this vector plus the specified + * vector. 
+ * + * @param v the vector to be added + * @return the resulting vector + */ + @Override + public final Vector plus(final Vector v) { + checkDimensions(v); + final Vector result = new Vector(elements.length); + for(int i = 0; i < elements.length; i++) { + result.elements[i] = elements[i] + v.elements[i]; + } + return result; + } + + /** + * Returns a new vector which is the result of this vector plus the specified + * vector times the given factor. + * + * @param v the vector to be added + * @param s the scalar + * @return the resulting vector + */ + @Override + public final Vector plusTimes(final Vector v, final double s) { + checkDimensions(v); + final Vector result = new Vector(elements.length); + for(int i = 0; i < elements.length; i++) { + result.elements[i] = elements[i] + v.elements[i] * s; + } + return result; + } + + /** + * A = A + B + * + * @param B another matrix + * @return A + B in this Matrix + */ + @Override + public final Vector plusEquals(final Vector B) { + checkDimensions(B); + for(int i = 0; i < elements.length; i++) { + elements[i] += B.get(i, 0); + } + return this; + } + + /** + * A = A + s * B + * + * @param B another matrix + * @param s Scalar + * @return A + s * B in this Matrix + */ + @Override + public final Vector plusTimesEquals(final Vector B, final double s) { + checkDimensions(B); + for(int i = 0; i < elements.length; i++) { + elements[i] += s * B.get(i, 0); + } + return this; + } + + /** + * Add a constant value to all dimensions. + * + * @param d Value to add + * @return Modified vector + */ + public final Vector plusEquals(final double d) { + for(int i = 0; i < elements.length; i++) { + elements[i] += d; + } + return this; + } + + /** + * Returns this vector minus the specified vector v. 
+ * + * @param v the vector to be subtracted from this vector + * @return this vector minus the specified vector v + */ + @Override + public final Vector minus(final Vector v) { + final Vector sub = new Vector(elements.length); + for(int i = 0; i < elements.length; i++) { + sub.elements[i] = elements[i] - v.elements[i]; + } + return sub; + } + + /** + * Returns this vector minus the specified vector v times s. + * + * @param v the vector to be subtracted from this vector + * @param s the scaling factor + * @return this vector minus the specified vector v + */ + @Override + public final Vector minusTimes(final Vector v, final double s) { + final Vector sub = new Vector(elements.length); + for(int i = 0; i < elements.length; i++) { + sub.elements[i] = elements[i] - v.elements[i] * s; + } + return sub; + } + + /** + * A = A - B + * + * @param B another matrix + * @return A - B in this Matrix + */ + @Override + public final Vector minusEquals(final Vector B) { + checkDimensions(B); + for(int i = 0; i < elements.length; i++) { + elements[i] -= B.get(i, 0); + } + return this; + } + + /** + * A = A - s * B + * + * @param B another matrix + * @param s Scalar + * @return A - s * B in this Matrix + */ + @Override + public final Vector minusTimesEquals(final Vector B, final double s) { + checkDimensions(B); + for(int i = 0; i < elements.length; i++) { + elements[i] -= s * B.get(i, 0); + } + return this; + } + + /** + * Subtract a constant value from all dimensions. + * + * @param d Value to subtract + * @return Modified vector + */ + public final Vector minusEquals(final double d) { + for(int i = 0; i < elements.length; i++) { + elements[i] -= d; + } + return this; + } + + /** + * Returns a new vector which is the result of this vector multiplied by the + * specified scalar. 
+ * + * @param s the scalar to be multiplied + * @return the resulting vector + */ + @Override + public final Vector times(final double s) { + final Vector v = new Vector(elements.length); + for(int i = 0; i < elements.length; i++) { + v.elements[i] = elements[i] * s; + } + return v; + } + + /** + * Multiply a matrix by a scalar in place, A = s*A + * + * @param s scalar + * @return replace A by s*A + */ + @Override + public final Vector timesEquals(final double s) { + for(int i = 0; i < elements.length; i++) { + elements[i] *= s; + } + return this; + } + + /** + * Linear algebraic matrix multiplication, A * B + * + * @param B another matrix + * @return Matrix product, A * B + * @throws IllegalArgumentException Matrix inner dimensions must agree. + */ + public final Matrix times(final Matrix B) { + if(B.elements.length != 1) { + throw new IllegalArgumentException("Matrix inner dimensions must agree."); + } + final Matrix X = new Matrix(this.elements.length, B.columndimension); + for(int j = 0; j < B.columndimension; j++) { + for(int i = 0; i < this.elements.length; i++) { + X.elements[i][j] = elements[i] * B.elements[0][j]; + } + } + return X; + } + + /** + * Linear algebraic matrix multiplication, A<sup>T</sup> * B + * + * @param B another matrix + * @return Matrix product, A<sup>T</sup> * B + * @throws IllegalArgumentException Matrix inner dimensions must agree. 
+ */ + public final Matrix transposeTimes(final Matrix B) { + if(B.elements.length != this.elements.length) { + throw new IllegalArgumentException("Matrix inner dimensions must agree."); + } + final Matrix X = new Matrix(1, B.columndimension); + for(int j = 0; j < B.columndimension; j++) { + // multiply it with each row from A + double s = 0; + for(int k = 0; k < this.elements.length; k++) { + s += this.elements[k] * B.elements[k][j]; + } + X.elements[0][j] = s; + } + return X; + } + + /** + * Linear algebraic matrix multiplication, A<sup>T</sup> * B + * + * @param B another vector + * @return Matrix product, A<sup>T</sup> * B + * @throws IllegalArgumentException Matrix inner dimensions must agree. + */ + public final double transposeTimes(final Vector B) { + if(B.elements.length != this.elements.length) { + throw new IllegalArgumentException("Matrix inner dimensions must agree."); + } + double s = 0; + for(int k = 0; k < this.elements.length; k++) { + s += this.elements[k] * B.elements[k]; + } + return s; + } + + /** + * Linear algebraic matrix multiplication, A * B^T + * + * @param B another matrix + * @return Matrix product, A * B^T + * @throws IllegalArgumentException Matrix inner dimensions must agree. + */ + public final Matrix timesTranspose(final Matrix B) { + if(B.columndimension != 1) { + throw new IllegalArgumentException("Matrix inner dimensions must agree."); + } + final Matrix X = new Matrix(this.elements.length, B.elements.length); + for(int j = 0; j < B.elements.length; j++) { + for(int i = 0; i < this.elements.length; i++) { + X.elements[i][j] = elements[i] * B.elements[0][j]; + } + } + return X; + } + + /** + * Returns the scalar product of this vector and the specified vector v. 
+ * + * @param v the vector + * @return double the scalar product of this vector and v + */ + public final double scalarProduct(final Vector v) { + checkDimensions(v); + double scalarProduct = 0.0; + for(int row = 0; row < elements.length; row++) { + final double prod = elements[row] * v.elements[row]; + scalarProduct += prod; + } + return scalarProduct; + } + + /** + * Inverts every element of the vector. + * + * @return the resulting vector + */ + public final Vector inverseVector() { + final Vector inv = new Vector(elements.length); + for(int i = 0; i < elements.length; i++) { + inv.elements[i] = 1.0 / elements[i]; + } + return inv; + } + + /** + * Square roots every element of the vector. + * + * @return the resulting vector + */ + public final Vector sqrtVector() { + final Vector sqrt = new Vector(elements.length); + for(int i = 0; i < elements.length; i++) { + sqrt.elements[i] = Math.sqrt(elements[i]); + } + return sqrt; + } + + /** + * Returns the length of this vector. + * + * @return the length of this vector + */ + public final double euclideanLength() { + double sqlen = 0.0; + for(int row = 0; row < elements.length; row++) { + sqlen += elements[row] * elements[row]; + } + return Math.sqrt(sqlen); + } + + /** + * Frobenius norm + * + * @return sqrt of sum of squares of all elements. + */ + public double normF() { + double f = 0; + for(int i = 0; i < elements.length; i++) { + f = MathUtil.hypotenuse(f, elements[i]); + } + return f; + } + + /** + * Normalizes this vector to the length of 1.0. + */ + public final Vector normalize() { + double norm = euclideanLength(); + if(norm != 0) { + for(int row = 0; row < elements.length; row++) { + elements[row] /= norm; + } + } + return this; + } + + /** + * Projects this row vector into the subspace formed by the specified matrix + * v. + * + * @param v the subspace matrix + * @return the projection of p into the subspace formed by v + * @throws IllegalArgumentException if this matrix is no row vector, i.e. 
this + * matrix has more than one column or this matrix and v have different + * length of rows + */ + public final Vector projection(final Matrix v) { + if(elements.length != v.elements.length) { + throw new IllegalArgumentException("p and v differ in row dimensionality!"); + } + Vector sum = new Vector(elements.length); + for(int i = 0; i < v.columndimension; i++) { + // TODO: optimize - copy less. + Vector v_i = v.getColumnVector(i); + sum.plusEquals(v_i.times(scalarProduct(v_i))); + } + return sum; + } + + /** + * Check if this.getDimensionality() == v.getDimensionality(). + * + * @throws IllegalArgumentException if the dimensions do not agree + */ + private final void checkDimensions(final Vector v) { + if(this.elements.length != v.elements.length) { + throw new IllegalArgumentException("Vector dimensions must agree."); + } + } + + @Override + public int hashCode() { + final int PRIME = 31; + int result = 1; + result = PRIME * result + Arrays.hashCode(this.elements); + return result; + } + + @Override + public boolean equals(Object obj) { + if(this == obj) { + return true; + } + if(obj == null) { + return false; + } + if(getClass() != obj.getClass()) { + return false; + } + final Vector other = (Vector) obj; + if(this.elements.length != other.elements.length) { + return false; + } + for(int i = 0; i < this.elements.length; i++) { + if(this.elements[i] != other.elements[i]) { + return false; + } + } + return true; + } + + /** + * Returns a string representation of this vector. + * + * @return a string representation of this vector. + */ + @Override + public final String toString() { + return FormatUtil.format(this); + } + + /** + * Returns a string representation of this vector without adding extra + * whitespace + * + * @return a string representation of this vector. + */ + public final String toStringNoWhitespace() { + return "[" + FormatUtil.format(elements, ",") + "]"; + } + + /** + * Reset the Vector to 0. 
+ */ + public void setZero() { + Arrays.fill(elements, 0.0); + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/FittingFunction.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/FittingFunction.java new file mode 100644 index 00000000..c75ee71e --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/FittingFunction.java @@ -0,0 +1,39 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra.fitting; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +/** + * Interface for a function used in Levenberg-Marquard-Fitting + * + * @author Erich Schubert + */ +public interface FittingFunction { + /** + * Compute value at position x as well as gradients for the parameters + * + * @param x Current coordinate + * @param params Function parameters parameters + * @return Array consisting of y value and parameter gradients + */ + public FittingFunctionResult eval(double x, double[] params); +} diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/FittingFunctionResult.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/FittingFunctionResult.java new file mode 100644 index 00000000..0c4db5c1 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/FittingFunctionResult.java @@ -0,0 +1,51 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra.fitting; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * Result returned by a fitting function. 
+ * + * @author Erich Schubert + */ +public class FittingFunctionResult { + /** + * Value at the given coordinate + */ + public double y; + /** + * Parameter gradients at the given coordinate + */ + public double[] gradients; + + /** + * Trivial/generic constructor for the result class + * + * @param y value at the coordinate + * @param gradients parameter gradients + */ + public FittingFunctionResult(double y, double[] gradients) { + super(); + this.y = y; + this.gradients = gradients; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/GaussianFittingFunction.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/GaussianFittingFunction.java new file mode 100644 index 00000000..9edcd116 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/GaussianFittingFunction.java @@ -0,0 +1,96 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra.fitting; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Gaussian function for parameter fitting + * + * Based loosely on fgauss in the book "Numerical Recipies". 
<br /> + * We did not bother to implement all optimizations at the benefit of having + * easier to use parameters. Instead of position, amplitude and width used in + * the book, we use the traditional Gaussian parameters mean, standard deviation + * and a linear scaling factor (which is mostly useful when combining multiple + * distributions) The cost are some additional computations such as a square + * root and probably a slight loss in precision. This could of course have been + * handled by an appropriate wrapper instead. + * + * Due to their license, we cannot use their code, but we have to implement the + * mathematics ourselves. We hope the loss in precision isn't big. + * + * They are also arranged differently: the book uses + * + * <pre> + * amplitude, position, width + * </pre> + * + * whereas we use + * + * <pre> + * mean, stddev, scaling + * </pre> + * + * But we're obviously using essentially the same mathematics. + * + * The function also can use a mixture of gaussians, just use an appropriate + * number of parameters (which obviously needs to be a multiple of 3) + * + * @author Erich Schubert + */ +public class GaussianFittingFunction implements FittingFunction { + /** + * compute the mixture of Gaussians at the given position + */ + @Override + public FittingFunctionResult eval(double x, double[] params) { + int len = params.length; + + // We always need triples: (mean, stddev, scaling) + assert (len % 3) == 0; + + double y = 0.0; + double[] gradients = new double[len]; + + // Loosely based on the book: + // Numerical Recipes in C: The Art of Scientific Computing + // Due to their license, we cannot use their code, but we have to implement + // the mathematics ourselves. We hope the loss in precision is not too big. 
+ for(int i = 0; i < params.length; i += 3) { + // Standardized Gaussian parameter (centered, scaled by stddev) + double stdpar = (x - params[i]) / params[i + 1]; + double e = Math.exp(-.5 * stdpar * stdpar); + double localy = params[i + 2] / (params[i + 1] * MathUtil.SQRTTWOPI) * e; + + y += localy; + // mean gradient + gradients[i] = localy * stdpar; + // stddev gradient + gradients[i + 1] = (stdpar * stdpar - 1.0) * localy; + // amplitude gradient + gradients[i + 2] = e / (params[i + 1] * MathUtil.SQRTTWOPI); + } + + return new FittingFunctionResult(y, gradients); + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/LevenbergMarquardtMethod.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/LevenbergMarquardtMethod.java new file mode 100644 index 00000000..3822c67b --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/LevenbergMarquardtMethod.java @@ -0,0 +1,380 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra.fitting; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +import de.lmu.ifi.dbs.elki.math.linearalgebra.LinearEquationSystem; + +/** + * Function parameter fitting using Levenberg-Marquardt method. + * + * The Levenberg-Marquardt Algorithm (LMA) is a combination of the Gauss-Newton + * Algorithm (GNA) and the method of steepest descent. As such it usually gives + * more stable results and better convergence. + * + * Implemented loosely based on the book: <br /> + * Numerical Recipes In C: The Art Of Scientific Computing <br/> + * ISBN 0-521-43108-5 <br/> + * Press, W.H. and Teukolsky, S.A. and Vetterling, W.T. and Flannery, B.P. 
<br/>
+ * Cambridge University Press, Cambridge, Mass, 1992
+ *
+ * Due to their license, we cannot use their code, but we have to implement the
+ * mathematics ourselves. We hope the loss in precision isn't too big.
+ *
+ * TODO: Replace implementation by one based on <br/>
+ * M.I.A. Lourakis levmar:<br />
+ * Levenberg-Marquardt nonlinear least squares algorithms in C/C++
+ *
+ * Which supposedly offers increased robustness.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has FittingFunction
+ * @apiviz.uses FittingFunctionResult oneway - - «create»
+ */
+public class LevenbergMarquardtMethod {
+  /**
+   * Function to fit to
+   */
+  public FittingFunction func;
+
+  /**
+   * Measurement points the function is evaluated at
+   */
+  private double[] x;
+
+  /**
+   * Observed function values, parallel to x
+   */
+  private double[] y;
+
+  /**
+   * Per-point uncertainty, parallel to x. Each point is weighted by
+   * 1 / (s * s).
+   */
+  private double[] s;
+
+  /**
+   * Number of parameters
+   */
+  private int numparams;
+
+  /**
+   * Parameters to use in fitting
+   */
+  private double[] params;
+
+  /**
+   * Chi-Squared information for parameters
+   */
+  private double chisq;
+
+  /**
+   * Number of parameters to fit
+   */
+  private int numfit;
+
+  /**
+   * Which parameters to fit
+   */
+  private boolean[] dofit;
+
+  /**
+   * Working space for covariance matrix
+   */
+  private double[][] covmat;
+
+  /**
+   * Working space for alphas
+   */
+  private double[][] alpha;
+
+  /**
+   * Lambda (refinement step size)
+   */
+  private double lambda;
+
+  /**
+   * Candidate parameters of the current iteration
+   */
+  private double[] paramstry;
+
+  /**
+   * Working space for betas
+   */
+  private double[] beta;
+
+  /**
+   * Parameter update of the current iteration (one entry per fitted
+   * parameter)
+   */
+  private double[] deltaparams;
+
+  /**
+   * Maximum number of iterations in run()
+   */
+  public int maxruns = 1000;
+
+  /**
+   * Maximum number of small improvements (stopping condition)
+   */
+  public int maxsmall = 3;
+
+  /**
+   * "Small value" condition for stopping
+   */
+  public double small = 0.01;
+
+  /**
+   * Function fitting using Levenberg-Marquardt Method.
+   *
+   * The arrays are stored by reference, not copied; params is updated in
+   * place whenever an iteration improves the fit.
+   *
+   * @param func Function to fit to
+   * @param params Initial parameters
+   * @param dofit Flags on which parameters to optimize
+   * @param x Measurement points
+   * @param y Actual function values
+   * @param s Confidence / Variance in measurement data
+   */
+  public LevenbergMarquardtMethod(FittingFunction func, double[] params, boolean[] dofit, double[] x, double[] y, double[] s) {
+    assert x.length == y.length;
+    assert x.length == s.length;
+    assert params.length == dofit.length;
+
+    // function to optimize for
+    this.func = func;
+
+    // Store parameters
+    this.x = x;
+    this.y = y;
+    this.s = s;
+    this.params = params;
+    this.dofit = dofit;
+
+    // keep number of parameters ready
+    this.numparams = this.params.length;
+
+    // count how many parameters to fit
+    numfit = 0;
+    for(int i = 0; i < numparams; i++) {
+      if(dofit[i]) {
+        numfit++;
+      }
+    }
+
+    assert (numfit > 0);
+
+    // initialize working spaces
+    covmat = new double[this.numfit][this.numfit];
+    alpha = new double[this.numfit][this.numfit];
+
+    // set lambda to initial value
+    lambda = 0.001;
+
+    // setup scratch spaces
+    paramstry = params.clone();
+    beta = new double[this.numfit];
+    deltaparams = new double[numparams];
+
+    chisq = simulateParameters(params);
+  }
+
+  /**
+   * Compute new chisquared error
+   *
+   * This function also modifies the alpha and beta matrixes!
+   *
+   * @param curparams Parameters to use in computation.
+   * @return new chi squared
+   */
+  private double simulateParameters(double[] curparams) {
+    // Reset alpha and beta, both are accumulated from scratch below
+    for(int i = 0; i < numfit; i++) {
+      for(int j = 0; j < numfit; j++) {
+        alpha[i][j] = 0.0;
+      }
+      beta[i] = 0.0;
+    }
+
+    double newchisq = 0.0;
+
+    // Simulation loop over all data
+    for(int di = 0; di < x.length; di++) {
+      FittingFunctionResult res = func.eval(x[di], curparams);
+      // weight of the point: inverse squared uncertainty
+      double sigma2inv = 1.0 / (s[di] * s[di]);
+      double deltay = y[di] - res.y;
+      // i and j index ALL parameters (they are used on dofit and on the
+      // gradients), so they must run up to numparams. i2 and j2 are the
+      // indices that only count the params with dofit true!
+      int i2 = 0;
+      for(int i = 0; i < numparams; i++) {
+        if(dofit[i]) {
+          double wt = res.gradients[i] * sigma2inv;
+          int j2 = 0;
+          // fill only half of the matrix, use symmetry below to complete the
+          // remainder.
+          for(int j = 0; j <= i; j++) {
+            if(dofit[j]) {
+              alpha[i2][j2] += wt * res.gradients[j];
+              j2++;
+            }
+          }
+          beta[i2] += deltay * wt;
+          i2++;
+        }
+      }
+      newchisq += deltay * deltay * sigma2inv;
+    }
+    // fill symmetric side of matrix. This has to start at row 0, otherwise
+    // the first row keeps zeros above the diagonal.
+    for(int i = 0; i < numfit; i++) {
+      for(int j = i + 1; j < numfit; j++) {
+        alpha[i][j] = alpha[j][i];
+      }
+    }
+
+    return newchisq;
+  }
+
+  /**
+   * Perform an iteration of the approximation loop.
+   *
+   * The step is accepted (params and chisq updated, lambda decreased) only
+   * when it lowers chi squared; otherwise lambda is increased.
+   */
+  public void iterate() {
+    // build covmat out of fitting matrix by multiplying diagonal elements with
+    // 1+lambda
+    for(int i = 0; i < numfit; i++) {
+      for(int j = 0; j < numfit; j++) {
+        covmat[i][j] = alpha[i][j];
+      }
+      covmat[i][i] = alpha[i][i] * (1.0 + lambda);
+    }
+    // Solve the equation system (Gauss-Jordan)
+    LinearEquationSystem ls = new LinearEquationSystem(covmat, beta);
+    ls.solveByTotalPivotSearch();
+    // update covmat with the inverse
+    covmat = ls.getCoefficents();
+    // and deltaparams with the solution vector
+    deltaparams = ls.getRHS();
+    // apply the update to the fitted parameters only
+    int i2 = 0;
+    for(int i = 0; i < numparams; i++) {
+      if(dofit[i]) {
+        paramstry[i] = params[i] + deltaparams[i2];
+        i2++;
+      }
+    }
+    double newchisq = simulateParameters(paramstry);
+    // have the results improved?
+    if(newchisq < chisq) {
+      // TODO: Do we need a larger limit than MIN_NORMAL?
+      if(lambda * 0.1 > Double.MIN_NORMAL) {
+        lambda = lambda * 0.1;
+      }
+      chisq = newchisq;
+      // keep modified covmat as new alpha matrix
+      // and da as new beta
+      // NOTE(review): simulateParameters(paramstry) above has already
+      // rewritten alpha and beta; this copy replaces them with the solved
+      // system, exactly as the original did - confirm against the reference
+      // algorithm.
+      for(int i = 0; i < numfit; i++) {
+        for(int j = 0; j < numfit; j++) {
+          alpha[i][j] = covmat[i][j];
+        }
+        beta[i] = deltaparams[i];
+      }
+      for(int i = 0; i < numparams; i++) {
+        params[i] = paramstry[i];
+      }
+    }
+    else {
+      // TODO: Do we need a larger limit than MAX_VALUE?
+      // Does it ever make sense to go as far up?
+      // Anyway, this should prevent overflows.
+      if(lambda * 10 < Double.MAX_VALUE) {
+        lambda = lambda * 10;
+      }
+    }
+  }
+
+  /**
+   * Get the final covariance matrix.
+   *
+   * Parameters that were not to be optimized are filled with zeros.
+   *
+   * @return covariance matrix for all parameters
+   */
+  public double[][] getCovmat() {
+    // Since we worked only on params with dofit=true, we need to expand the
+    // matrix to cover all parameters.
+    double[][] fullcov = new double[numparams][numparams];
+    int i2 = 0;
+    for(int i = 0; i < numparams; i++) {
+      int j2 = 0;
+      for(int j = 0; j < numparams; j++) {
+        if(dofit[i] && dofit[j]) {
+          fullcov[i][j] = covmat[i2][j2];
+        }
+        else {
+          fullcov[i][j] = 0.0;
+        }
+        if(dofit[j]) {
+          j2++;
+        }
+      }
+      if(dofit[i]) {
+        i2++;
+      }
+    }
+    return fullcov;
+  }
+
+  /**
+   * Get current parameters.
+   *
+   * @return parameters (the array passed to the constructor, no copy)
+   */
+  public double[] getParams() {
+    return params;
+  }
+
+  /**
+   * Get current ChiSquared (squared error sum)
+   *
+   * @return error measure
+   */
+  public double getChiSq() {
+    return chisq;
+  }
+
+  /**
+   * Iterate until convergence, at most {@link #maxruns} times.
+   *
+   * The loop also stops once more than {@link #maxsmall} iterations improved
+   * chi squared by less than {@link #small}. The budgets are copied to local
+   * counters, so the public settings are not consumed and run() can be
+   * called again.
+   */
+  public void run() {
+    int runsLeft = maxruns;
+    int smallLeft = maxsmall;
+    while(runsLeft > 0) {
+      double oldchi = getChiSq();
+      iterate();
+      runsLeft--;
+      double newchi = getChiSq();
+      // stop condition: only a small improvement in Chi.
+      double deltachi = newchi - oldchi;
+      if(deltachi < 0 && deltachi > -small) {
+        smallLeft--;
+        if(smallLeft < 0) {
+          break;
+        }
+      }
+    }
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/package-info.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/package-info.java
new file mode 100644
index 00000000..f2ad8c35
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/package-info.java
@@ -0,0 +1,28 @@
+/**
+ * <p>Function to numerically fit a function (such as a
+ * {@link de.lmu.ifi.dbs.elki.math.linearalgebra.fitting.GaussianFittingFunction Gaussian distribution}
+ * to given data.</p>
+ */
+/*
+This file is part of ELKI:
+Environment for Developing KDD-Applications Supported by Index-Structures
+
+Copyright (C) 2011
+Ludwig-Maximilians-Universität München
+Lehr- und Forschungseinheit für Datenbanksysteme
+ELKI Development Team
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+package de.lmu.ifi.dbs.elki.math.linearalgebra.fitting;
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/package-info.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/package-info.java new file mode 100644 index 00000000..d38b9a7b --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/package-info.java @@ -0,0 +1,64 @@ +/** +<p>Linear Algebra package provides classes and computational methods for operations on matrices.</p> +<p> + The content of this package is adapted from the Jama package. +</p> + +<p> + Five fundamental matrix decompositions, which consist of pairs or triples + of matrices, permutation vectors, and the like, produce results in five + decomposition classes. These decompositions are accessed by the Matrix + class to compute solutions of simultaneous linear equations, determinants, + inverses and other matrix functions. The five decompositions are: +<ul> + <li>Cholesky Decomposition of symmetric, positive definite matrices.</li> + <li>LU Decomposition of rectangular matrices.</li> + <li>QR Decomposition of rectangular matrices.</li> + <li>Singular Value Decomposition of rectangular matrices.</li> + <li>Eigenvalue Decomposition of both symmetric and nonsymmetric square matrices.</li> +</ul> +<dl> +<dt><b>Example of use:</b></dt> +<p> +<dd>Solve a linear system A x = b and compute the residual norm, ||b - A x||. +<p><pre> + double[][] vals = {{1.,2.,3},{4.,5.,6.},{7.,8.,10.}}; + Matrix A = new Matrix(vals); + Matrix b = Matrix.random(3,1); + Matrix x = A.solve(b); + Matrix r = A.times(x).minus(b); + double rnorm = r.normInf(); +</pre></dd> +</dl> + +<p>The original Jama-package has been developed by +the <a href="http://www.mathworks.com/">MathWorks</a> and <a +href="http://www.nist.gov/">NIST</a> and +can be found at <a href="http://math.nist.gov/javanumerics/jama/">http://math.nist.gov/javanumerics/jama/</a>. + + Here, for the adaption some classes and methods convenient for data mining applications within ELKI were added. 
+ Furthermore some erroneous comments were corrected and the coding-style was subtly changed to a more Java-typical style. +*/ +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ +package de.lmu.ifi.dbs.elki.math.linearalgebra;
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/AbstractCovarianceMatrixBuilder.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/AbstractCovarianceMatrixBuilder.java new file mode 100644 index 00000000..3066b87d --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/AbstractCovarianceMatrixBuilder.java @@ -0,0 +1,71 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +import java.util.Collection; +import java.util.Iterator; + +import de.lmu.ifi.dbs.elki.data.NumberVector; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; +import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; +import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable; + +/** + * Abstract class with the task of computing a Covariance matrix to be used in PCA. 
+ * Mostly the specification of an interface.
+ *
+ * @author Erich Schubert
+ *
+ * @param <V> Vector class in use
+ */
+public abstract class AbstractCovarianceMatrixBuilder<V extends NumberVector<? extends V, ?>> implements Parameterizable, CovarianceMatrixBuilder<V> {
+  /**
+   * Covariance matrix of the whole relation: delegates to
+   * {@link #processIds} with all ids of the relation.
+   */
+  @Override
+  public Matrix processDatabase(Relation<? extends V> database) {
+    final DBIDs allIds = database.getDBIDs();
+    return processIds(allIds, database);
+  }
+
+  /**
+   * The actual computation, to be supplied by subclasses.
+   */
+  @Override
+  public abstract Matrix processIds(DBIDs ids, Relation<? extends V> database);
+
+  /**
+   * Covariance matrix of the first k entries of a query result: collects
+   * their ids in iteration order and delegates to {@link #processIds}.
+   */
+  @Override
+  public <D extends NumberDistance<?, ?>> Matrix processQueryResults(Collection<DistanceResultPair<D>> results, Relation<? extends V> database, int k) {
+    ModifiableDBIDs collected = DBIDUtil.newArray(k);
+    Iterator<DistanceResultPair<D>> iter = results.iterator();
+    int taken = 0;
+    // stop at k ids or at the end of the results, whichever comes first
+    while(iter.hasNext() && taken < k) {
+      DistanceResultPair<D> pair = iter.next();
+      collected.add(pair.getDBID());
+      taken++;
+    }
+    return processIds(collected, database);
+  }
+
+  /**
+   * Covariance matrix of a complete query result, i.e. k is the size of the
+   * result collection.
+   */
+  @Override
+  public final <D extends NumberDistance<?, ?>> Matrix processQueryResults(Collection<DistanceResultPair<D>> results, Relation<? extends V> database) {
+    final int all = results.size();
+    return processQueryResults(results, database, all);
+  }
+
+  // TODO: Allow KNNlist to avoid building the DBID array?
+}
\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/CompositeEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/CompositeEigenPairFilter.java
new file mode 100644
index 00000000..eb42d654
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/CompositeEigenPairFilter.java
@@ -0,0 +1,107 @@
+package de.lmu.ifi.dbs.elki.math.linearalgebra.pca;
+/*
+This file is part of ELKI:
+Environment for Developing KDD-Applications Supported by Index-Structures
+
+Copyright (C) 2011
+Ludwig-Maximilians-Universität München
+Lehr- und Forschungseinheit für Datenbanksysteme
+ELKI Development Team
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+import java.util.ArrayList;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.math.linearalgebra.EigenPair;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.SortedEigenPairs;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectListParameter;
+
+/**
+ * The <code>CompositeEigenPairFilter</code> can be used to build a chain of
+ * eigenpair filters.
+ *
+ * @author Elke Achtert
+ */
+// todo parameter comments
+public class CompositeEigenPairFilter implements EigenPairFilter {
+  /**
+   * The list of filters to use.
+   */
+  public static final OptionID EIGENPAIR_FILTER_COMPOSITE_LIST = OptionID.getOrCreateOptionID("pca.filter.composite.list", "A comma separated list of the class names of the filters to be used. " + "The specified filters will be applied sequentially in the given order.");
+
+  /**
+   * The filters to be applied.
+   */
+  private List<EigenPairFilter> filters;
+
+  /**
+   * Constructor.
+   *
+   * @param filters Filters to use.
+   */
+  public CompositeEigenPairFilter(List<EigenPairFilter> filters) {
+    super();
+    this.filters = filters;
+  }
+
+  /**
+   * Filters the specified eigenpairs into strong and weak eigenpairs by
+   * running the filters one after the other. Each filter only sees the pairs
+   * the previous filters kept as strong.
+   *
+   * The strong eigenpairs of the result are those that passed every filter.
+   * The weak eigenpairs are all pairs rejected by any filter, so the two
+   * lists together cover every input pair. With an empty filter chain all
+   * pairs are strong.
+   *
+   * @param eigenPairs the eigenPairs (i.e. the eigenvectors and their
+   *        corresponding eigenvalues) to filter
+   * @return the filtered eigenpairs
+   */
+  @Override
+  public FilteredEigenPairs filter(SortedEigenPairs eigenPairs) {
+    // before any filter ran, everything counts as strong
+    List<EigenPair> strong = new ArrayList<EigenPair>(eigenPairs.size());
+    for(int i = 0; i < eigenPairs.size(); i++) {
+      strong.add(eigenPairs.getEigenPair(i));
+    }
+    List<EigenPair> weak = new ArrayList<EigenPair>();
+
+    SortedEigenPairs remaining = eigenPairs;
+    for(EigenPairFilter f : filters) {
+      FilteredEigenPairs stage = f.filter(remaining);
+      strong = stage.getStrongEigenPairs();
+      // Pairs rejected here survived all earlier filters, so they go in
+      // front of the pairs rejected before.
+      weak.addAll(0, stage.getWeakEigenPairs());
+      remaining = new SortedEigenPairs(strong);
+    }
+    return new FilteredEigenPairs(weak, strong);
+  }
+
+  /**
+   * Parameterization class.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   */
+  public static class Parameterizer extends AbstractParameterizer {
+    /**
+     * The filters to be applied.
+     */
+    private List<EigenPairFilter> filters = null;
+
+    @Override
+    protected void makeOptions(Parameterization config) {
+      super.makeOptions(config);
+      ObjectListParameter<EigenPairFilter> filtersP = new ObjectListParameter<EigenPairFilter>(EIGENPAIR_FILTER_COMPOSITE_LIST, EigenPairFilter.class);
+
+      if(config.grab(filtersP)) {
+        filters = filtersP.instantiateClasses(config);
+      }
+    }
+
+    @Override
+    protected CompositeEigenPairFilter makeInstance() {
+      return new CompositeEigenPairFilter(filters);
+    }
+  }
+}
\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/CovarianceMatrixBuilder.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/CovarianceMatrixBuilder.java
new file mode 100644
index 00000000..668974e9
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/CovarianceMatrixBuilder.java
@@ -0,0 +1,81 @@
+package de.lmu.ifi.dbs.elki.math.linearalgebra.pca;
+/*
+This file is part of ELKI:
+Environment for Developing KDD-Applications Supported by Index-Structures
+
+Copyright (C) 2011
+Ludwig-Maximilians-Universität München
+Lehr- und Forschungseinheit für Datenbanksysteme
+ELKI Development Team
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+import java.util.Collection;
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
+
+/**
+ * Interface for computing covariance matrices on a data set.
+ *
+ * The data to use can be given as a whole relation, as a set of ids, or as
+ * the result of a distance query.
+ *
+ * @author Erich Schubert
+ *
+ * @param <V> Vector base type
+ */
+public interface CovarianceMatrixBuilder<V extends NumberVector<? extends V, ?>> {
+  /**
+   * Compute Covariance Matrix for a complete database
+   *
+   * @param database the database used
+   * @return Covariance Matrix
+   */
+  public Matrix processDatabase(Relation<? extends V> database);
+
+  /**
+   * Compute Covariance Matrix for a collection of database IDs
+   *
+   * @param ids a collection of ids
+   * @param database the database used
+   * @return Covariance Matrix
+   */
+  public Matrix processIds(DBIDs ids, Relation<? extends V> database);
+
+  /**
+   * Compute Covariance Matrix for the first k entries of a QueryResult
+   * Collection
+   *
+   * {@link AbstractCovarianceMatrixBuilder} implements this by collecting at
+   * most k ids and running processIds on them.
+   *
+   * @param <D> distance type of the query results
+   * @param results a collection of QueryResults
+   * @param database the database used
+   * @param k the number of entries to process
+   * @return Covariance Matrix
+   */
+  public <D extends NumberDistance<?, ?>> Matrix processQueryResults(Collection<DistanceResultPair<D>> results, Relation<? extends V> database, int k);
+
+  /**
+   * Compute Covariance Matrix for a complete QueryResult Collection
+   *
+   * {@link AbstractCovarianceMatrixBuilder} implements this by calling the
+   * three-argument variant with k set to the size of the collection.
+   *
+   * @param <D> distance type of the query results
+   * @param results a collection of QueryResults
+   * @param database the database used
+   * @return Covariance Matrix
+   */
+  public <D extends NumberDistance<?, ?>> Matrix processQueryResults(Collection<DistanceResultPair<D>> results, Relation<? extends V> database);
+}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/EigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/EigenPairFilter.java new file mode 100644 index 00000000..a616cdf0 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/EigenPairFilter.java @@ -0,0 +1,51 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +import de.lmu.ifi.dbs.elki.math.linearalgebra.SortedEigenPairs; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable; + +/** + * The eigenpair filter is used to filter eigenpairs (i.e. eigenvectors + * and their corresponding eigenvalues) which are a result of a + * Variance Analysis Algorithm, e.g. Principal Component Analysis. + * The eigenpairs are filtered into two types: strong and weak eigenpairs, + * where strong eigenpairs having high variances + * and weak eigenpairs having small variances. 
+ *
+ * @author Elke Achtert
+ *
+ * @apiviz.uses SortedEigenPairs oneway - - reads
+ * @apiviz.uses FilteredEigenPairs oneway - - «create»
+ */
+public interface EigenPairFilter extends Parameterizable {
+  /**
+   * Filters the specified eigenpairs into strong and weak eigenpairs,
+   * where strong eigenpairs have high variances
+   * and weak eigenpairs have small variances.
+   *
+   * @param eigenPairs the eigenPairs (i.e. the eigenvectors and their
+   *        corresponding eigenvalues) to filter
+   * @return the filtered eigenpairs
+   */
+  public FilteredEigenPairs filter(SortedEigenPairs eigenPairs);
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/FilteredEigenPairs.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/FilteredEigenPairs.java
new file mode 100644
index 00000000..787732f4
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/FilteredEigenPairs.java
@@ -0,0 +1,101 @@
+package de.lmu.ifi.dbs.elki.math.linearalgebra.pca;
+/*
+This file is part of ELKI:
+Environment for Developing KDD-Applications Supported by Index-Structures
+
+Copyright (C) 2011
+Ludwig-Maximilians-Universität München
+Lehr- und Forschungseinheit für Datenbanksysteme
+ELKI Development Team
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+import de.lmu.ifi.dbs.elki.math.linearalgebra.EigenPair;
+
+import java.util.List;
+
+/**
+ * Encapsulates weak and strong eigenpairs that have been filtered out
+ * by an eigenpair filter.
+ *
+ * The lists handed to the constructor are stored and returned as they are,
+ * without copying.
+ *
+ * @author Elke Achtert
+ *
+ * @apiviz.has de.lmu.ifi.dbs.elki.math.linearalgebra.EigenPair
+ */
+public class FilteredEigenPairs {
+  /**
+   * The weak eigenpairs.
+   */
+  private final List<EigenPair> weakEigenPairs;
+
+  /**
+   * The strong eigenpairs.
+   */
+  private final List<EigenPair> strongEigenPairs;
+
+  /**
+   * Creates a new object that encapsulates weak and strong eigenpairs
+   * that have been filtered out by an eigenpair filter
+   *
+   * @param weakEigenPairs the weak eigenpairs
+   * @param strongEigenPairs the strong eigenpairs
+   */
+  public FilteredEigenPairs(List<EigenPair> weakEigenPairs, List<EigenPair> strongEigenPairs) {
+    this.weakEigenPairs = weakEigenPairs;
+    this.strongEigenPairs = strongEigenPairs;
+  }
+
+  /**
+   * Returns the weak eigenpairs (no copy).
+   *
+   * @return the weak eigenpairs
+   */
+  public List<EigenPair> getWeakEigenPairs() {
+    return weakEigenPairs;
+  }
+
+  /**
+   * Counts the weak eigenpairs.
+   *
+   * @return number of weak eigenpairs
+   */
+  public int countWeakEigenPairs() {
+    // this used to return the size of the strong list
+    return weakEigenPairs.size();
+  }
+
+  /**
+   * Returns the strong eigenpairs (no copy).
+   *
+   * @return the strong eigenpairs
+   */
+  public List<EigenPair> getStrongEigenPairs() {
+    return strongEigenPairs;
+  }
+
+  /**
+   * Counts the strong eigenpairs.
+   *
+   * @return number of strong eigenpairs
+   */
+  public int countStrongEigenPairs() {
+    return strongEigenPairs.size();
+  }
+
+  /**
+   * Returns a string representation of the object: the weak eigenpairs
+   * followed, on a new line, by the strong eigenpairs.
+   *
+   * @return a string representation of the object.
+   */
+  @Override
+  public String toString() {
+    return "weak EP: " + weakEigenPairs + "\nstrong EP: " + strongEigenPairs;
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/FirstNEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/FirstNEigenPairFilter.java
new file mode 100644
index 00000000..40ca5047
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/FirstNEigenPairFilter.java
@@ -0,0 +1,134 @@
+package de.lmu.ifi.dbs.elki.math.linearalgebra.pca;
+/*
+This file is part of ELKI:
+Environment for Developing KDD-Applications Supported by Index-Structures
+
+Copyright (C) 2011
+Ludwig-Maximilians-Universität München
+Lehr- und Forschungseinheit für Datenbanksysteme
+ELKI Development Team
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+import java.util.ArrayList;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.EigenPair;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.SortedEigenPairs;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+
+/**
+ * The FirstNEigenPairFilter marks the n highest eigenpairs as strong
+ * eigenpairs, where n is a user specified number.
+ *
+ * @author Elke Achtert
+ */
+// todo parameter comments
+@Title("First n Eigenpair filter")
+@Description("Sorts the eigenpairs in decending order of their eigenvalues and marks the first n eigenpairs as strong eigenpairs.")
+public class FirstNEigenPairFilter implements EigenPairFilter {
+  /**
+   * The logger for this class.
+   */
+  private static final Logging logger = Logging.getLogger(FirstNEigenPairFilter.class);
+
+  /**
+   * Parameter n
+   */
+  public static final OptionID EIGENPAIR_FILTER_N = OptionID.getOrCreateOptionID("pca.filter.n", "The number of strong eigenvectors: n eigenvectors with the n highest " + "eigenvalues are marked as strong eigenvectors.");
+
+  /**
+   * The threshold for strong eigenvectors: n eigenvectors with the n highest
+   * eigenvalues are marked as strong eigenvectors.
+   */
+  private int n;
+
+  /**
+   * Constructor.
+   *
+   * @param n the number of leading eigenpairs to mark as strong
+   */
+  public FirstNEigenPairFilter(int n) {
+    super();
+    this.n = n;
+  }
+
+  /**
+   * Marks the first n of the given eigenpairs as strong and all remaining
+   * ones as weak. If fewer than n pairs are given, all of them are strong.
+   *
+   * @param eigenPairs the eigenpairs to split, taken in their given order
+   * @return the filtered eigenpairs
+   */
+  @Override
+  public FilteredEigenPairs filter(SortedEigenPairs eigenPairs) {
+    // init strong and weak eigenpairs
+    List<EigenPair> strongEigenPairs = new ArrayList<EigenPair>();
+    List<EigenPair> weakEigenPairs = new ArrayList<EigenPair>();
+
+    // determine strong and weak eigenpairs
+    for(int i = 0; i < eigenPairs.size(); i++) {
+      EigenPair eigenPair = eigenPairs.getEigenPair(i);
+      if(i < n) {
+        strongEigenPairs.add(eigenPair);
+      }
+      else {
+        weakEigenPairs.add(eigenPair);
+      }
+    }
+
+    // the message is only assembled when it is actually going to be logged
+    if(logger.isDebugging()) {
+      StringBuilder msg = new StringBuilder();
+      msg.append("sortedEigenPairs ").append(eigenPairs.toString());
+      msg.append("\nn = ").append(n);
+      msg.append("\nstrong EigenPairs = ").append(strongEigenPairs);
+      msg.append("\nweak EigenPairs = ").append(weakEigenPairs);
+      logger.debugFine(msg.toString());
+    }
+
+    return new FilteredEigenPairs(weakEigenPairs, strongEigenPairs);
+  }
+
+  /**
+   * Parameterization class.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   */
+  public static class Parameterizer extends AbstractParameterizer {
+    /**
+     * The number of eigenpairs to keep.
+     */
+    protected int n = 0;
+
+    @Override
+    protected void makeOptions(Parameterization config) {
+      super.makeOptions(config);
+      IntParameter nP = new IntParameter(EIGENPAIR_FILTER_N, new GreaterEqualConstraint(0));
+      if(config.grab(nP)) {
+        n = nP.getValue();
+      }
+    }
+
+    @Override
+    protected FirstNEigenPairFilter makeInstance() {
+      return new FirstNEigenPairFilter(n);
+    }
+  }
+}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/LimitEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/LimitEigenPairFilter.java new file mode 100644 index 00000000..338c6ba6 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/LimitEigenPairFilter.java @@ -0,0 +1,206 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Vector;
+
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.EigenPair;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.SortedEigenPairs;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.WrongParameterValueException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GlobalParameterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ParameterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ParameterFlagGlobalConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
+
+/**
+ * The LimitEigenPairFilter marks all eigenpairs having an (absolute) eigenvalue
+ * below the specified threshold (relative or absolute) as weak eigenpairs, the
+ * others are marked as strong eigenpairs.
+ *
+ * @author Elke Achtert
+ */
+@Title("Limit-based Eigenpair Filter")
+@Description("Filters all eigenpairs, which are lower than a given value.")
+public class LimitEigenPairFilter implements EigenPairFilter {
+  /**
+   * The logger for this class.
+   */
+  private static final Logging logger = Logging.getLogger(LimitEigenPairFilter.class);
+
+  /**
+   * "absolute" Flag
+   */
+  public static final OptionID EIGENPAIR_FILTER_ABSOLUTE = OptionID.getOrCreateOptionID("pca.filter.absolute", "Flag to mark delta as an absolute value.");
+
+  /**
+   * Parameter delta
+   */
+  public static final OptionID EIGENPAIR_FILTER_DELTA = OptionID.getOrCreateOptionID("pca.filter.delta", "The threshold for strong Eigenvalues. If not otherwise specified, delta " + "is a relative value w.r.t. the (absolute) highest Eigenvalues and has to be " + "a double between 0 and 1. To mark delta as an absolute value, use " + "the option -" + EIGENPAIR_FILTER_ABSOLUTE.getName() + ".");
+
+  /**
+   * The default value for delta.
+   */
+  public static final double DEFAULT_DELTA = 0.01;
+
+  /**
+   * Threshold for strong eigenpairs, can be absolute or relative.
+   */
+  private double delta;
+
+  /**
+   * Indicates whether delta is an absolute or a relative value.
+   */
+  private boolean absolute;
+
+  /**
+   * Constructor.
+   *
+   * @param delta threshold for strong eigenpairs
+   * @param absolute true if delta is an absolute value, false if it is
+   *        relative to the largest absolute eigenvalue
+   */
+  public LimitEigenPairFilter(double delta, boolean absolute) {
+    super();
+    this.delta = delta;
+    this.absolute = absolute;
+  }
+
+  /**
+   * Splits the eigenpairs by comparing the absolute value of each eigenvalue
+   * with the limit: pairs at or above the limit are strong, the others weak.
+   *
+   * @param eigenPairs the eigenpairs to split
+   * @return the filtered eigenpairs
+   */
+  @Override
+  public FilteredEigenPairs filter(SortedEigenPairs eigenPairs) {
+    StringBuffer msg = new StringBuffer();
+    if(logger.isDebugging()) {
+      msg.append("delta = ").append(delta);
+    }
+
+    final double limit = determineLimit(eigenPairs);
+    if(logger.isDebugging()) {
+      msg.append("\nlimit = ").append(limit);
+    }
+
+    List<EigenPair> strongEigenPairs = new ArrayList<EigenPair>();
+    List<EigenPair> weakEigenPairs = new ArrayList<EigenPair>();
+
+    // sort every pair into one of the two lists
+    final int count = eigenPairs.size();
+    for(int idx = 0; idx < count; idx++) {
+      EigenPair pair = eigenPairs.getEigenPair(idx);
+      double magnitude = Math.abs(pair.getEigenvalue());
+      List<EigenPair> target = (magnitude >= limit) ? strongEigenPairs : weakEigenPairs;
+      target.add(pair);
+    }
+
+    if(logger.isDebugging()) {
+      msg.append("\nstrong EigenPairs = ").append(strongEigenPairs);
+      msg.append("\nweak EigenPairs = ").append(weakEigenPairs);
+      logger.debugFine(msg.toString());
+    }
+
+    return new FilteredEigenPairs(weakEigenPairs, strongEigenPairs);
+  }
+
+  /**
+   * Computes the limit the absolute eigenvalues are compared with: delta
+   * itself in absolute mode, otherwise delta times the largest absolute
+   * eigenvalue.
+   *
+   * @param eigenPairs the eigenpairs to scan for the largest value
+   * @return the limit
+   */
+  private double determineLimit(SortedEigenPairs eigenPairs) {
+    if(absolute) {
+      return delta;
+    }
+    double largest = Double.NEGATIVE_INFINITY;
+    for(int idx = 0; idx < eigenPairs.size(); idx++) {
+      double magnitude = Math.abs(eigenPairs.getEigenPair(idx).getEigenvalue());
+      // explicit comparison instead of Math.max, which treats NaN differently
+      if(largest < magnitude) {
+        largest = magnitude;
+      }
+    }
+    return largest * delta;
+  }
+
+  /**
+   * Parameterization class.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   */
+  public static class Parameterizer extends AbstractParameterizer {
+    /**
+     * Threshold for strong eigenpairs, can be absolute or relative.
+     */
+    private double delta;
+
+    /**
+     * Indicates whether delta is an absolute or a relative value.
+     */
+    private boolean absolute;
+
+    @Override
+    protected void makeOptions(Parameterization config) {
+      super.makeOptions(config);
+      // the flag is read first, the check on delta below depends on it
+      Flag absoluteF = new Flag(EIGENPAIR_FILTER_ABSOLUTE);
+      if(config.grab(absoluteF)) {
+        absolute = absoluteF.getValue();
+      }
+
+      DoubleParameter deltaP = new DoubleParameter(EIGENPAIR_FILTER_DELTA, new GreaterEqualConstraint(0), DEFAULT_DELTA);
+      if(config.grab(deltaP)) {
+        delta = deltaP.getValue();
+        // TODO: make this a global constraint?
+        // an absolute threshold has to be given explicitly
+        if(absolute && deltaP.tookDefaultValue()) {
+          config.reportError(new WrongParameterValueException("Illegal parameter setting: " + "Flag " + absoluteF.getName() + " is set, " + "but no value for " + deltaP.getName() + " is specified."));
+        }
+      }
+
+      // Conditional Constraint:
+      // delta must be >= 0 and <= 1 if it's a relative value
+      // Since relative or absolute is dependent on the absolute flag this is a
+      // global constraint!
+      List<ParameterConstraint<Number>> relativeBounds = new Vector<ParameterConstraint<Number>>();
+      // TODO: Keep the constraint here - applies to non-conditional case as
+      // well, and is set above.
+      ParameterConstraint<Number> lowerBound = new GreaterEqualConstraint(0);
+      relativeBounds.add(lowerBound);
+      ParameterConstraint<Number> upperBound = new LessEqualConstraint(1);
+      relativeBounds.add(upperBound);
+
+      GlobalParameterConstraint relativeRange = new ParameterFlagGlobalConstraint<Number, Double>(deltaP, relativeBounds, absoluteF, false);
+      config.checkConstraint(relativeRange);
+    }
+
+    @Override
+    protected LimitEigenPairFilter makeInstance() {
+      return new LimitEigenPairFilter(delta, absolute);
+    }
+  }
+}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/NormalizingEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/NormalizingEigenPairFilter.java new file mode 100644 index 00000000..261455c0 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/NormalizingEigenPairFilter.java @@ -0,0 +1,91 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +import java.util.ArrayList; +import java.util.List; + +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.math.linearalgebra.EigenPair; +import de.lmu.ifi.dbs.elki.math.linearalgebra.SortedEigenPairs; +import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; +import de.lmu.ifi.dbs.elki.utilities.documentation.Description; +import de.lmu.ifi.dbs.elki.utilities.documentation.Title; + +/** + * The NormalizingEigenPairFilter normalizes all eigenvectors s.t. 
<eigenvector, + * eigenvector> * eigenvalue = 1, where <,> is the standard dot product + * + * @author Simon Paradies + */ +@Title("Perecentage based Eigenpair filter") +@Description("Normalizes all eigenpairs, consisting of eigenvalue e and eigenvector v such that <v,v> * e = 1, where <,> is the standard dot product.") +public class NormalizingEigenPairFilter implements EigenPairFilter { + /** + * The logger for this class. + */ + private static final Logging logger = Logging.getLogger(NormalizingEigenPairFilter.class); + + /** + * Provides a new EigenPairFilter that normalizes all eigenvectors s.t. + * eigenvalue * <eigenvector, eigenvector> = 1, where <,> is the standard dot + * product + */ + public NormalizingEigenPairFilter() { + super(); + } + + @Override + public FilteredEigenPairs filter(final SortedEigenPairs eigenPairs) { + // initialize strong and weak eigenpairs + // all normalized eigenpairs are regarded as strong + final List<EigenPair> strongEigenPairs = new ArrayList<EigenPair>(); + final List<EigenPair> weakEigenPairs = new ArrayList<EigenPair>(); + for(int i = 0; i < eigenPairs.size(); i++) { + final EigenPair eigenPair = eigenPairs.getEigenPair(i); + normalizeEigenPair(eigenPair); + strongEigenPairs.add(eigenPair); + } + if(logger.isDebugging()) { + final StringBuffer msg = new StringBuffer(); + msg.append("strong EigenPairs = ").append(strongEigenPairs); + msg.append("\nweak EigenPairs = ").append(weakEigenPairs); + logger.debugFine(msg.toString()); + } + + return new FilteredEigenPairs(weakEigenPairs, strongEigenPairs); + } + + /** + * Normalizes an eigenpair consisting of eigenvector v and eigenvalue e s.t. + * <v,v> * e = 1 + * + * @param eigenPair the eigenpair to be normalized + * + */ + private void normalizeEigenPair(final EigenPair eigenPair) { + final Vector eigenvector = eigenPair.getEigenvector(); + final double scaling = 1.0 / Math.sqrt(eigenPair.getEigenvalue()) * eigenvector.normF(); + eigenvector.timesEquals(scaling); + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredResult.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredResult.java new file mode 100644 index 00000000..5d10d4bc --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredResult.java @@ -0,0 +1,267 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +import java.util.Iterator; +import java.util.List; + +import de.lmu.ifi.dbs.elki.math.linearalgebra.EigenPair; +import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix; +import de.lmu.ifi.dbs.elki.math.linearalgebra.ProjectionResult; +import de.lmu.ifi.dbs.elki.math.linearalgebra.SortedEigenPairs; +import de.lmu.ifi.dbs.elki.utilities.Util; + +/** + * Result class for a filtered PCA. This differs from regular PCA by having the + * Eigenvalues and Eigenvectors separated into "strong" and "weak" Eigenvectors, + * and thus a dimension. Usually this will be interpreted as having a "data" + * subspace and an "error" subspace. 
+ * + * @author Erich Schubert + * + * @apiviz.landmark + */ +public class PCAFilteredResult extends PCAResult implements ProjectionResult { + /** + * The strong eigenvalues. + */ + private double[] strongEigenvalues; + + /** + * The strong eigenvectors to their corresponding filtered eigenvalues. + */ + private Matrix strongEigenvectors; + + /** + * The weak eigenvalues. + */ + private double[] weakEigenvalues; + + /** + * The weak eigenvectors to their corresponding filtered eigenvalues. + */ + private Matrix weakEigenvectors; + + /** + * The amount of Variance explained by strong Eigenvalues + */ + private double explainedVariance; + + /** + * The selection matrix of the weak eigenvectors. + */ + private Matrix e_hat; + + /** + * The selection matrix of the strong eigenvectors. + */ + private Matrix e_czech; + + /** + * The similarity matrix. + */ + private Matrix m_hat; + + /** + * The dissimilarity matrix. + */ + private Matrix m_czech; + + /** + * The diagonal matrix of adapted strong eigenvalues: eigenvectors * + * e_czech. + */ + private Matrix adapatedStrongEigenvectors; + + /** + * Construct a result object for the filtered PCA result. 
+ * + * @param eigenPairs All EigenPairs + * @param filteredEigenPairs filtered EigenPairs + * @param big large value in selection matrix + * @param small small value in selection matrix + */ + + public PCAFilteredResult(SortedEigenPairs eigenPairs, FilteredEigenPairs filteredEigenPairs, double big, double small) { + super(eigenPairs); + + int dim = eigenPairs.getEigenPair(0).getEigenvector().getDimensionality(); + + double sumStrongEigenvalues = 0; + double sumWeakEigenvalues = 0; + {// strong eigenpairs + List<EigenPair> strongEigenPairs = filteredEigenPairs.getStrongEigenPairs(); + strongEigenvalues = new double[strongEigenPairs.size()]; + strongEigenvectors = new Matrix(dim, strongEigenPairs.size()); + int i = 0; + for(Iterator<EigenPair> it = strongEigenPairs.iterator(); it.hasNext(); i++) { + EigenPair eigenPair = it.next(); + strongEigenvalues[i] = eigenPair.getEigenvalue(); + strongEigenvectors.setColumnVector(i, eigenPair.getEigenvector()); + sumStrongEigenvalues += strongEigenvalues[i]; + } + } + + {// weak eigenpairs + List<EigenPair> weakEigenPairs = filteredEigenPairs.getWeakEigenPairs(); + weakEigenvalues = new double[weakEigenPairs.size()]; + weakEigenvectors = new Matrix(dim, weakEigenPairs.size()); + int i = 0; + for(Iterator<EigenPair> it = weakEigenPairs.iterator(); it.hasNext(); i++) { + EigenPair eigenPair = it.next(); + weakEigenvalues[i] = eigenPair.getEigenvalue(); + weakEigenvectors.setColumnVector(i, eigenPair.getEigenvector()); + sumWeakEigenvalues += weakEigenvalues[i]; + } + } + explainedVariance = sumStrongEigenvalues / (sumStrongEigenvalues + sumWeakEigenvalues); + int localdim = strongEigenvalues.length; + + // selection Matrix for weak and strong EVs + e_hat = new Matrix(dim, dim); + e_czech = new Matrix(dim, dim); + for(int d = 0; d < dim; d++) { + if(d < localdim) { + e_czech.set(d, d, big); + e_hat.set(d, d, small); + } + else { + e_czech.set(d, d, small); + e_hat.set(d, d, big); + } + } + + // TODO: unnecessary copy. 
+ Matrix V = getEigenvectors(); + adapatedStrongEigenvectors = V.times(e_czech).times(Matrix.identity(dim, localdim)); + m_hat = V.times(e_hat).timesTranspose(V); + m_czech = V.times(e_czech).timesTranspose(V); + } + + /** + * Returns a copy of the matrix of strong eigenvectors after passing the eigen + * pair filter. + * + * @return the matrix of eigenvectors + */ + public final Matrix getStrongEigenvectors() { + return strongEigenvectors.copy(); + } + + /** + * Returns a copy of the strong eigenvalues of the object after passing the + * eigen pair filter. + * + * @return the eigenvalues + */ + public final double[] getStrongEigenvalues() { + return Util.copy(strongEigenvalues); + } + + /** + * Returns a copy of the matrix of weak eigenvectors after passing the eigen + * pair filter. + * + * @return the matrix of eigenvectors + */ + public final Matrix getWeakEigenvectors() { + return weakEigenvectors.copy(); + } + + /** + * Returns a copy of the weak eigenvalues of the object after passing the + * eigen pair filter. + * + * @return the eigenvalues + */ + public final double[] getWeakEigenvalues() { + return Util.copy(weakEigenvalues); + } + + /** + * Get correlation (subspace) dimensionality + * + * @return length of strong eigenvalues + */ + @Override + public final int getCorrelationDimension() { + return strongEigenvalues.length; + } + + /** + * Returns explained variance + * + * @return the variance explained by the strong Eigenvectors + */ + public double getExplainedVariance() { + return explainedVariance; + } + + /** + * Returns a copy of the selection matrix of the weak eigenvectors (E_hat) of + * the object to which this PCA belongs to. + * + * @return the selection matrix of the weak eigenvectors E_hat + */ + public Matrix selectionMatrixOfWeakEigenvectors() { + return e_hat.copy(); + } + + /** + * Returns a copy of the selection matrix of the strong eigenvectors (E_czech) + * of this LocalPCA. 
+ * + * @return the selection matrix of the weak eigenvectors E_czech + */ + public Matrix selectionMatrixOfStrongEigenvectors() { + return e_czech.copy(); + } + + /** + * Returns a copy of the similarity matrix (M_hat) of this LocalPCA. + * + * @return the similarity matrix M_hat + */ + @Override + public Matrix similarityMatrix() { + return m_hat.copy(); + } + + /** + * Returns a copy of the dissimilarity matrix (M_czech) of this LocalPCA. + * + * @return the dissimilarity matrix M_hat + */ + public Matrix dissimilarityMatrix() { + return m_czech.copy(); + } + + /** + * Returns a copy of the adapted strong eigenvectors. + * + * @return the adapted strong eigenvectors + */ + public Matrix adapatedStrongEigenvectors() { + return adapatedStrongEigenvectors.copy(); + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredRunner.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredRunner.java new file mode 100644 index 00000000..297abac0 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredRunner.java @@ -0,0 +1,232 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +import java.util.Collection; + +import de.lmu.ifi.dbs.elki.data.NumberVector; +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; +import de.lmu.ifi.dbs.elki.math.linearalgebra.EigenvalueDecomposition; +import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix; +import de.lmu.ifi.dbs.elki.math.linearalgebra.SortedEigenPairs; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessGlobalConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; + +/** + * PCA runner that will do dimensionality reduction. PCA is computed as with the + * regular runner, but afterwards, an {@link EigenPairFilter} is applied. + * + * @author Erich Schubert + * + * @apiviz.landmark + * @apiviz.uses PCAFilteredResult oneway - - «create» + * @apiviz.composedOf EigenPairFilter + * + * @param <V> Vector class to use + */ +public class PCAFilteredRunner<V extends NumberVector<? extends V, ?>> extends PCARunner<V> { + /** + * Parameter to specify the filter for determination of the strong and weak + * eigenvectors, must be a subclass of {@link EigenPairFilter}. 
+ * <p/> + * Default value: {@link PercentageEigenPairFilter} + * </p> + * <p/> + * Key: {@code -pca.filter} + * </p> + */ + public static final OptionID PCA_EIGENPAIR_FILTER = OptionID.getOrCreateOptionID("pca.filter", "Filter class to determine the strong and weak eigenvectors."); + + /** + * Parameter to specify a constant big value to reset high eigenvalues, must + * be a double greater than 0. + * <p> + * Default value: {@code 1.0} + * </p> + * <p> + * Key: {@code -pca.big} + * </p> + */ + public static final OptionID BIG_ID = OptionID.getOrCreateOptionID("pca.big", "A constant big value to reset high eigenvalues."); + + /** + * Parameter to specify a constant small value to reset low eigenvalues, must + * be a double greater than 0. + * <p> + * Default value: {@code 0.0} + * </p> + * <p> + * Key: {@code -pca.small} + * </p> + */ + public static final OptionID SMALL_ID = OptionID.getOrCreateOptionID("pca.small", "A constant small value to reset low eigenvalues."); + + /** + * Holds the instance of the EigenPairFilter specified by + * {@link #PCA_EIGENPAIR_FILTER}. + */ + private EigenPairFilter eigenPairFilter; + + /** + * Holds the value of {@link #BIG_ID}. + */ + private double big; + + /** + * Holds the value of {@link #SMALL_ID}. + */ + private double small; + + /** + * Constructor. + * + * @param covarianceMatrixBuilder + * @param eigenPairFilter + * @param big + * @param small + */ + public PCAFilteredRunner(CovarianceMatrixBuilder<V> covarianceMatrixBuilder, EigenPairFilter eigenPairFilter, double big, double small) { + super(covarianceMatrixBuilder); + this.eigenPairFilter = eigenPairFilter; + this.big = big; + this.small = small; + } + + /** + * Run PCA on a collection of database IDs + * + * @param ids a collection of ids + * @param database the database used + * @return PCA result + */ + @Override + public PCAFilteredResult processIds(DBIDs ids, Relation<? 
extends V> database) { + return processCovarMatrix(covarianceMatrixBuilder.processIds(ids, database)); + } + + /** + * Run PCA on a QueryResult Collection + * + * @param results a collection of QueryResults + * @param database the database used + * @return PCA result + */ + @Override + public <D extends NumberDistance<?, ?>> PCAFilteredResult processQueryResult(Collection<DistanceResultPair<D>> results, Relation<? extends V> database) { + return processCovarMatrix(covarianceMatrixBuilder.processQueryResults(results, database)); + } + + /** + * Process an existing Covariance Matrix + * + * @param covarMatrix the matrix used for performing PCA + */ + @Override + public PCAFilteredResult processCovarMatrix(Matrix covarMatrix) { + // TODO: add support for a different implementation to do EVD? + EigenvalueDecomposition evd = covarMatrix.eig(); + return processEVD(evd); + } + + /** + * Process an existing eigenvalue decomposition + * + * @param evd eigenvalue decomposition to use + */ + @Override + public PCAFilteredResult processEVD(EigenvalueDecomposition evd) { + SortedEigenPairs eigenPairs = new SortedEigenPairs(evd, false); + FilteredEigenPairs filteredEigenPairs = eigenPairFilter.filter(eigenPairs); + return new PCAFilteredResult(eigenPairs, filteredEigenPairs, big, small); + } + + /** + * Retrieve the {@link EigenPairFilter} to be used. For derived PCA Runners + * + * @return eigenpair filter configured. + */ + protected EigenPairFilter getEigenPairFilter() { + return eigenPairFilter; + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer<V extends NumberVector<? extends V, ?>> extends PCARunner.Parameterizer<V> { + /** + * Holds the instance of the EigenPairFilter specified by + * {@link #PCA_EIGENPAIR_FILTER}. + */ + protected EigenPairFilter eigenPairFilter; + + /** + * Holds the value of {@link #BIG_ID}. 
+ */ + protected double big; + + /** + * Holds the value of {@link #SMALL_ID}. + */ + protected double small; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + ObjectParameter<EigenPairFilter> EIGENPAIR_FILTER_PARAM = new ObjectParameter<EigenPairFilter>(PCA_EIGENPAIR_FILTER, EigenPairFilter.class, PercentageEigenPairFilter.class); + if(config.grab(EIGENPAIR_FILTER_PARAM)) { + eigenPairFilter = EIGENPAIR_FILTER_PARAM.instantiateClass(config); + } + + DoubleParameter BIG_PARAM = new DoubleParameter(BIG_ID, new GreaterConstraint(0), 1.0); + if(config.grab(BIG_PARAM)) { + big = BIG_PARAM.getValue(); + + } + + DoubleParameter SMALL_PARAM = new DoubleParameter(SMALL_ID, new GreaterEqualConstraint(0), 0.0); + if(config.grab(SMALL_PARAM)) { + small = SMALL_PARAM.getValue(); + } + + // global constraint small <--> big + config.checkConstraint(new LessGlobalConstraint<Double>(SMALL_PARAM, BIG_PARAM)); + } + + @Override + protected PCAFilteredRunner<V> makeInstance() { + return new PCAFilteredRunner<V>(covarianceMatrixBuilder, eigenPairFilter, big, small); + } + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAResult.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAResult.java new file mode 100644 index 00000000..17c3e184 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAResult.java @@ -0,0 +1,118 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix; +import de.lmu.ifi.dbs.elki.math.linearalgebra.SortedEigenPairs; +import de.lmu.ifi.dbs.elki.utilities.Util; + +/** + * Result class for Principal Component Analysis with some convenience methods + * + * @author Erich Schubert + * + * @apiviz.landmark + * @apiviz.has de.lmu.ifi.dbs.elki.math.linearalgebra.SortedEigenPairs + */ +public class PCAResult { + /** + * The eigenpairs in decreasing order. + */ + private SortedEigenPairs eigenPairs; + + /** + * The eigenvalues in decreasing order. + */ + private double[] eigenvalues; + + /** + * The eigenvectors in decreasing order to their corresponding eigenvalues. 
+ */ + private Matrix eigenvectors; + + /** + * Build a PCA result object. + * + * @param eigenvalues Eigenvalues + * @param eigenvectors Eigenvector matrix + * @param eigenPairs Eigenpairs + */ + + public PCAResult(double[] eigenvalues, Matrix eigenvectors, SortedEigenPairs eigenPairs) { + super(); + this.eigenPairs = eigenPairs; + this.eigenvalues = eigenvalues; + this.eigenvectors = eigenvectors; + } + + /** + * Build a PCA result from an existing set of EigenPairs. + * + * @param eigenPairs existing eigenpairs + */ + public PCAResult(SortedEigenPairs eigenPairs) { + super(); + // TODO: we might want to postpone the instantiation of eigenvalue and eigenvectors. + this.eigenPairs = eigenPairs; + this.eigenvalues = eigenPairs.eigenValues(); + this.eigenvectors = eigenPairs.eigenVectors(); + } + + /** + * Returns a copy of the matrix of eigenvectors of the object to which this + * PCA belongs to. + * + * @return the matrix of eigenvectors + */ + public final Matrix getEigenvectors() { + return eigenvectors.copy(); + } + + /** + * Returns a copy of the eigenvalues of the object to which this PCA belongs + * to in decreasing order. + * + * @return the eigenvalues + */ + public final double[] getEigenvalues() { + return Util.copy(eigenvalues); + } + + /** + * Returns a copy of the eigenpairs of the object to which this PCA belongs to + * in decreasing order. + * + * @return the eigenpairs + */ + public final SortedEigenPairs getEigenPairs() { + return eigenPairs.copy(); + } + + /** + * Returns the number of eigenvectors stored + * @return length + */ + public final int length() { + return eigenPairs.size(); + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCARunner.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCARunner.java new file mode 100644 index 00000000..07dcbfec --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCARunner.java @@ -0,0 +1,187 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +import java.util.Collection; + +import de.lmu.ifi.dbs.elki.data.NumberVector; +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; +import de.lmu.ifi.dbs.elki.math.linearalgebra.EigenvalueDecomposition; +import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix; +import de.lmu.ifi.dbs.elki.math.linearalgebra.SortedEigenPairs; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; + +/** + * Class to run PCA on given data. + * + * The various methods will start PCA at different places (e.g. with database + * IDs, database query results, a precomputed covariance matrix or eigenvalue + * decomposition). + * + * The runner can be parameterized by setting a covariance matrix builder (e.g. + * to a weighted covariance matrix builder) + * + * @author Erich Schubert + * + * @apiviz.landmark + * @apiviz.uses PCAResult oneway - - «create» + * @apiviz.composedOf CovarianceMatrixBuilder + * + * @param <V> Vector type + */ +public class PCARunner<V extends NumberVector<? extends V, ?>> implements Parameterizable { + /** + * Parameter to specify the class to compute the covariance matrix, must be a + * subclass of {@link CovarianceMatrixBuilder}. + * <p> + * Default value: {@link CovarianceMatrixBuilder} + * </p> + * <p> + * Key: {@code -pca.covariance} + * </p> + */ + public static final OptionID PCA_COVARIANCE_MATRIX = OptionID.getOrCreateOptionID("pca.covariance", "Class used to compute the covariance matrix."); + + /** + * The covariance computation class. 
+ */ + protected CovarianceMatrixBuilder<V> covarianceMatrixBuilder; + + /** + * Constructor. + * + * @param covarianceMatrixBuilder Class for computing the covariance matrix + */ + public PCARunner(CovarianceMatrixBuilder<V> covarianceMatrixBuilder) { + super(); + this.covarianceMatrixBuilder = covarianceMatrixBuilder; + } + + /** + * Run PCA on the complete database + * + * @param database the database used + * @return PCA result + */ + public PCAResult processDatabase(Relation<? extends V> database) { + return processCovarMatrix(covarianceMatrixBuilder.processDatabase(database)); + } + + /** + * Run PCA on a collection of database IDs + * + * @param ids a collection of ids + * @param database the database used + * @return PCA result + */ + public PCAResult processIds(DBIDs ids, Relation<? extends V> database) { + return processCovarMatrix(covarianceMatrixBuilder.processIds(ids, database)); + } + + /** + * Run PCA on a QueryResult Collection + * + * @param results a collection of QueryResults + * @param database the database used + * @return PCA result + */ + public <D extends NumberDistance<?, ?>> PCAResult processQueryResult(Collection<DistanceResultPair<D>> results, Relation<? extends V> database) { + return processCovarMatrix(covarianceMatrixBuilder.processQueryResults(results, database)); + } + + /** + * Process an existing covariance Matrix + * + * @param covarMatrix the matrix used for performing pca + * @return PCA result + */ + public PCAResult processCovarMatrix(Matrix covarMatrix) { + // TODO: add support for a different implementation to do EVD? 
+ EigenvalueDecomposition evd = covarMatrix.eig(); + return processEVD(evd); + } + + /** + * Process an existing eigenvalue decomposition + * + * @param evd eigenvalue decomposition to use + * @return PCA result + */ + public PCAResult processEVD(EigenvalueDecomposition evd) { + SortedEigenPairs eigenPairs = new SortedEigenPairs(evd, false); + return new PCAResult(eigenPairs); + } + + /** + * Get covariance matrix builder + * + * @return covariance matrix builder in use + */ + public CovarianceMatrixBuilder<V> getCovarianceMatrixBuilder() { + return covarianceMatrixBuilder; + } + + /** + * Set covariance matrix builder. + * + * @param covarianceBuilder New covariance matrix builder. + */ + public void setCovarianceMatrixBuilder(CovarianceMatrixBuilder<V> covarianceBuilder) { + this.covarianceMatrixBuilder = covarianceBuilder; + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer<V extends NumberVector<? extends V, ?>> extends AbstractParameterizer { + /** + * The covariance computation class. + */ + protected CovarianceMatrixBuilder<V> covarianceMatrixBuilder; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + ObjectParameter<CovarianceMatrixBuilder<V>> covarianceP = new ObjectParameter<CovarianceMatrixBuilder<V>>(PCA_COVARIANCE_MATRIX, CovarianceMatrixBuilder.class, StandardCovarianceMatrixBuilder.class); + if(config.grab(covarianceP)) { + covarianceMatrixBuilder = covarianceP.instantiateClass(config); + } + } + + @Override + protected PCARunner<V> makeInstance() { + return new PCARunner<V>(covarianceMatrixBuilder); + } + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PercentageEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PercentageEigenPairFilter.java new file mode 100644 index 00000000..f1322d6a --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PercentageEigenPairFilter.java @@ -0,0 +1,166 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +import java.util.ArrayList; +import java.util.List; + +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.math.linearalgebra.EigenPair; +import de.lmu.ifi.dbs.elki.math.linearalgebra.SortedEigenPairs; +import de.lmu.ifi.dbs.elki.utilities.documentation.Description; +import de.lmu.ifi.dbs.elki.utilities.documentation.Title; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.IntervalConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; + +/** + * The PercentageEigenPairFilter sorts the eigenpairs in descending order of + * their eigenvalues and marks the first eigenpairs, whose sum of eigenvalues is + * higher than the given percentage of the sum of all eigenvalues as strong + * eigenpairs. + * + * @author Elke Achtert + */ +@Title("Percentage based Eigenpair filter") +@Description("Sorts the eigenpairs in decending order of their eigenvalues and returns the first eigenpairs, whose sum of eigenvalues is higher than the given percentage of the sum of all eigenvalues.") +public class PercentageEigenPairFilter implements EigenPairFilter { + /** + * The logger for this class. + */ + private static final Logging logger = Logging.getLogger(PercentageEigenPairFilter.class); + + /** + * The threshold for 'strong' eigenvectors: the 'strong' eigenvectors explain + * a portion of at least alpha of the total variance. + * <p> + * Default value: {@link #DEFAULT_ALPHA} + * </p> + * <p> + * Key: {@code -pca.filter.alpha} + * </p> + */ + public static final OptionID ALPHA_ID = OptionID.getOrCreateOptionID("pca.filter.alpha", "The share (0.0 to 1.0) of variance that needs to be explained by the 'strong' eigenvectors." 
+ "The filter class will choose the number of strong eigenvectors by this share."); + + /** + * The default value for alpha. + */ + public static final double DEFAULT_ALPHA = 0.85; + + /** + * The threshold for strong eigenvectors: the strong eigenvectors explain a + * portion of at least alpha of the total variance. + */ + private double alpha; + + /** + * Constructor. + * + * @param alpha + */ + public PercentageEigenPairFilter(double alpha) { + super(); + this.alpha = alpha; + } + + @Override + public FilteredEigenPairs filter(SortedEigenPairs eigenPairs) { + StringBuffer msg = new StringBuffer(); + if(logger.isDebugging()) { + msg.append("alpha = ").append(alpha); + msg.append("\nsortedEigenPairs = ").append(eigenPairs); + } + + // init strong and weak eigenpairs + List<EigenPair> strongEigenPairs = new ArrayList<EigenPair>(); + List<EigenPair> weakEigenPairs = new ArrayList<EigenPair>(); + + // determine sum of eigenvalues + double totalSum = 0; + for(int i = 0; i < eigenPairs.size(); i++) { + EigenPair eigenPair = eigenPairs.getEigenPair(i); + totalSum += eigenPair.getEigenvalue(); + } + if(logger.isDebugging()) { + msg.append("\ntotalSum = ").append(totalSum); + } + + // determine strong and weak eigenpairs + double currSum = 0; + boolean found = false; + for(int i = 0; i < eigenPairs.size(); i++) { + EigenPair eigenPair = eigenPairs.getEigenPair(i); + currSum += eigenPair.getEigenvalue(); + if(currSum / totalSum >= alpha) { + if(!found) { + found = true; + strongEigenPairs.add(eigenPair); + } + else { + weakEigenPairs.add(eigenPair); + } + } + else { + strongEigenPairs.add(eigenPair); + } + } + if(logger.isDebugging()) { + msg.append("\nstrong EigenPairs = ").append(strongEigenPairs); + msg.append("\nweak EigenPairs = ").append(weakEigenPairs); + logger.debugFine(msg.toString()); + } + + return new FilteredEigenPairs(weakEigenPairs, strongEigenPairs); + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + /** + * The threshold for strong eigenvectors: the strong eigenvectors explain a + * portion of at least alpha of the total variance. + */ + private double alpha; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, new IntervalConstraint(0.0, IntervalConstraint.IntervalBoundary.OPEN, 1.0, IntervalConstraint.IntervalBoundary.OPEN), DEFAULT_ALPHA); + if(config.grab(alphaP)) { + alpha = alphaP.getValue(); + } + } + + @Override + protected PercentageEigenPairFilter makeInstance() { + return new PercentageEigenPairFilter(alpha); + } + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/ProgressiveEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/ProgressiveEigenPairFilter.java new file mode 100644 index 00000000..e8e455e1 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/ProgressiveEigenPairFilter.java @@ -0,0 +1,216 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +import java.util.ArrayList; +import java.util.List; + +import de.lmu.ifi.dbs.elki.math.linearalgebra.EigenPair; +import de.lmu.ifi.dbs.elki.math.linearalgebra.SortedEigenPairs; +import de.lmu.ifi.dbs.elki.utilities.documentation.Description; +import de.lmu.ifi.dbs.elki.utilities.documentation.Title; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.IntervalConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; + +/** + * The ProgressiveEigenPairFilter sorts the eigenpairs in descending order of + * their eigenvalues and marks the first eigenpairs, whose sum of eigenvalues is + * higher than the given percentage of the sum of all eigenvalues as strong + * eigenpairs. In contrast to the PercentageEigenPairFilter, it will use a + * percentage which changes linearly with the subspace dimensionality. This + * makes the parameter more consistent for different dimensionalities and often + * gives better results when clusters of different dimensionality exist, since + * different percentage alpha levels might be appropriate for different + * dimensionalities. 
+ * + * Example calculations of alpha levels: + * + * In a 3D space, a progressive alpha value of 0.5 equals: + * + * - 1D subspace: 50 % + 1/3 of remainder = 0.667 + * + * - 2D subspace: 50 % + 2/3 of remainder = 0.833 + * + * In a 4D space, a progressive alpha value of 0.5 equals: + * + * - 1D subspace: 50% + 1/4 of remainder = 0.625 + * + * - 2D subspace: 50% + 2/4 of remainder = 0.750 + * + * - 3D subspace: 50% + 3/4 of remainder = 0.875 + * + * Reasoning why this improves over PercentageEigenPairFilter: + * + * In a 100 dimensional space, a single Eigenvector representing over 85% of the + * total variance is highly significant, whereas the strongest 85 Eigenvectors + * together will by definition always represent at least 85% of the variance. + * PercentageEigenPairFilter can thus not be used with these parameters and + * detect both dimensionalities correctly. + * + * The second parameter introduced here, walpha, serves a different function: It + * prevents the eigenpair filter to use a statistically weak Eigenvalue just to + * reach the intended level, e.g. 84% + 1% >= 85% when 1% is statistically very + * weak. + * + * @author Erich Schubert + * + */ +@Title("Progressive Eigenpair Filter") +@Description("Sorts the eigenpairs in decending order of their eigenvalues and returns the first eigenpairs, whose sum of eigenvalues explains more than the a certain percentage of the unexpected variance, where the percentage increases with subspace dimensionality.") +public class ProgressiveEigenPairFilter implements EigenPairFilter { + /** + * Parameter progressive alpha. + */ + public static final OptionID EIGENPAIR_FILTER_PALPHA = OptionID.getOrCreateOptionID("pca.filter.progressivealpha", "The share (0.0 to 1.0) of variance that needs to be explained by the 'strong' eigenvectors." + "The filter class will choose the number of strong eigenvectors by this share."); + + /** + * The default value for alpha. 
+ */ + public static final double DEFAULT_PALPHA = 0.5; + + /** + * The default value for alpha. + */ + public static final double DEFAULT_WALPHA = 0.95; + + /** + * The threshold for strong eigenvectors: the strong eigenvectors explain a + * portion of at least alpha of the total variance. + */ + private double palpha; + + /** + * The noise tolerance level for weak eigenvectors + */ + private double walpha; + + /** + * Constructor. + * + * @param palpha palpha + * @param walpha walpha + */ + public ProgressiveEigenPairFilter(double palpha, double walpha) { + super(); + this.palpha = palpha; + this.walpha = walpha; + } + + /** + * Filter eigenpairs. + */ + @Override + public FilteredEigenPairs filter(SortedEigenPairs eigenPairs) { + // init strong and weak eigenpairs + List<EigenPair> strongEigenPairs = new ArrayList<EigenPair>(); + List<EigenPair> weakEigenPairs = new ArrayList<EigenPair>(); + + // determine sum of eigenvalues + double totalSum = 0; + for(int i = 0; i < eigenPairs.size(); i++) { + EigenPair eigenPair = eigenPairs.getEigenPair(i); + totalSum += eigenPair.getEigenvalue(); + } + double expectedVariance = totalSum / eigenPairs.size() * walpha; + + // determine strong and weak eigenpairs + double currSum = 0; + boolean found = false; + int i; + for(i = 0; i < eigenPairs.size(); i++) { + EigenPair eigenPair = eigenPairs.getEigenPair(i); + // weak Eigenvector? + if(eigenPair.getEigenvalue() < expectedVariance) { + break; + } + currSum += eigenPair.getEigenvalue(); + // calculate progressive alpha level + double alpha = 1.0 - (1.0 - palpha) * (1.0 - (i + 1) / eigenPairs.size()); + if(currSum / totalSum >= alpha || i == eigenPairs.size() - 1) { + found = true; + strongEigenPairs.add(eigenPair); + break; + } + } + // if we didn't hit our alpha level, we consider all vectors to be weak! 
+ if(!found) { + assert (weakEigenPairs.size() == 0); + weakEigenPairs = strongEigenPairs; + strongEigenPairs = new ArrayList<EigenPair>(); + } + for(; i < eigenPairs.size(); i++) { + EigenPair eigenPair = eigenPairs.getEigenPair(i); + weakEigenPairs.add(eigenPair); + } + + // the code using this method doesn't expect an empty strong set, + // if we didn't find any strong ones, we make all vectors strong + if(strongEigenPairs.size() == 0) { + return new FilteredEigenPairs(new ArrayList<EigenPair>(), weakEigenPairs); + } + return new FilteredEigenPairs(weakEigenPairs, strongEigenPairs); + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + /** + * The threshold for strong eigenvectors: the strong eigenvectors explain a + * portion of at least alpha of the total variance. + */ + private double palpha; + + /** + * The noise tolerance level for weak eigenvectors + */ + private double walpha; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + DoubleParameter palphaP = new DoubleParameter(EIGENPAIR_FILTER_PALPHA, new IntervalConstraint(0.0, IntervalConstraint.IntervalBoundary.OPEN, 1.0, IntervalConstraint.IntervalBoundary.OPEN), DEFAULT_PALPHA); + if(config.grab(palphaP)) { + palpha = palphaP.getValue(); + } + + DoubleParameter walphaP = new DoubleParameter(WeakEigenPairFilter.EIGENPAIR_FILTER_WALPHA, new GreaterEqualConstraint(0.0), DEFAULT_WALPHA); + if(config.grab(walphaP)) { + walpha = walphaP.getValue(); + } + } + + @Override + protected ProgressiveEigenPairFilter makeInstance() { + return new ProgressiveEigenPairFilter(palpha, walpha); + } + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/RelativeEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/RelativeEigenPairFilter.java new file mode 100644 index 00000000..16414238 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/RelativeEigenPairFilter.java @@ -0,0 +1,141 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +import java.util.ArrayList; +import java.util.List; + +import de.lmu.ifi.dbs.elki.math.linearalgebra.EigenPair; +import de.lmu.ifi.dbs.elki.math.linearalgebra.SortedEigenPairs; +import de.lmu.ifi.dbs.elki.utilities.documentation.Description; +import de.lmu.ifi.dbs.elki.utilities.documentation.Title; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; + +/** + * The RelativeEigenPairFilter sorts the eigenpairs in descending order of their + * eigenvalues and marks the first eigenpairs who are a certain factor above the + * average of the remaining eigenvalues. + * + * It is closely related to the WeakEigenPairFilter, and differs mostly by + * comparing to the remaining Eigenvalues, not to the total sum. + * + * There are some situations where one or the other is superior, especially when + * it comes to handling nested clusters and strong global correlations that are + * not too interesting. These benefits usually only make a difference at higher + * dimensionalities. + * + * @author Erich Schubert + */ +@Title("Relative EigenPair Filter") +@Description("Sorts the eigenpairs in decending order of their eigenvalues and returns those eigenpairs, whose eigenvalue is " + "above the average ('expected') eigenvalue of the remaining eigenvectors.") +public class RelativeEigenPairFilter implements EigenPairFilter { + /** + * Parameter relative alpha. 
+ */ + public static final OptionID EIGENPAIR_FILTER_RALPHA = OptionID.getOrCreateOptionID("pca.filter.relativealpha", "The sensitivity niveau for weak eigenvectors: An eigenvector which is at less than " + "the given share of the statistical average variance is considered weak."); + + /** + * The default value for ralpha. + */ + public static final double DEFAULT_RALPHA = 1.1; + + /** + * The noise tolerance level for weak eigenvectors + */ + private double ralpha; + + /** + * Constructor. + * + * @param ralpha + */ + public RelativeEigenPairFilter(double ralpha) { + super(); + this.ralpha = ralpha; + } + + /** + * Filter eigenpairs + */ + @Override + public FilteredEigenPairs filter(SortedEigenPairs eigenPairs) { + // init strong and weak eigenpairs + List<EigenPair> strongEigenPairs = new ArrayList<EigenPair>(); + List<EigenPair> weakEigenPairs = new ArrayList<EigenPair>(); + + // default value is "all strong". + int contrastAtMax = eigenPairs.size() - 1; + // find the last eigenvector that is considered 'strong' by the weak rule + // applied to the remaining vectors only + double eigenValueSum = eigenPairs.getEigenPair(eigenPairs.size() - 1).getEigenvalue(); + for(int i = eigenPairs.size() - 2; i >= 0; i--) { + EigenPair eigenPair = eigenPairs.getEigenPair(i); + eigenValueSum += eigenPair.getEigenvalue(); + double needEigenvalue = eigenValueSum / (eigenPairs.size() - i) * ralpha; + if(eigenPair.getEigenvalue() >= needEigenvalue) { + contrastAtMax = i; + break; + } + } + + for(int i = 0; i <= contrastAtMax /* && i < eigenPairs.size() */; i++) { + EigenPair eigenPair = eigenPairs.getEigenPair(i); + strongEigenPairs.add(eigenPair); + } + for(int i = contrastAtMax + 1; i < eigenPairs.size(); i++) { + EigenPair eigenPair = eigenPairs.getEigenPair(i); + weakEigenPairs.add(eigenPair); + } + + return new FilteredEigenPairs(weakEigenPairs, strongEigenPairs); + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + protected double ralpha; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + DoubleParameter ralphaP = new DoubleParameter(EIGENPAIR_FILTER_RALPHA, new GreaterEqualConstraint(0.0), DEFAULT_RALPHA); + if(config.grab(ralphaP)) { + ralpha = ralphaP.getValue(); + } + } + + @Override + protected RelativeEigenPairFilter makeInstance() { + return new RelativeEigenPairFilter(ralpha); + } + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/SignificantEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/SignificantEigenPairFilter.java new file mode 100644 index 00000000..1c6502bf --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/SignificantEigenPairFilter.java @@ -0,0 +1,149 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +import java.util.ArrayList; +import java.util.List; + +import de.lmu.ifi.dbs.elki.math.linearalgebra.EigenPair; +import de.lmu.ifi.dbs.elki.math.linearalgebra.SortedEigenPairs; +import de.lmu.ifi.dbs.elki.utilities.documentation.Description; +import de.lmu.ifi.dbs.elki.utilities.documentation.Title; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; + +/** + * The SignificantEigenPairFilter sorts the eigenpairs in descending order of + * their eigenvalues and chooses the contrast of an Eigenvalue to the remaining + * Eigenvalues is maximal. + * + * It is closely related to the WeakEigenPairFilter and RelativeEigenPairFilter. + * But while the RelativeEigenPairFilter chooses the highest dimensionality that + * satisfies the relative alpha levels, the SignificantEigenPairFilter will + * chose the local dimensionality such that the 'contrast' is maximal. + * + * There are some situations where one or the other is superior, especially when + * it comes to handling nested clusters and strong global correlations that are + * not too interesting. These benefits usually only make a difference at higher + * dimensionalities. + * + * @author Erich Schubert + */ +@Title("Significant EigenPair Filter") +@Description("Sorts the eigenpairs in decending order of their eigenvalues and looks for the maxmimum contrast of current Eigenvalue / average of remaining Eigenvalues.") +public class SignificantEigenPairFilter implements EigenPairFilter { + /** + * The default value for walpha. Not used by default, we're going for maximum + * contrast only. 
+ */ + public static final double DEFAULT_WALPHA = 0.0; + + /** + * The noise tolerance level for weak eigenvectors + */ + private double walpha; + + /** + * Constructor. + * + * @param walpha + */ + public SignificantEigenPairFilter(double walpha) { + super(); + this.walpha = walpha; + } + + /** + * Filter eigenpairs + */ + @Override + public FilteredEigenPairs filter(SortedEigenPairs eigenPairs) { + // init strong and weak eigenpairs + List<EigenPair> strongEigenPairs = new ArrayList<EigenPair>(); + List<EigenPair> weakEigenPairs = new ArrayList<EigenPair>(); + + // default value is "all strong". + int contrastMaximum = eigenPairs.size() - 1; + double maxContrast = 0.0; + // calc the eigenvalue sum. + double eigenValueSum = 0.0; + for(int i = 0; i < eigenPairs.size(); i++) { + EigenPair eigenPair = eigenPairs.getEigenPair(i); + eigenValueSum += eigenPair.getEigenvalue(); + } + double weakEigenvalue = eigenValueSum / eigenPairs.size() * walpha; + // now find the maximum contrast. + double currSum = eigenPairs.getEigenPair(eigenPairs.size() - 1).getEigenvalue(); + for(int i = eigenPairs.size() - 2; i >= 0; i--) { + EigenPair eigenPair = eigenPairs.getEigenPair(i); + currSum += eigenPair.getEigenvalue(); + // weak? + if(eigenPair.getEigenvalue() < weakEigenvalue) { + continue; + } + double contrast = eigenPair.getEigenvalue() / (currSum / (eigenPairs.size() - i)); + if(contrast > maxContrast) { + maxContrast = contrast; + contrastMaximum = i; + } + } + + for(int i = 0; i <= contrastMaximum /* && i < eigenPairs.size() */; i++) { + EigenPair eigenPair = eigenPairs.getEigenPair(i); + strongEigenPairs.add(eigenPair); + } + for(int i = contrastMaximum + 1; i < eigenPairs.size(); i++) { + EigenPair eigenPair = eigenPairs.getEigenPair(i); + weakEigenPairs.add(eigenPair); + } + + return new FilteredEigenPairs(weakEigenPairs, strongEigenPairs); + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + private double walpha; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + DoubleParameter walphaP = new DoubleParameter(WeakEigenPairFilter.EIGENPAIR_FILTER_WALPHA, new GreaterEqualConstraint(0.0), DEFAULT_WALPHA); + if(config.grab(walphaP)) { + walpha = walphaP.getValue(); + } + } + + @Override + protected SignificantEigenPairFilter makeInstance() { + return new SignificantEigenPairFilter(walpha); + } + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/StandardCovarianceMatrixBuilder.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/StandardCovarianceMatrixBuilder.java new file mode 100644 index 00000000..c39be039 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/StandardCovarianceMatrixBuilder.java @@ -0,0 +1,64 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +import de.lmu.ifi.dbs.elki.data.NumberVector; +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix; +import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix; + +/** + * Class for building a "traditional" covariance matrix. + * Reasonable default choice for a {@link CovarianceMatrixBuilder}. + * + * Both methods delegate to {@code CovarianceMatrix.make(...)} and return the + * result of {@code destroyToNaiveMatrix()} on the object obtained. + * + * @author Erich Schubert + * + * @apiviz.uses CovarianceMatrix + * + * @param <V> Vector class to use. + */ +public class StandardCovarianceMatrixBuilder<V extends NumberVector<? 
extends V, ?>> extends AbstractCovarianceMatrixBuilder<V> { + /** + * Compute the covariance matrix for a complete database, by calling + * {@code CovarianceMatrix.make(database)} and returning the result of + * {@code destroyToNaiveMatrix()}. + * + * NOTE(review): the name destroyToNaiveMatrix suggests the intermediate + * object must not be reused afterwards; it is not kept here - confirm. + * + * @param database the database used + * @return Covariance Matrix + */ + @Override + public Matrix processDatabase(Relation<? extends V> database) { + return CovarianceMatrix.make(database).destroyToNaiveMatrix(); + } + + /** + * Compute the covariance matrix for a collection of database IDs, by calling + * {@code CovarianceMatrix.make(database, ids)} and returning the result of + * {@code destroyToNaiveMatrix()}. + * + * @param ids a collection of ids + * @param database the database used + * @return Covariance Matrix + */ + @Override + public Matrix processIds(DBIDs ids, Relation<? extends V> database) { + return CovarianceMatrix.make(database, ids).destroyToNaiveMatrix(); + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/WeakEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/WeakEigenPairFilter.java new file mode 100644 index 00000000..549e767e --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/WeakEigenPairFilter.java @@ -0,0 +1,140 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/

import java.util.ArrayList;
import java.util.List;

import de.lmu.ifi.dbs.elki.math.linearalgebra.EigenPair;
import de.lmu.ifi.dbs.elki.math.linearalgebra.SortedEigenPairs;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;

/**
 * Eigenpair filter that separates "strong" from "weak" eigenpairs by comparing
 * each eigenvalue against the average eigenvalue, scaled by the weak alpha
 * parameter.
 *
 * Eigenpairs whose eigenvalue is strictly above that threshold are reported as
 * strong, all others as weak.
 *
 * @author Erich Schubert
 */
@Title("Weak Eigenpair Filter")
@Description("Sorts the eigenpairs in decending order of their eigenvalues and returns those eigenpairs, whose eigenvalue is above the average ('expected') eigenvalue.")
public class WeakEigenPairFilter implements EigenPairFilter {
  /**
   * OptionID for the weak alpha value of {@link WeakEigenPairFilter},
   * {@link de.lmu.ifi.dbs.elki.math.linearalgebra.pca.ProgressiveEigenPairFilter}
   * and
   * {@link de.lmu.ifi.dbs.elki.math.linearalgebra.pca.SignificantEigenPairFilter}
   */
  public static final OptionID EIGENPAIR_FILTER_WALPHA = OptionID.getOrCreateOptionID("pca.filter.weakalpha", "The minimum strength of the statistically expected variance (1/n) share an eigenvector " + "needs to have to be considered 'strong'.");

  /**
   * The default value for walpha.
   */
  public static final double DEFAULT_WALPHA = 0.95;

  /**
   * Factor applied to the average eigenvalue to obtain the strong/weak
   * threshold.
   */
  private double walpha;

  /**
   * Constructor.
   *
   * @param walpha factor applied to the average eigenvalue
   */
  public WeakEigenPairFilter(double walpha) {
    super();
    this.walpha = walpha;
  }

  /**
   * Splits the given eigenpairs into strong and weak ones.
   *
   * The threshold is (sum of eigenvalues / number of eigenpairs) * walpha; an
   * eigenpair is strong iff its eigenvalue is strictly greater than that.
   *
   * @param eigenPairs the eigenpairs to filter
   * @return the filtered eigenpairs
   */
  @Override
  public FilteredEigenPairs filter(SortedEigenPairs eigenPairs) {
    final int count = eigenPairs.size();

    // Sum up all eigenvalues to derive the average ("expected") eigenvalue.
    double eigenvalueSum = 0;
    for(int idx = 0; idx < count; idx++) {
      eigenvalueSum += eigenPairs.getEigenPair(idx).getEigenvalue();
    }
    final double threshold = eigenvalueSum / count * walpha;

    // Distribute the eigenpairs over the two result lists.
    final List<EigenPair> strong = new ArrayList<EigenPair>();
    final List<EigenPair> weak = new ArrayList<EigenPair>();
    for(int idx = 0; idx < count; idx++) {
      final EigenPair pair = eigenPairs.getEigenPair(idx);
      if(pair.getEigenvalue() > threshold) {
        strong.add(pair);
      }
      else {
        weak.add(pair);
      }
    }

    // Callers do not expect an empty strong set: if nothing passed the
    // threshold, hand over all eigenpairs in the second ("strong") position
    // and an empty list in the first ("weak") position.
    if(strong.isEmpty()) {
      return new FilteredEigenPairs(new ArrayList<EigenPair>(), weak);
    }
    return new FilteredEigenPairs(weak, strong);
  }

  /**
   * Parameterization class.
   *
   * @author Erich Schubert
   *
   * @apiviz.exclude
   */
  public static class Parameterizer extends AbstractParameterizer {
    /**
     * Weak alpha value read from the parameterization; passed on to the
     * {@link WeakEigenPairFilter} constructor.
     */
    private double walpha;

    @Override
    protected void makeOptions(Parameterization config) {
      super.makeOptions(config);
      final DoubleParameter param = new DoubleParameter(EIGENPAIR_FILTER_WALPHA, new GreaterEqualConstraint(0.0), DEFAULT_WALPHA);
      if(!config.grab(param)) {
        // Parameter could not be obtained; keep the field as it is.
        return;
      }
      walpha = param.getValue();
    }

    @Override
    protected WeakEigenPairFilter makeInstance() {
      return new WeakEigenPairFilter(walpha);
    }
  }
}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/WeightedCovarianceMatrixBuilder.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/WeightedCovarianceMatrixBuilder.java new file mode 100644 index 00000000..9402397e --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/WeightedCovarianceMatrixBuilder.java @@ -0,0 +1,238 @@
package de.lmu.ifi.dbs.elki.math.linearalgebra.pca;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures

Copyright (C) 2011
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

import java.util.Collection;
import java.util.Iterator;

import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.DoubleDistanceResultPair;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid;
import de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions.ConstantWeight;
import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions.WeightFunction;
import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;

/**
 * {@link CovarianceMatrixBuilder} with weights.
 *
 * This builder uses a weight function to weight points differently while
 * building a covariance matrix. Covariance can be canonically extended with
 * weights, as shown in the article
 *
 * A General Framework for Increasing the Robustness of PCA-Based Correlation
 * Clustering Algorithms Hans-Peter Kriegel and Peer Kr&ouml;ger and Erich
 * Schubert and Arthur Zimek In: Proc. 20th Int. Conf. on Scientific and
 * Statistical Database Management (SSDBM), 2008, Hong Kong Lecture Notes in
 * Computer Science 5069, Springer
 *
 * @author Erich Schubert
 *
 * @apiviz.has WeightFunction
 * @apiviz.has PrimitiveDistanceFunction
 * @apiviz.uses CovarianceMatrix
 *
 * @param <V> Vector class to use
 */
@Title("Weighted Covariance Matrix / PCA")
@Description("A PCA modification by using weights while building the covariance matrix, to obtain more stable results")
@Reference(authors = "H.-P. Kriegel, P. Kröger, E. Schubert, A. Zimek", title = "A General Framework for Increasing the Robustness of PCA-based Correlation Clustering Algorithms", booktitle = "Proceedings of the 20th International Conference on Scientific and Statistical Database Management (SSDBM), Hong Kong, China, 2008", url = "http://dx.doi.org/10.1007/978-3-540-69497-7_27")
public class WeightedCovarianceMatrixBuilder<V extends NumberVector<? extends V, ?>> extends AbstractCovarianceMatrixBuilder<V> {
  /**
   * Parameter to specify the weight function to use in weighted PCA, must
   * implement
   * {@link de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions.WeightFunction}
   * .
   * <p>
   * Key: {@code -pca.weight}
   * </p>
   */
  public static final OptionID WEIGHT_ID = OptionID.getOrCreateOptionID("pca.weight", "Weight function to use in weighted PCA.");

  /**
   * Holds the weight function.
   */
  protected WeightFunction weightfunction;

  /**
   * Holds the distance function used for weight calculation
   */
  // TODO: make configurable?
  private final PrimitiveDistanceFunction<? super V, DoubleDistance> weightDistance = EuclideanDistanceFunction.STATIC;

  /**
   * Constructor.
   *
   * @param weightfunction weight function applied to each object's distance
   */
  public WeightedCovarianceMatrixBuilder(WeightFunction weightfunction) {
    super();
    this.weightfunction = weightfunction;
  }

  /**
   * Weighted Covariance Matrix for a set of IDs. Since we are not supplied any
   * distance information, we'll need to compute it ourselves. Covariance is
   * tied to Euclidean distance, so it probably does not make much sense to add
   * support for other distance functions?
   *
   * The distance of every object to the centroid of the set is computed
   * twice: once to obtain the maximum distance and the root mean squared
   * distance ("stddev"), and once more to derive the individual weights.
   *
   * @param ids the ids of the objects to process
   * @param database the relation holding the objects
   * @return Covariance Matrix
   */
  @Override
  public Matrix processIds(DBIDs ids, Relation<? extends V> database) {
    final int dim = DatabaseUtil.dimensionality(database);
    final CovarianceMatrix cmat = new CovarianceMatrix(dim);
    final V centroid = Centroid.make(database, ids).toVector(database);

    // First pass: maximum distance and sum of squared distances.
    double maxdist = 0.0;
    double stddev = 0.0;
    for(Iterator<DBID> it = ids.iterator(); it.hasNext();) {
      V obj = database.get(it.next());
      double distance = weightDistance.distance(centroid, obj).doubleValue();
      stddev += distance * distance;
      if(distance > maxdist) {
        maxdist = distance;
      }
    }
    // Avoid handing a zero maximum to the weight function.
    if(maxdist == 0.0) {
      maxdist = 1.0;
    }
    // compute standard deviation.
    stddev = Math.sqrt(stddev / ids.size());

    // Second pass: add every object with its weight.
    for(Iterator<DBID> it = ids.iterator(); it.hasNext();) {
      V obj = database.get(it.next());
      double distance = weightDistance.distance(centroid, obj).doubleValue();
      double weight = weightfunction.getWeight(distance, maxdist, stddev);
      cmat.put(obj, weight);
    }
    return cmat.destroyToNaiveMatrix();
  }

  /**
   * Compute the weighted Covariance Matrix for a QueryResult Collection.
   *
   * The distances stored in the first k results are used directly: a first
   * pass determines their maximum and root mean square ("stddev"), a second
   * pass adds the referenced objects with the resulting weights.
   *
   * @param results a collection of QueryResults
   * @param database the database used
   * @param k number of elements to process; capped at the size of results
   * @param <D> distance type of the query results
   * @return Covariance Matrix
   */
  @Override
  public <D extends NumberDistance<?, ?>> Matrix processQueryResults(Collection<DistanceResultPair<D>> results, Relation<? extends V> database, int k) {
    final int dim = DatabaseUtil.dimensionality(database);
    final CovarianceMatrix cmat = new CovarianceMatrix(dim);

    // avoid bad parameters
    if(k > results.size()) {
      k = results.size();
    }

    // First pass: maximum distance and sum of squared distances.
    double maxdist = 0.0;
    double stddev = 0.0;
    int seen = 0;
    for(DistanceResultPair<D> res : results) {
      if(seen >= k) {
        break;
      }
      final double dist = distanceOf(res);
      stddev += dist * dist;
      if(dist > maxdist) {
        maxdist = dist;
      }
      seen++;
    }
    // Avoid handing a zero maximum to the weight function.
    if(maxdist == 0.0) {
      maxdist = 1.0;
    }
    stddev = Math.sqrt(stddev / k);

    // Second pass: calculate weighted PCA.
    seen = 0;
    for(DistanceResultPair<D> res : results) {
      if(seen >= k) {
        break;
      }
      V obj = database.get(res.getDBID());
      double weight = weightfunction.getWeight(distanceOf(res), maxdist, stddev);
      cmat.put(obj, weight);
      seen++;
    }
    return cmat.destroyToNaiveMatrix();
  }

  /**
   * Extracts the distance of a query result as a primitive double, using the
   * direct accessor of {@link DoubleDistanceResultPair} where available.
   *
   * @param res query result
   * @return distance stored in the result
   */
  private static double distanceOf(DistanceResultPair<? extends NumberDistance<?, ?>> res) {
    if(res instanceof DoubleDistanceResultPair) {
      return ((DoubleDistanceResultPair) res).getDoubleDistance();
    }
    return res.getDistance().doubleValue();
  }

  /**
   * Parameterization class.
   *
   * @author Erich Schubert
   *
   * @apiviz.exclude
   */
  public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
    /**
     * Weight function instantiated from the parameterization; stays null if
     * the parameter could not be obtained.
     */
    protected WeightFunction weightfunction = null;

    @Override
    protected void makeOptions(Parameterization config) {
      super.makeOptions(config);
      ObjectParameter<WeightFunction> weightfunctionP = new ObjectParameter<WeightFunction>(WEIGHT_ID, WeightFunction.class, ConstantWeight.class);
      if(config.grab(weightfunctionP)) {
        weightfunction = weightfunctionP.instantiateClass(config);
      }
    }

    @Override
    protected WeightedCovarianceMatrixBuilder<V> makeInstance() {
      return new WeightedCovarianceMatrixBuilder<V>(weightfunction);
    }
  }
}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/package-info.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/package-info.java new file mode 100644 index 00000000..94b0f5a5 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/package-info.java @@ -0,0 +1,26 @@ +/** + * <p>Principal Component Analysis (PCA) and Eigenvector processing.</p> + */ +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ +package de.lmu.ifi.dbs.elki.math.linearalgebra.pca;
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ConstantWeight.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ConstantWeight.java new file mode 100644 index 00000000..2901a75c --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ConstantWeight.java @@ -0,0 +1,41 @@
package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures

Copyright (C) 2011
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

/**
 * Weight function that assigns the same weight to every object.
 *
 * The result is always 1.0, independent of all arguments.
 *
 * @author Erich Schubert
 */
public final class ConstantWeight implements WeightFunction {
  /**
   * Returns the constant weight 1.0; no scaling is applied.
   *
   * @param distance ignored
   * @param max ignored
   * @param stddev ignored
   * @return always 1.0
   */
  @Override
  public double getWeight(@SuppressWarnings("unused") double distance, @SuppressWarnings("unused") double max, @SuppressWarnings("unused") double stddev) {
    return 1.0;
  }
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ErfcStddevWeight.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ErfcStddevWeight.java new file mode 100644 index 00000000..fe2fbca2 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ErfcStddevWeight.java @@ -0,0 +1,46 @@
package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures

Copyright (C) 2011
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

import de.lmu.ifi.dbs.elki.math.MathUtil;

/**
 * Weight function based on the complementary Gaussian error function, with
 * the distance scaled by the standard deviation. This probably is the most
 * statistically sound weight.
 *
 * erfc(1 / sqrt(2) * distance / stddev)
 *
 * @author Erich Schubert
 */
public final class ErfcStddevWeight implements WeightFunction {
  /**
   * Computes the erfc weight of a distance relative to the standard deviation.
   *
   * @param distance distance to weight
   * @param max ignored
   * @param stddev standard deviation used for scaling
   * @return 1 if stddev is not positive, otherwise the erfc weight
   */
  @Override
  public double getWeight(double distance, @SuppressWarnings("unused") double max, double stddev) {
    // Without a positive scale there is nothing to normalize by.
    if(stddev <= 0) {
      return 1;
    }
    final double scaled = MathUtil.SQRTHALF * distance / stddev;
    return MathUtil.erfc(scaled);
  }
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ErfcWeight.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ErfcWeight.java new file mode 100644 index 00000000..b497a29c --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ErfcWeight.java @@ -0,0 +1,52 @@
package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures

Copyright (C) 2011
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

import de.lmu.ifi.dbs.elki.math.MathUtil;

/**
 * Weight function based on the complementary Gaussian error function, scaled
 * such that the result is 0.1 at distance == max
 *
 * erfc(1.1630871536766736 * distance / max)
 *
 * The value of 1.1630871536766736 is erfcinv(0.1), to achieve the intended
 * scaling.
 *
 * @author Erich Schubert
 */
public final class ErfcWeight implements WeightFunction {
  /**
   * Computes the erfc weight of distance / max.
   *
   * @param distance distance to weight
   * @param max maximum distance used for scaling
   * @param stddev ignored
   * @return 1.0 if max is not positive, otherwise the erfc weight
   */
  @Override
  public double getWeight(double distance, double max, @SuppressWarnings("unused") double stddev) {
    if(max <= 0) {
      return 1.0;
    }
    final double fraction = distance / max;
    // The factor was picked such that getWeight(a,a,0) is 0.1,
    // since erfc(1.1630871536766736) == 0.1
    return MathUtil.erfc(1.1630871536766736 * fraction);
  }
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ExponentialStddevWeight.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ExponentialStddevWeight.java new file mode 100644 index 00000000..1d657f49 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ExponentialStddevWeight.java @@ -0,0 +1,48 @@
package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures

Copyright (C) 2011
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

/**
 * Exponential weight function, scaled using the standard deviation
 *
 * stddev * exp(-.5 * distance/stddev)
 *
 * This is similar to the Gaussian weight function, except distance/stddev is
 * not squared.
 *
 * @author Erich Schubert
 */
public final class ExponentialStddevWeight implements WeightFunction {
  /**
   * Computes the exponential weight of a distance relative to the standard
   * deviation.
   *
   * @param distance distance to weight
   * @param max ignored
   * @param stddev standard deviation used for scaling
   * @return 1 if stddev is not positive, otherwise
   *         stddev * exp(-.5 * distance / stddev)
   */
  @Override
  public double getWeight(double distance, @SuppressWarnings("unused") double max, double stddev) {
    if(stddev <= 0) {
      return 1;
    }
    final double ratio = distance / stddev;
    final double decay = java.lang.Math.exp(-.5 * ratio);
    return stddev * decay;
  }
}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ExponentialWeight.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ExponentialWeight.java new file mode 100644 index 00000000..f204c1da --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ExponentialWeight.java @@ -0,0 +1,51 @@
package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures

Copyright (C) 2011
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

/**
 * Exponential weight function, scaled such that the result is 0.1 at distance
 * == max
 *
 * exp(-2.3025850929940455 * distance/max)
 *
 * This is similar to the Gaussian weight function, except distance/max is not
 * squared.
 *
 * -2.3025850929940455 is log(.1) to achieve the intended range of 1.0 - 0.1
 *
 * @author Erich Schubert
 */
public final class ExponentialWeight implements WeightFunction {
  /**
   * Computes the exponential weight of distance / max.
   *
   * @param distance distance to weight
   * @param max maximum distance used for scaling
   * @param stddev ignored
   * @return 1.0 if max is not positive, otherwise the exponential weight
   */
  @Override
  public double getWeight(double distance, double max, @SuppressWarnings("unused") double stddev) {
    if(max <= 0) {
      return 1.0;
    }
    final double fraction = distance / max;
    // the factor -2.303 is log(.1), giving the intended range of 1.0-0.1
    final double exponent = -2.3025850929940455 * fraction;
    return java.lang.Math.exp(exponent);
  }
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/GaussStddevWeight.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/GaussStddevWeight.java new file mode 100644 index 00000000..f4a64462 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/GaussStddevWeight.java @@ -0,0 +1,55 @@
package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures

Copyright (C) 2011
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

import de.lmu.ifi.dbs.elki.math.MathUtil;

/**
 * Gaussian weight function, scaled using the standard deviation
 *
 * factor * exp(-.5 * (distance/stddev)^2) / stddev
 *
 * with factor being 1 / sqrt(2 * PI)
 *
 * @author Erich Schubert
 */
public final class GaussStddevWeight implements WeightFunction {
  /**
   * Constant scaling factor of Gaussian distribution.
   *
   * In fact, in most use cases we could leave this away.
   */
  private final static double NORMALIZATION = 1 / MathUtil.SQRTTWOPI;

  /**
   * Computes the Gaussian weight of a distance relative to the standard
   * deviation.
   *
   * @param distance distance to weight
   * @param max ignored
   * @param stddev standard deviation used for scaling
   * @return 1 if stddev is not positive, otherwise the Gaussian weight
   */
  @Override
  public double getWeight(double distance, @SuppressWarnings("unused") double max, double stddev) {
    if(stddev <= 0) {
      return 1;
    }
    final double z = distance / stddev;
    final double bell = java.lang.Math.exp(-.5 * z * z);
    return NORMALIZATION * bell / stddev;
  }
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/GaussWeight.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/GaussWeight.java new file mode 100644 index 00000000..87a69d4f --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/GaussWeight.java @@ -0,0 +1,46 @@
package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures

Copyright (C) 2011
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

/**
 * Gaussian weight function, scaled such that the result is 0.1 at distance ==
 * max
 *
 * exp(-2.3025850929940455 * (distance/max)^2)
 *
 * @author Erich Schubert
 */
public final class GaussWeight implements WeightFunction {
  /**
   * Computes the Gaussian weight of distance / max.
   *
   * @param distance distance to weight
   * @param max maximum distance used for scaling
   * @param stddev ignored
   * @return 1.0 if max is not positive, otherwise the Gaussian weight
   */
  @Override
  public double getWeight(double distance, double max, @SuppressWarnings("unused") double stddev) {
    if(max <= 0) {
      return 1.0;
    }
    final double fraction = distance / max;
    // -2.303 is log(.1), giving the intended range of 1.0-0.1
    final double exponent = -2.3025850929940455 * fraction * fraction;
    return java.lang.Math.exp(exponent);
  }
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/InverseLinearWeight.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/InverseLinearWeight.java new file mode 100644 index 00000000..58b8d047 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/InverseLinearWeight.java @@ -0,0 +1,49 @@
package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures

Copyright (C) 2011
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

/**
 * Inverse Linear Weight Function.
 *
 * This weight is not particularly reasonable. Instead it serves the purpose of
 * testing the effects of a badly chosen weight function.
 *
 * The weight increases linearly with the distance, from 0.1 at distance 0 to
 * 1.0 at distance == max.
 *
 * @author Erich Schubert
 */
public final class InverseLinearWeight implements WeightFunction {
  /**
   * Linear increasing weight, from 0.1 to 1.0
   *
   * NOTE: increasing weights are non-standard, and mostly for testing
   *
   * @param distance distance to weight
   * @param max maximum distance used for scaling
   * @param stddev ignored
   * @return 0.1 if max is not positive, otherwise .1 + .9 * distance / max
   */
  @Override
  public double getWeight(double distance, double max, @SuppressWarnings("unused") double stddev) {
    if(max <= 0) {
      return 0.1;
    }
    final double fraction = distance / max;
    return .1 + fraction * .9;
  }
}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/InverseProportionalStddevWeight.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/InverseProportionalStddevWeight.java new file mode 100644 index 00000000..b157a7e5 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/InverseProportionalStddevWeight.java @@ -0,0 +1,44 @@
package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures

Copyright (C) 2011
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

/**
 * Inverse proportional weight function, scaled using the standard deviation.
 *
 * 1 / (1 + distance/stddev)
 *
 * @author Erich Schubert
 */
public final class InverseProportionalStddevWeight implements WeightFunction {
  /**
   * Computes the inverse proportional weight of a distance relative to the
   * standard deviation.
   *
   * @param distance distance to weight
   * @param max ignored
   * @param stddev standard deviation used for scaling
   * @return 1 if stddev is not positive, otherwise 1 / (1 + distance / stddev)
   */
  @Override
  public double getWeight(double distance, @SuppressWarnings("unused") double max, double stddev) {
    if(stddev <= 0) {
      return 1;
    }
    final double ratio = distance / stddev;
    final double denominator = 1 + ratio;
    return 1 / denominator;
  }
}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/InverseProportionalWeight.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/InverseProportionalWeight.java new file mode 100644 index 00000000..2d19cf03 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/InverseProportionalWeight.java @@ -0,0 +1,44 @@
package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures

Copyright (C) 2011
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

/**
 * Inverse proportional weight function, scaled using the maximum such that
 * the result is 0.1 at distance == max.
 *
 * 1 / (1 + 9 * distance/max)
 *
 * @author Erich Schubert
 */
public final class InverseProportionalWeight implements WeightFunction {
  /**
   * Computes the inverse proportional weight of distance / max.
   *
   * @param distance distance to weight
   * @param max maximum distance used for scaling
   * @param stddev ignored
   * @return 1.0 if max is not positive, otherwise
   *         1 / (1 + 9 * distance / max)
   */
  @Override
  public double getWeight(double distance, double max, @SuppressWarnings("unused") double stddev) {
    if(max <= 0) {
      return 1.0;
    }
    final double fraction = distance / max;
    final double denominator = 1 + 9 * fraction;
    return 1 / denominator;
  }
}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/LinearWeight.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/LinearWeight.java new file mode 100644 index 00000000..907a81d9 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/LinearWeight.java @@ -0,0 +1,45 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * Linear weight function, scaled using the maximum such that it goes from 1.0 + * to 0.1 + * + * 1 - 0.9 * (distance/max) + * + * @author Erich Schubert + */ +public final class LinearWeight implements WeightFunction { + /** + * Linear decreasing weight, from 1.0 to 0.1. Stddev is ignored. 
+ */ + @Override + public double getWeight(double distance, double max, @SuppressWarnings("unused") double stddev) { + if(max <= 0) { + return 1.0; + } + double relativedistance = distance / max; + return 1 - relativedistance * .9; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/QuadraticStddevWeight.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/QuadraticStddevWeight.java new file mode 100644 index 00000000..3b26a85e --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/QuadraticStddevWeight.java @@ -0,0 +1,56 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * Quadratic weight function, scaled using the standard deviation. + * + * We needed another scaling here, we chose the cutoff point to be 3*stddev. If + * you need another value, you have to reimplement this class. 
+ * + * max(0.0, 1.0 - (distance/(3*stddev))^2) + * + * @author Erich Schubert + */ +public final class QuadraticStddevWeight implements WeightFunction { + /** + * Scaling: at scaling * stddev the function will hit 0.0 + */ + private static final double scaling = 3; + + /** + * Evaluate weight function at given parameters. max is ignored. + */ + @Override + public double getWeight(double distance, @SuppressWarnings("unused") double max, double stddev) { + if(stddev <= 0) { + return 1; + } + double scaleddistance = distance / (scaling * stddev); + // After this, the result would be negative. + if(scaleddistance >= 1.0) { + return 0.0; + } + return 1.0 - scaleddistance * scaleddistance; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/QuadraticWeight.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/QuadraticWeight.java new file mode 100644 index 00000000..7de38dda --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/QuadraticWeight.java @@ -0,0 +1,45 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program.
If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * Quadratic weight function, scaled using the maximum to reach 0.1 at that + * point. + * + * 1.0 - 0.9 * (distance/max)^2 + * + * @author Erich Schubert + */ +public final class QuadraticWeight implements WeightFunction { + /** + * Evaluate quadratic weight. stddev is ignored. + */ + @Override + public double getWeight(double distance, double max, @SuppressWarnings("unused") double stddev) { + if(max <= 0) { + return 1.0; + } + double relativedistance = distance / max; + return 1.0 - 0.9 * relativedistance * relativedistance; + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/WeightFunction.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/WeightFunction.java new file mode 100644 index 00000000..bf8e32e6 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/WeightFunction.java @@ -0,0 +1,48 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +import de.lmu.ifi.dbs.elki.utilities.InspectionUtilFrequentlyScanned; + +/** + * WeightFunction interface that allows the use of various distance-based weight + * functions. In addition to the distance parameter, the maximum distance and + * standard deviation are also given, to allow weight functions to be + * normalized according to the maximum or standard deviation. + * + * @author Erich Schubert + */ +public interface WeightFunction extends InspectionUtilFrequentlyScanned { + /** + * Evaluate weight function with given parameters. + * + * Note that usually implementations will ignore either max or stddev.
+ * + * @param distance distance of the query point + * @param max maximum distance of all included points + * @param stddev standard deviation (i.e. quadratic mean / RMS) of the + * included points + * @return weight for the query point + */ + double getWeight(double distance, double max, double stddev); +} diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/package-info.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/package-info.java new file mode 100644 index 00000000..268a3e89 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/package-info.java @@ -0,0 +1,26 @@ +/** + * <p>Weight functions used in weighted PCA via {@link de.lmu.ifi.dbs.elki.math.linearalgebra.pca.WeightedCovarianceMatrixBuilder}</p> + */ +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ +package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions;
\ No newline at end of file |