package de.lmu.ifi.dbs.elki.data;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see .
*/
import gnu.trove.map.hash.TIntDoubleHashMap;
import java.util.BitSet;
import java.util.Comparator;
import java.util.Random;
import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.math.MathUtil;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect;
/**
* Utility functions for use with vectors.
*
* Note: obviously, many functions are class methods or database related.
*
* @author Erich Schubert
*
* @apiviz.uses NumberVector
*/
public final class VectorUtil {
/**
* Fake constructor. Do not instantiate, use static methods.
*/
private VectorUtil() {
// Do not instantiate - utility class.
}
/**
* Produce a new vector based on random numbers in [0:1].
*
* @param factory Vector factory
* @param dim desired dimensionality
* @param r Random generator
* @param vector type
* @return new instance
*/
public static V randomVector(NumberVector.Factory factory, int dim, Random r) {
return factory.newNumberVector(MathUtil.randomDoubleArray(dim, r));
}
/**
* Produce a new vector based on random numbers in [0:1].
*
* @param factory Vector factory
* @param dim desired dimensionality
* @param vector type
* @return new instance
*/
public static V randomVector(NumberVector.Factory factory, int dim) {
return randomVector(factory, dim, new Random());
}
/**
* Compute the angle for sparse vectors.
*
* @param v1 First vector
* @param v2 Second vector
* @return angle
*/
public static double angleSparse(SparseNumberVector v1, SparseNumberVector v2) {
// TODO: exploit precomputed length, when available?
// Length of first vector
double l1 = 0., l2 = 0., cross = 0.;
int i1 = v1.iter(), i2 = v2.iter();
while(v1.iterValid(i1) && v2.iterValid(i2)) {
final int d1 = v1.iterDim(i1), d2 = v2.iterDim(i2);
if(d1 < d2) {
final double val = v1.iterDoubleValue(i1);
l1 += val * val;
i1 = v1.iterAdvance(i1);
}
else if(d2 < d1) {
final double val = v2.iterDoubleValue(i2);
l2 += val * val;
i2 = v2.iterAdvance(i2);
}
else { // d1 == d2
final double val1 = v1.iterDoubleValue(i1);
final double val2 = v2.iterDoubleValue(i2);
l1 += val1 * val1;
l2 += val2 * val2;
cross += val1 * val2;
i1 = v1.iterAdvance(i1);
i2 = v2.iterAdvance(i2);
}
}
while(v1.iterValid(i1)) {
final double val = v1.iterDoubleValue(i1);
l1 += val * val;
i1 = v1.iterAdvance(i1);
}
while(v2.iterValid(i2)) {
final double val = v2.iterDoubleValue(i2);
l2 += val * val;
i2 = v2.iterAdvance(i2);
}
if(cross == 0.) {
return 0.;
}
if(l1 == 0. || l2 == 0.) {
return 1.;
}
final double a = Math.sqrt((cross / l1) * (cross / l2));
return (a < 1.) ? a : 1.;
}
/**
* Compute the angle between two vectors.
*
* @param v1 first vector
* @param v2 second vector
* @param o Origin
* @return Angle
*/
public static double angle(NumberVector v1, NumberVector v2, Vector o) {
final int dim1 = v1.getDimensionality(), dim2 = v2.getDimensionality(), dimo = o.getDimensionality();
final int mindim = (dim1 <= dim2) ? dim1 : dim2;
// Essentially, we want to compute this:
// v1' = v1 - o, v2' = v2 - o
// v1'.transposeTimes(v2') / (v1'.euclideanLength()*v2'.euclideanLength());
// We can just compute all three in parallel.
double[] oe = o.getArrayRef();
double cross = 0., l1 = 0., l2 = 0.;
for(int k = 0; k < mindim; k++) {
final double dk = k < dimo ? oe[k] : 0.;
final double r1 = v1.doubleValue(k) - dk;
final double r2 = v2.doubleValue(k) - dk;
cross += r1 * r2;
l1 += r1 * r1;
l2 += r2 * r2;
}
for(int k = mindim; k < dim1; k++) {
final double dk = k < dimo ? oe[k] : 0.;
final double r1 = v1.doubleValue(k) - dk;
l1 += r1 * r1;
}
for(int k = mindim; k < dim2; k++) {
final double dk = k < dimo ? oe[k] : 0.;
final double r2 = v2.doubleValue(k) - dk;
l2 += r2 * r2;
}
if(cross == 0.) {
return 0.;
}
if(l1 == 0. || l2 == 0.) {
return 1.;
}
final double a = Math.sqrt((cross / l1) * (cross / l2));
return (a < 1.) ? a : 1.;
}
/**
* Compute the angle between two vectors.
*
* @param v1 first vector
* @param v2 second vector
* @param o Origin
* @return Angle
*/
public static double angle(NumberVector v1, NumberVector v2, NumberVector o) {
final int dim1 = v1.getDimensionality(), dim2 = v2.getDimensionality(), dimo = o.getDimensionality();
final int mindim = (dim1 <= dim2) ? dim1 : dim2;
// Essentially, we want to compute this:
// v1' = v1 - o, v2' = v2 - o
// v1'.transposeTimes(v2') / (v1'.euclideanLength()*v2'.euclideanLength());
// We can just compute all three in parallel.
double cross = 0, l1 = 0, l2 = 0;
for(int k = 0; k < mindim; k++) {
final double ok = k < dimo ? o.doubleValue(k) : 0.;
final double r1 = v1.doubleValue(k) - ok;
final double r2 = v2.doubleValue(k) - o.doubleValue(k);
cross += r1 * r2;
l1 += r1 * r1;
l2 += r2 * r2;
}
for(int k = mindim; k < dim1; k++) {
final double ok = k < dimo ? o.doubleValue(k) : 0.;
final double r1 = v1.doubleValue(k) - ok;
l1 += r1 * r1;
}
for(int k = mindim; k < dim2; k++) {
final double ok = k < dimo ? o.doubleValue(k) : 0.;
final double r2 = v2.doubleValue(k) - ok;
l2 += r2 * r2;
}
if(cross == 0.) {
return 0.;
}
if(l1 == 0. || l2 == 0.) {
return 1.;
}
final double a = Math.sqrt((cross / l1) * (cross / l2));
return (a < 1.) ? a : 1.;
}
/**
* Compute the absolute cosine of the angle between two vectors.
*
* To convert it to radians, use Math.acos(angle)
!
*
* @param v1 first vector
* @param v2 second vector
* @return Angle
*/
public static double cosAngle(NumberVector v1, NumberVector v2) {
if(v1 instanceof SparseNumberVector && v2 instanceof SparseNumberVector) {
return angleSparse((SparseNumberVector) v1, (SparseNumberVector) v2);
}
final int dim1 = v1.getDimensionality(), dim2 = v2.getDimensionality();
final int mindim = (dim1 <= dim2) ? dim1 : dim2;
// Essentially, we want to compute this:
// v1.transposeTimes(v2) / (v1.euclideanLength() * v2.euclideanLength());
// We can just compute all three in parallel.
double cross = 0, l1 = 0, l2 = 0;
for(int k = 0; k < mindim; k++) {
final double r1 = v1.doubleValue(k);
final double r2 = v2.doubleValue(k);
cross += r1 * r2;
l1 += r1 * r1;
l2 += r2 * r2;
}
for(int k = mindim; k < dim1; k++) {
final double r1 = v1.doubleValue(k);
l1 += r1 * r1;
}
for(int k = mindim; k < dim2; k++) {
final double r2 = v2.doubleValue(k);
l2 += r2 * r2;
}
if(cross == 0.) {
return 0.;
}
if(l1 == 0. || l2 == 0.) {
return 1.;
}
final double a = Math.sqrt((cross / l1) * (cross / l2));
return (a < 1.) ? a : 1.;
}
// TODO: add more precise but slower O(n^2) angle computation according to:
// Computing the Angle between Vectors, P. Schatte
// Journal of Computing, Volume 63, Number 1 (1999)
/**
* Compute the minimum angle between two rectangles.
*
* @param v1 first rectangle
* @param v2 second rectangle
* @return Angle
*/
public static double minCosAngle(SpatialComparable v1, SpatialComparable v2) {
if(v1 instanceof NumberVector && v2 instanceof NumberVector) {
return cosAngle((NumberVector) v1, (NumberVector) v2);
}
final int dim1 = v1.getDimensionality(), dim2 = v2.getDimensionality();
final int mindim = (dim1 <= dim2) ? dim1 : dim2;
// Essentially, we want to compute this:
// absmax(v1.transposeTimes(v2))/(min(v1.euclideanLength())*min(v2.euclideanLength()));
// We can just compute all three in parallel.
double s1 = 0, s2 = 0, l1 = 0, l2 = 0;
for(int k = 0; k < mindim; k++) {
final double min1 = v1.getMin(k), max1 = v1.getMax(k);
final double min2 = v2.getMin(k), max2 = v2.getMax(k);
final double p1 = min1 * min2, p2 = min1 * max2;
final double p3 = max1 * min2, p4 = max1 * max2;
s1 += Math.max(p1 > p2 ? p1 : p2, p3 > p4 ? p3 : p4);
s2 += Math.min(p1 < p2 ? p1 : p2, p3 < p4 ? p3 : p4);
if(max1 < 0) {
l1 += max1 * max1;
}
else if(min1 > 0) {
l1 += min1 * min1;
} // else: 0
if(max2 < 0) {
l2 += max2 * max2;
}
else if(min2 > 0) {
l2 += min2 * min2;
} // else: 0
}
for(int k = mindim; k < dim1; k++) {
final double min1 = v1.getMin(k), max1 = v1.getMax(k);
if(max1 < 0.) {
l1 += max1 * max1;
}
else if(min1 > 0.) {
l1 += min1 * min1;
} // else: 0
}
for(int k = mindim; k < dim2; k++) {
final double min2 = v2.getMin(k), max2 = v2.getMax(k);
if(max2 < 0.) {
l2 += max2 * max2;
}
else if(min2 > 0.) {
l2 += min2 * min2;
} // else: 0
}
final double cross = Math.max(s1, Math.abs(s2));
if(cross == 0.) {
return 0.;
}
if(l1 == 0. || l2 == 0.) {
return 1.;
}
final double a = Math.sqrt((cross / l1) * (cross / l2));
return (a < 1.) ? a : 1.;
}
/**
* Compute the scalar product (inner product) of this and the given
* DoubleVector.
*
* @param d1 the first vector to compute the scalar product for
* @param d2 the second vector to compute the scalar product for
* @return the scalar product (inner product) of this and the given
* DoubleVector
*/
public static double scalarProduct(NumberVector d1, NumberVector d2) {
final int dim = d1.getDimensionality();
double result = 0.;
for(int i = 0; i < dim; i++) {
result += d1.doubleValue(i) * d2.doubleValue(i);
}
return result;
}
/**
* Compute medoid for a given subset.
*
* @param relation Relation to process
* @param sample Sample set
* @return Medoid vector
*/
public static Vector computeMedoid(Relation extends NumberVector> relation, DBIDs sample) {
final int dim = RelationUtil.dimensionality(relation);
ArrayModifiableDBIDs mids = DBIDUtil.newArray(sample);
SortDBIDsBySingleDimension s = new SortDBIDsBySingleDimension(relation);
Vector medoid = new Vector(dim);
for(int d = 0; d < dim; d++) {
s.setDimension(d);
medoid.set(d, relation.get(QuickSelect.median(mids, s)).doubleValue(d));
}
return medoid;
}
/**
* This is an ugly hack, but we don't want to have the {@link Matrix} class
* depend on {@link NumberVector}. Maybe a future version will no longer need
* this.
*
* @param mat Matrix
* @param v Vector
* @return {@code mat * v}, as double array.
*/
public static double[] fastTimes(Matrix mat, NumberVector v) {
final double[][] elements = mat.getArrayRef();
final int cdim = mat.getColumnDimensionality();
final double[] X = new double[elements.length];
// multiply it with each row from A
for(int i = 0; i < elements.length; i++) {
final double[] Arowi = elements[i];
double s = 0;
for(int k = 0; k < cdim; k++) {
s += Arowi[k] * v.doubleValue(k);
}
X[i] = s;
}
return X;
}
/**
* Compare number vectors by a single dimension.
*
* @author Erich Schubert
*
* @apiviz.exclude
*/
public static class SortDBIDsBySingleDimension implements Comparator {
/**
* Dimension to sort with.
*/
private int d;
/**
* The relation to sort.
*/
private Relation extends NumberVector> data;
/**
* Constructor.
*
* @param data Vector data source
* @param dim Dimension to sort by
*/
public SortDBIDsBySingleDimension(Relation extends NumberVector> data, int dim) {
super();
this.data = data;
this.d = dim;
};
/**
* Constructor.
*
* @param data Vector data source
*/
public SortDBIDsBySingleDimension(Relation extends NumberVector> data) {
super();
this.data = data;
};
/**
* Get the dimension to sort by.
*
* @return Dimension to sort with
*/
public int getDimension() {
return this.d;
}
/**
* Set the dimension to sort by.
*
* @param d2 Dimension to sort with
*/
public void setDimension(int d2) {
this.d = d2;
}
@Override
public int compare(DBIDRef id1, DBIDRef id2) {
return Double.compare(data.get(id1).doubleValue(d), data.get(id2).doubleValue(d));
}
}
/**
* Compare number vectors by a single dimension.
*
* @author Erich Schubert
*
* @apiviz.exclude
*/
public static class SortVectorsBySingleDimension implements Comparator {
/**
* Dimension to sort with.
*/
private int d;
/**
* Constructor.
*
* @param dim Dimension to sort by.
*/
public SortVectorsBySingleDimension(int dim) {
super();
this.d = dim;
};
/**
* Constructor.
*/
public SortVectorsBySingleDimension() {
super();
};
/**
* Get the dimension to sort by.
*
* @return Dimension to sort with
*/
public int getDimension() {
return this.d;
}
/**
* Set the dimension to sort by.
*
* @param d2 Dimension to sort with
*/
public void setDimension(int d2) {
this.d = d2;
}
@Override
public int compare(NumberVector o1, NumberVector o2) {
return Double.compare(o1.doubleValue(d), o2.doubleValue(d));
}
}
/**
* Project a number vector to the specified attributes.
*
* @param v a NumberVector to project
* @param selectedAttributes the attributes selected for projection
* @param factory Vector factory
* @param Vector type
* @return a new NumberVector as a projection on the specified attributes
*/
public static V project(V v, BitSet selectedAttributes, NumberVector.Factory factory) {
if(factory instanceof SparseNumberVector.Factory) {
final SparseNumberVector.Factory> sfactory = (SparseNumberVector.Factory>) factory;
TIntDoubleHashMap values = new TIntDoubleHashMap(selectedAttributes.cardinality(), 1);
for(int d = selectedAttributes.nextSetBit(0); d >= 0; d = selectedAttributes.nextSetBit(d + 1)) {
if(v.doubleValue(d) != 0.0) {
values.put(d, v.doubleValue(d));
}
}
// We can't avoid this cast, because Java doesn't know that V is a
// SparseNumberVector:
@SuppressWarnings("unchecked")
V projectedVector = (V) sfactory.newNumberVector(values, selectedAttributes.cardinality());
return projectedVector;
}
else {
double[] newAttributes = new double[selectedAttributes.cardinality()];
int i = 0;
for(int d = selectedAttributes.nextSetBit(0); d >= 0; d = selectedAttributes.nextSetBit(d + 1)) {
newAttributes[i] = v.doubleValue(d);
i++;
}
return factory.newNumberVector(newAttributes);
}
}
}