package de.lmu.ifi.dbs.elki.data; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ import gnu.trove.map.hash.TIntDoubleHashMap; import java.util.BitSet; import java.util.Comparator; import java.util.Random; import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable; import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs; import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; import de.lmu.ifi.dbs.elki.math.MathUtil; import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect; /** * Utility functions for use with vectors. * * Note: obviously, many functions are class methods or database related. * * @author Erich Schubert * * @apiviz.uses NumberVector */ public final class VectorUtil { /** * Fake constructor. Do not instantiate, use static methods. */ private VectorUtil() { // Do not instantiate - utility class. } /** * Return the range across all dimensions. Useful in particular for time * series. * * @param vec Vector to process. * @return [min, max] */ public static DoubleMinMax getRangeDouble(NumberVector vec) { DoubleMinMax minmax = new DoubleMinMax(); for(int i = 0; i < vec.getDimensionality(); i++) { minmax.put(vec.doubleValue(i)); } return minmax; } /** * Produce a new vector based on random numbers in [0:1]. * * @param factory Vector factory * @param dim desired dimensionality * @param r Random generator * @param vector type * @return new instance */ public static > V randomVector(NumberVector.Factory factory, int dim, Random r) { return factory.newNumberVector(MathUtil.randomDoubleArray(dim, r)); } /** * Produce a new vector based on random numbers in [0:1]. * * @param factory Vector factory * @param dim desired dimensionality * @param vector type * @return new instance */ public static > V randomVector(NumberVector.Factory factory, int dim) { return randomVector(factory, dim, new Random()); } /** * Compute the angle for sparse vectors. * * @param v1 First vector * @param v2 Second vector * @return angle */ public static double angleSparse(SparseNumberVector v1, SparseNumberVector v2) { BitSet b1 = v1.getNotNullMask(); BitSet b2 = v2.getNotNullMask(); BitSet both = (BitSet) b1.clone(); both.and(b2); // Length of first vector double l1 = 0.0; for(int i = b1.nextSetBit(0); i >= 0; i = b1.nextSetBit(i + 1)) { final double val = v1.doubleValue(i); l1 += val * val; } l1 = Math.sqrt(l1); // Length of second vector double l2 = 0.0; for(int i = b2.nextSetBit(0); i >= 0; i = b2.nextSetBit(i + 1)) { final double val = v2.doubleValue(i); l2 += val * val; } l2 = Math.sqrt(l2); // Cross product double cross = 0.0; for(int i = both.nextSetBit(0); i >= 0; i = both.nextSetBit(i + 1)) { cross += v1.doubleValue(i) * v2.doubleValue(i); } return cross / (l1 * l2); } /** * Compute the angle between two vectors. * * @param v1 first vector * @param v2 second vector * @param o Origin * @return Angle */ public static double angle(NumberVector v1, NumberVector v2, Vector o) { // Essentially, we want to compute this: // v1' = v1 - o, v2' = v2 - o // v1'.transposeTimes(v2') / (v1'.euclideanLength()*v2'.euclideanLength()); // We can just compute all three in parallel. double[] oe = o.getArrayRef(); final int dim = v1.getDimensionality(); double s = 0, e1 = 0, e2 = 0; for(int k = 0; k < dim; k++) { final double r1 = v1.doubleValue(k) - oe[k]; final double r2 = v2.doubleValue(k) - oe[k]; s += r1 * r2; e1 += r1 * r1; e2 += r2 * r2; } return Math.sqrt((s / e1) * (s / e2)); } /** * Compute the angle between two vectors. * * @param v1 first vector * @param v2 second vector * @param o Origin * @return Angle */ public static double angle(NumberVector v1, NumberVector v2, NumberVector o) { // Essentially, we want to compute this: // v1' = v1 - o, v2' = v2 - o // v1'.transposeTimes(v2') / (v1'.euclideanLength()*v2'.euclideanLength()); // We can just compute all three in parallel. final int dim = v1.getDimensionality(); double s = 0, e1 = 0, e2 = 0; for(int k = 0; k < dim; k++) { final double r1 = v1.doubleValue(k) - o.doubleValue(k); final double r2 = v2.doubleValue(k) - o.doubleValue(k); s += r1 * r2; e1 += r1 * r1; e2 += r2 * r2; } return Math.sqrt((s / e1) * (s / e2)); } /** * Compute the absolute cosine of the angle between two vectors. * * To convert it to radians, use Math.acos(angle)! * * @param v1 first vector * @param v2 second vector * @return Angle */ public static double cosAngle(NumberVector v1, NumberVector v2) { if(v1 instanceof SparseNumberVector && v2 instanceof SparseNumberVector) { return angleSparse((SparseNumberVector) v1, (SparseNumberVector) v2); } // Essentially, we want to compute this: // v1.transposeTimes(v2) / (v1.euclideanLength() * v2.euclideanLength()); // We can just compute all three in parallel. final int d1 = v1.getDimensionality(); final int d2 = v2.getDimensionality(); final int dim = Math.min(d1, d2); double s = 0, e1 = 0, e2 = 0; for(int k = 0; k < dim; k++) { final double r1 = v1.doubleValue(k); final double r2 = v2.doubleValue(k); s += r1 * r2; e1 += r1 * r1; e2 += r2 * r2; } for(int k = dim; k < d1; k++) { final double r1 = v1.doubleValue(k); e1 += r1 * r1; } for(int k = dim; k < d2; k++) { final double r2 = v2.doubleValue(k); e2 += r2 * r2; } return Math.min(Math.sqrt((s / e1) * (s / e2)), 1); } // TODO: add more precise but slower O(n^2) angle computation according to: // Computing the Angle between Vectors, P. Schatte // Journal of Computing, Volume 63, Number 1 (1999) /** * Compute the minimum angle between two rectangles. * * @param v1 first rectangle * @param v2 second rectangle * @return Angle */ public static double minCosAngle(SpatialComparable v1, SpatialComparable v2) { if(v1 instanceof NumberVector && v2 instanceof NumberVector) { return cosAngle((NumberVector) v1, (NumberVector) v2); } // Essentially, we want to compute this: // absmax(v1.transposeTimes(v2))/(min(v1.euclideanLength())*min(v2.euclideanLength())); // We can just compute all three in parallel. final int dim = v1.getDimensionality(); double s1 = 0, s2 = 0, e1 = 0, e2 = 0; for(int k = 0; k < dim; k++) { final double min1 = v1.getMin(k), max1 = v1.getMax(k); final double min2 = v2.getMin(k), max2 = v2.getMax(k); final double p1 = min1 * min2, p2 = min1 * max2; final double p3 = max1 * min2, p4 = max1 * max2; s1 += Math.max(Math.max(p1, p2), Math.max(p3, p4)); s2 += Math.min(Math.min(p1, p2), Math.min(p3, p4)); if(max1 < 0) { e1 += max1 * max1; } else if(min1 > 0) { e1 += min1 * min1; } // else: 0 if(max2 < 0) { e2 += max2 * max2; } else if(min2 > 0) { e2 += min2 * min2; } // else: 0 } final double s = Math.max(s1, Math.abs(s2)); return Math.min(Math.sqrt((s / e1) * (s / e2)), 1.0); } /** * Provides the scalar product (inner product) of this and the given * DoubleVector. * * @param d1 the first vector to compute the scalar product for * @param d2 the second vector to compute the scalar product for * @return the scalar product (inner product) of this and the given * DoubleVector */ public static double scalarProduct(NumberVector d1, NumberVector d2) { final int dim = d1.getDimensionality(); double result = 0.0; for(int i = 0; i < dim; i++) { result += d1.doubleValue(i) * d2.doubleValue(i); } return result; } /** * Compute medoid for a given subset. * * @param relation Relation to process * @param sample Sample set * @return Medoid vector */ public static Vector computeMedoid(Relation> relation, DBIDs sample) { final int dim = RelationUtil.dimensionality(relation); ArrayModifiableDBIDs mids = DBIDUtil.newArray(sample); SortDBIDsBySingleDimension s = new SortDBIDsBySingleDimension(relation); Vector medoid = new Vector(dim); for (int d = 0; d < dim; d++) { s.setDimension(d); medoid.set(d, relation.get(QuickSelect.median(mids, s)).doubleValue(d)); } return medoid; } /** * Compare number vectors by a single dimension. * * @author Erich Schubert * * @apiviz.exclude */ public static class SortDBIDsBySingleDimension implements Comparator { /** * Dimension to sort with. */ private int d; /** * The relation to sort. */ private Relation> data; /** * Constructor. * * @param data Vector data source */ public SortDBIDsBySingleDimension(Relation> data) { super(); this.data = data; }; /** * Get the dimension to sort by. * * @return Dimension to sort with */ public int getDimension() { return this.d; } /** * Set the dimension to sort by. * * @param d2 Dimension to sort with */ public void setDimension(int d2) { this.d = d2; } @Override public int compare(DBIDRef id1, DBIDRef id2) { return Double.compare(data.get(id1).doubleValue(d), data.get(id2).doubleValue(d)); } } /** * Compare number vectors by a single dimension. * * @author Erich Schubert * * @apiviz.exclude */ public static class SortVectorsBySingleDimension implements Comparator> { /** * Dimension to sort with. */ private int d; /** * Constructor. */ public SortVectorsBySingleDimension() { super(); }; /** * Get the dimension to sort by. * * @return Dimension to sort with */ public int getDimension() { return this.d; } /** * Set the dimension to sort by. * * @param d2 Dimension to sort with */ public void setDimension(int d2) { this.d = d2; } @Override public int compare(NumberVector o1, NumberVector o2) { return Double.compare(o1.doubleValue(d), o2.doubleValue(d)); } } /** * Provides a new NumberVector as a projection on the specified attributes. * * @param v a NumberVector to project * @param selectedAttributes the attributes selected for projection * @param factory Vector factory * @param Vector type * @return a new NumberVector as a projection on the specified attributes */ public static > V project(V v, BitSet selectedAttributes, NumberVector.Factory factory) { if (factory instanceof SparseNumberVector.Factory) { final SparseNumberVector.Factory sfactory = (SparseNumberVector.Factory) factory; TIntDoubleHashMap values = new TIntDoubleHashMap(selectedAttributes.cardinality(), 1); for (int d = selectedAttributes.nextSetBit(0); d >= 0; d = selectedAttributes.nextSetBit(d + 1)) { if (v.doubleValue(d) != 0.0) { values.put(d, v.doubleValue(d)); } } // We can't avoid this cast, because Java doesn't know that V is a SparseNumberVector: @SuppressWarnings("unchecked") V projectedVector = (V) sfactory.newNumberVector(values, selectedAttributes.cardinality()); return projectedVector; } else { double[] newAttributes = new double[selectedAttributes.cardinality()]; int i = 0; for (int d = selectedAttributes.nextSetBit(0); d >= 0; d = selectedAttributes.nextSetBit(d + 1)) { newAttributes[i] = v.doubleValue(d); i++; } return factory.newNumberVector(newAttributes); } } }