package de.lmu.ifi.dbs.elki.utilities; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures Copyright (C) 2011 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ import java.io.PrintStream; import java.util.AbstractCollection; import java.util.ArrayList; import java.util.BitSet; import java.util.Collection; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Random; import java.util.Set; import java.util.StringTokenizer; import de.lmu.ifi.dbs.elki.data.DoubleVector; import de.lmu.ifi.dbs.elki.data.SparseFloatVector; /** * This class collects various static helper methods. * * For helper methods related to special application fields see other utilities * classes. * * * @see de.lmu.ifi.dbs.elki.utilities */ public final class Util { /** * Returns the prefix of the specified fileName (i.e. the name of the file * without extension). * * @param fileName the name of the file * @return the prefix of the specified fileName */ public static String getFilePrefix(final String fileName) { final int index = fileName.lastIndexOf(Character.getNumericValue('.')); if(index < 0) { return fileName; } return fileName.substring(0, index); } /** * Returns a new String array containing the same objects as are contained in * the given array. * * @param array an array to copy * @return the copied array */ public static String[] copy(String[] array) { String[] copy = new String[array.length]; System.arraycopy(array, 0, copy, 0, array.length); return copy; } /** * Returns a new double array containing the same objects as are contained in * the given array. * * @param array an array to copy * @return the copied array */ public static double[] copy(double[] array) { double[] copy = new double[array.length]; System.arraycopy(array, 0, copy, 0, array.length); return copy; } /** * Returns the unboxed double array of the given Object Double array. * * @param array the array to be unboxed * @return the unboxed double array */ public static double[] unbox(Double[] array) { double[] unboxed = new double[array.length]; // noinspection ManualArrayCopy for(int i = 0; i < unboxed.length; i++) { unboxed[i] = array[i]; } return unboxed; } /** * Returns the unboxed double array of the given Object Number array. * * @param array the array to be unboxed * @return the unboxed double array */ public static double[] unbox(Number[] array) { double[] unboxed = new double[array.length]; for(int i = 0; i < unboxed.length; i++) { unboxed[i] = array[i].doubleValue(); } return unboxed; } /** * Returns the unboxed float array of the given Object Number array. * * @param array the array to be unboxed * @return the unboxed float array */ public static float[] unboxToFloat(Number[] array) { float[] unboxed = new float[array.length]; for(int i = 0; i < unboxed.length; i++) { unboxed[i] = array[i].floatValue(); } return unboxed; } /** * Returns a new Double array initialized to the values * represented by the specified String and separated by comma, as * performed by the valueOf method of class Double. * * @param s the string to be parsed. * @return a new Double array represented by s */ public static double[] parseDoubles(String s) { List result = new ArrayList(); StringTokenizer tokenizer = new StringTokenizer(s, ","); while(tokenizer.hasMoreTokens()) { String d = tokenizer.nextToken(); result.add(Double.parseDouble(d)); } return unbox(result.toArray(new Double[result.size()])); } /** * Returns a new Float array initialized to the values * represented by the specified String and separated by comma, as * performed by the valueOf method of class Float. * * @param s the string to be parsed. * @return a new Float array represented by s */ public static float[] parseFloats(String s) { List result = new ArrayList(); StringTokenizer tokenizer = new StringTokenizer(s, ","); while(tokenizer.hasMoreTokens()) { String d = tokenizer.nextToken(); result.add(Float.parseFloat(d)); } return unboxToFloat(result.toArray(new Float[result.size()])); } /** * Converts the specified list of double objects to a list of float objects. * * @param values the list of double objects to be converted * @return the converted list of float objects */ public static List convertToFloat(List values) { List result = new ArrayList(values.size()); for(Double value : values) { result.add(new Float(value)); } return result; } /** * Converts the specified array of doubles to an array of floats. * * @param values the array of doubles to be converted * @return the converted array of floats */ public static float[] convertToFloat(double[] values) { float[] result = new float[values.length]; for(int i = 0; i < values.length; i++) { result[i] = (float) values[i]; } return result; } /** * Converts the specified array of doubles to an array of floats. * * @param values the array of doubles to be converted * @return the converted array of floats */ public static double[] convertToDoubles(float[] values) { double[] result = new double[values.length]; for(int i = 0; i < values.length; i++) { result[i] = values[i]; } return result; } /** * Converts the specified list of Double objects to an array of doubles. * * @param values the list of Double objects to be converted * @return the converted array of doubles */ public static double[] convertToDoubles(List values) { double[] result = new double[values.size()]; for(int i = 0; i < result.length; i++) { result[i] = values.get(i); } return result; } /** * Prints the given list to the specified PrintStream. The list entries are * separated by the specified separator. The last entry is not followed by a * separator. Thus, if a newline is used as separator, it might make sense to * print a newline to the PrintStream after calling this method. * * @param object class * @param list the list to be printed * @param separator the separator to separate entries of the list * @param out the target PrintStream */ public static void print(List list, String separator, PrintStream out) { for(Iterator iter = list.iterator(); iter.hasNext();) { out.print(iter.next()); if(iter.hasNext()) { out.print(separator); } } } /** * Returns the index of the maximum of the given values. If no value is bigger * than the first, the index of the first entry is returned. * * @param values the values to find the index of the maximum * @return the index of the maximum in the given values * @throws ArrayIndexOutOfBoundsException if values.length==0 */ public static int getIndexOfMaximum(double[] values) throws ArrayIndexOutOfBoundsException { int index = 0; double max = values[index]; for(int i = 0; i < values.length; i++) { if(values[i] > max) { max = values[i]; index = i; } } return index; } /** * Returns a new BitSet initialized to the values represented by * the specified String only containing 0 and 1 values. * * @param s the string to be parsed. * @return a new BitSet represented by s */ public static BitSet parseBitSet(String s) { try { return parseBitSet(s.toCharArray()); } catch(IllegalArgumentException e) { throw new IllegalArgumentException("The specified String does not represent a bit set " + "containing only 0 and 1 values: " + s); } } /** * Returns a new BitSet initialized to the values represented by * the specified char array only containing '0' and '1' values. * * @param s the char array to be parsed. * @return a new BitSet represented by s */ public static BitSet parseBitSet(char[] s) { BitSet result = new BitSet(); for(int i = 0; i < s.length; i++) { if(s[i] == '1') { result.set(i); } else if(s[i] != '0') { throw new IllegalArgumentException("The specified String does not represent a bit set " + "containing only 0 and 1 values: " + String.valueOf(s)); } } return result; } /** * Returns a string that represents the selected bits of the specified * BitSet, while the first bit starts with 1. The selected bits * are separated by the specified separator sep. * * If sep is ",", the result is suitable as a parameter * for an IntListParameter. * * @param b the bit set to be parsed * @param sep the separator * @return a string representing the selected bits of the specified * BitSet */ public static String parseSelectedBits(BitSet b, String sep) { StringBuffer result = new StringBuffer(); for(int i = b.nextSetBit(0); i >= 0; i = b.nextSetBit(i + 1)) { if(result.length() != 0) { result.append(sep).append(i + 1); } else { result.append((i + 1)); } } return result.toString(); } /** * Convert a bit set to a list of integers, representing bits that are set * * @param b Bitset * @param off Offset, set to 0 to start counting at 0, 1 to start counting at * 1. * @return List */ public static List convertBitSetToListInt(BitSet b, int off) { List list = new ArrayList(); for(int i = b.nextSetBit(0); i >= 0; i = b.nextSetBit(i + 1)) { list.add(i + off); } return list; } /** * Creates a new BitSet of fixed cardinality with randomly set bits. * * @param cardinality the cardinality of the BitSet to create * @param capacity the capacity of the BitSet to create - the randomly * generated indices of the bits set to true will be uniformly * distributed between 0 (inclusive) and capacity (exclusive) * @param random a Random Object to create the sequence of indices set to true * - the same number occurring twice or more is ignored but the already * selected bit remains true * @return a new BitSet with randomly set bits */ public static BitSet randomBitSet(int cardinality, int capacity, Random random) { BitSet bitset = new BitSet(capacity); while(bitset.cardinality() < cardinality) { bitset.set(random.nextInt(capacity)); } return bitset; } /** * Provides a new DoubleVector as a projection on the specified attributes. * * If the given DoubleVector has already an ID not null, the same * ID is set in the returned new DoubleVector. Nevertheless, the returned * DoubleVector is not backed by the given DoubleVector, i.e., any changes * affecting v after calling this method will not affect the * newly returned DoubleVector. * * @param v a DoubleVector to project * @param selectedAttributes the attributes selected for projection * @return a new DoubleVector as a projection on the specified attributes * @throws IllegalArgumentException if the given selected attributes specify * an attribute as selected which is out of range for the given * DoubleVector. * @see DoubleVector#doubleValue(int) */ public static DoubleVector project(DoubleVector v, BitSet selectedAttributes) { double[] newAttributes = new double[selectedAttributes.cardinality()]; int i = 0; for(int d = selectedAttributes.nextSetBit(0); d >= 0; d = selectedAttributes.nextSetBit(d + 1)) { newAttributes[i] = v.doubleValue(d + 1); i++; } DoubleVector projectedVector = new DoubleVector(newAttributes); return projectedVector; } /** * Provides a new SparseFloatVector as a projection on the specified * attributes. * * If the given SparseFloatVector has already an ID not null, the * same ID is set in the returned new SparseFloatVector. Nevertheless, the * returned SparseFloatVector is not backed by the given SparseFloatVector, * i.e., any changes affecting v after calling this method will * not affect the newly returned SparseFloatVector. * * @param v a SparseFloatVector to project * @param selectedAttributes the attributes selected for projection * @return a new SparseFloatVector as a projection on the specified attributes * @throws IllegalArgumentException if the given selected attributes specify * an attribute as selected which is out of range for the given * SparseFloatVector. */ public static SparseFloatVector project(SparseFloatVector v, BitSet selectedAttributes) { Map values = new HashMap(selectedAttributes.cardinality(), 1); for(int d = selectedAttributes.nextSetBit(0); d >= 0; d = selectedAttributes.nextSetBit(d + 1)) { if(v.getValue(d + 1) != 0.0f) { values.put(d, v.getValue(d + 1)); } } SparseFloatVector projectedVector = new SparseFloatVector(values, selectedAttributes.cardinality()); return projectedVector; } /** * Returns the index of the nth set bit in the given BitSet. For * the parameter nthSetBit, following condition is assumed: * 1 ≤ nthSetBit ≤ bitset.cardinality(). Otherwise, i.e., * if the Bitset contains less than nthSetBit set bits or * nthSetBit is not a positive number, the method throws an * IllegalArgumentException. * * The worstcase runtime complexity of this method is in O( * bitset.cardinality()). * * @param bitset the BitSet to derive the index of the nth set bit * in * @param nthSetBit which set bit to derive the index of * @return the index of the nth set bit in the given BitSet * @throws IllegalArgumentException if the Bitset contains less than * nthSetBit set bits or nthSetBit is not a * positive number */ public static int indexOfNthSetBit(BitSet bitset, int nthSetBit) throws IllegalArgumentException { if(nthSetBit < 1 || nthSetBit > bitset.cardinality()) { throw new IllegalArgumentException("Parameter nthSetBit out of range: nthSetBit=" + nthSetBit + ", bitset.cardinality=" + bitset.cardinality()); } int i = 0; int index = -1; for(int d = bitset.nextSetBit(0); d >= 0 && i < nthSetBit; d = bitset.nextSetBit(d + 1)) { i++; index = d; } return index; } /** * Provides the intersection of the two specified sets in the given result * set. * * @param object class * @param s1 the first set * @param s2 the second set * @param result the result set */ public static void intersection(Set s1, Set s2, Set result) { for(O object : s1) { if(s2.contains(object)) { result.add(object); } } } /** * Converts the specified positive integer value into a bit representation, * where bit 0 denotes 20, bit 1 denotes 21 etc. * * @param n the positive integer value to be converted * @return the specified integer value into a bit representation */ public static BitSet int2Bit(int n) { if(n < 0) { throw new IllegalArgumentException("Parameter n hast to be greater than or equal to zero!"); } BitSet result = new BitSet(); int i = 0; while(n > 0) { boolean rest = (n % 2 == 1); if(rest) { result.set(i); } n = n / 2; i++; } return result; } /** * Joins the specified arrays. * * @param array1 the first array * @param array2 the second array * @return a new array containing the entries of array1 and the * array2. */ public static String[] joinArray(String[] array1, String[] array2) { String[] newArray = new String[array1.length + array2.length]; System.arraycopy(array1, 0, newArray, 0, array1.length); System.arraycopy(array2, 0, newArray, array1.length, array2.length); return newArray; } /** * Adds the entries of the specified array to the end of the given list. * * @param object class * @param list the list * @param array the array containing the objects to be added to the list */ public static void addToList(List list, O[] array) { for(O object : array) { list.add(object); } } /** * Search an (unsorted) array linearly for an object. * * @param arr Array to search * @param ref Object to search for * @return Index of object or -1 if not found. */ public static int arrayFind(String[] arr, Object ref) { for(int index = 0; index < arr.length; index++) { if(ref.equals(arr[index])) { return index; } } return -1; } /** * Mix multiple hashcodes into one. * * @param hash Hashcodes to mix * @return Mixed hash code */ public static final int mixHashCodes(int... hash) { final long prime = 2654435761L; if(hash.length == 0) { return 0; } long result = hash[0]; for (int i = 1; i < hash.length; i++) { result = result * prime + hash[i]; } return (int) result; } /** * This class is a virtual collection based on masking an array list using a * bit mask. * * @author Erich Schubert * * @apiviz.stereotype decorator * @apiviz.composedOf java.util.ArrayList * @apiviz.composedOf java.util.BitSet * * @param Object type */ public static class MaskedArrayList extends AbstractCollection implements Collection { /** * Data storage */ protected ArrayList data; /** * The bitmask used for masking */ protected BitSet bits; /** * Flag whether to iterator over set or unset values. */ protected boolean inverse = false; /** * Constructor. * * @param data Data * @param bits Bitset to use as mask * @param inverse Flag to inverse the masking rule */ public MaskedArrayList(ArrayList data, BitSet bits, boolean inverse) { super(); this.data = data; this.bits = bits; this.inverse = inverse; } @Override public boolean add(T e) { throw new UnsupportedOperationException(); } @Override public Iterator iterator() { if(inverse) { return new InvItr(); } else { return new Itr(); } } @Override public int size() { if(inverse) { return data.size() - bits.cardinality(); } else { return bits.cardinality(); } } /** * Iterator over set bits * * @author Erich Schubert * * @apiviz.exclude */ protected class Itr implements Iterator { /** * Next position. */ private int pos; /** * Constructor */ protected Itr() { this.pos = bits.nextSetBit(0); } @Override public boolean hasNext() { return (pos >= 0) && (pos < data.size()); } @Override public T next() { T cur = data.get(pos); pos = bits.nextSetBit(pos + 1); return cur; } @Override public void remove() { throw new UnsupportedOperationException(); } } /** * Iterator over unset elements. * * @author Erich Schubert * * @apiviz.exclude */ protected class InvItr implements Iterator { /** * Next unset position. */ private int pos; /** * Constructor */ protected InvItr() { this.pos = bits.nextClearBit(0); } @Override public boolean hasNext() { return (pos >= 0) && (pos < data.size()); } @Override public T next() { T cur = data.get(pos); pos = bits.nextClearBit(pos + 1); return cur; } @Override public void remove() { throw new UnsupportedOperationException(); } } } }