package de.lmu.ifi.dbs.elki.database.ids; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ import java.util.Random; import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDPair; import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDPair; import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList; import de.lmu.ifi.dbs.elki.database.ids.distance.KNNHeap; import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; import de.lmu.ifi.dbs.elki.database.ids.generic.DoubleDistanceKNNSubList; import de.lmu.ifi.dbs.elki.database.ids.generic.KNNSubList; import de.lmu.ifi.dbs.elki.database.ids.generic.UnmodifiableArrayDBIDs; import de.lmu.ifi.dbs.elki.database.ids.generic.UnmodifiableDBIDs; import de.lmu.ifi.dbs.elki.database.ids.integer.IntegerArrayDBIDs; import de.lmu.ifi.dbs.elki.database.ids.integer.IntegerDBIDs; import de.lmu.ifi.dbs.elki.database.ids.integer.UnmodifiableIntegerArrayDBIDs; import de.lmu.ifi.dbs.elki.database.ids.integer.UnmodifiableIntegerDBIDs; import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; import de.lmu.ifi.dbs.elki.persistent.ByteBufferSerializer; import de.lmu.ifi.dbs.elki.utilities.RandomFactory; /** * DBID Utility functions. * * @author Erich Schubert * * @apiviz.landmark * * @apiviz.has DBID * @apiviz.has DBIDs * @apiviz.uses DBIDRef * @apiviz.composedOf DBIDFactory */ public final class DBIDUtil { /** * Static - no public constructor. */ private DBIDUtil() { // Never called. } /** * Final, global copy of empty DBIDs. */ public static final EmptyDBIDs EMPTYDBIDS = new EmptyDBIDs(); /** * Get the invalid special ID. * * @return invalid ID value */ public static DBIDRef invalid() { return DBIDFactory.FACTORY.invalid(); } /** * Import and integer as DBID. * * Note: this may not be possible for some factories! * * @param id Integer ID to import * @return DBID */ public static DBID importInteger(int id) { return DBIDFactory.FACTORY.importInteger(id); } /** * Export a DBID as int. * * Note: this may not be possible for some factories! * * @param id DBID to export * @return integer value */ public static int asInteger(DBIDRef id) { return id.internalGetIndex(); } /** * Compare two DBIDs. * * @param id1 First ID * @param id2 Second ID * @return Comparison result */ public static int compare(DBIDRef id1, DBIDRef id2) { return DBIDFactory.FACTORY.compare(id1, id2); } /** * Test two DBIDs for equality. * * @param id1 First ID * @param id2 Second ID * @return Comparison result */ public static boolean equal(DBIDRef id1, DBIDRef id2) { return DBIDFactory.FACTORY.equal(id1, id2); } /** * Dereference a DBID reference. * * @param ref DBID reference * @return DBID */ public static DBID deref(DBIDRef ref) { if (ref instanceof DBID) { return (DBID) ref; } return importInteger(ref.internalGetIndex()); } /** * Format a DBID as string. * * @param id DBID * @return String representation */ public static String toString(DBIDRef id) { return DBIDFactory.FACTORY.toString(id); } /** * Format a DBID as string. * * @param ids DBIDs * @return String representation */ public static String toString(DBIDs ids) { if (ids instanceof DBID) { return DBIDFactory.FACTORY.toString((DBID) ids); } StringBuilder buf = new StringBuilder(); for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { if (buf.length() > 0) { buf.append(", "); } buf.append(DBIDFactory.FACTORY.toString(iter)); } return buf.toString(); } /** * Get a serializer for DBIDs. * * @return DBID serializer */ public static ByteBufferSerializer getDBIDSerializer() { return DBIDFactory.FACTORY.getDBIDSerializer(); } /** * Get a serializer for DBIDs with static size. * * @return DBID serializer */ public static ByteBufferSerializer getDBIDSerializerStatic() { return DBIDFactory.FACTORY.getDBIDSerializerStatic(); } /** * Generate a single DBID. * * @return A single DBID */ public static DBID generateSingleDBID() { return DBIDFactory.FACTORY.generateSingleDBID(); } /** * Return a single DBID for reuse. * * @param id DBID to deallocate */ public static void deallocateSingleDBID(DBID id) { DBIDFactory.FACTORY.deallocateSingleDBID(id); } /** * Generate a static DBID range. * * @param size Requested size * @return DBID range */ public static DBIDRange generateStaticDBIDRange(int size) { return DBIDFactory.FACTORY.generateStaticDBIDRange(size); } /** * Deallocate a static DBID range. * * @param range Range to deallocate */ public static void deallocateDBIDRange(DBIDRange range) { DBIDFactory.FACTORY.deallocateDBIDRange(range); } /** * Make a new DBID variable. * * @param val Initial value. * @return Variable */ public static DBIDVar newVar(DBIDRef val) { return DBIDFactory.FACTORY.newVar(val); } /** * Make a new DBID variable. * * @return Variable */ public static DBIDVar newVar() { return DBIDFactory.FACTORY.newVar(DBIDFactory.FACTORY.invalid()); } /** * Make a new (modifiable) array of DBIDs. * * @return New array */ public static ArrayModifiableDBIDs newArray() { return DBIDFactory.FACTORY.newArray(); } /** * Make a new (modifiable) hash set of DBIDs. * * @return New hash set */ public static HashSetModifiableDBIDs newHashSet() { return DBIDFactory.FACTORY.newHashSet(); } /** * Make a new (modifiable) array of DBIDs. * * @param size Size hint * @return New array */ public static ArrayModifiableDBIDs newArray(int size) { return DBIDFactory.FACTORY.newArray(size); } /** * Make a new (modifiable) hash set of DBIDs. * * @param size Size hint * @return New hash set */ public static HashSetModifiableDBIDs newHashSet(int size) { return DBIDFactory.FACTORY.newHashSet(size); } /** * Make a new (modifiable) array of DBIDs. * * @param existing Existing DBIDs * @return New array */ public static ArrayModifiableDBIDs newArray(DBIDs existing) { return DBIDFactory.FACTORY.newArray(existing); } /** * Make a new (modifiable) hash set of DBIDs. * * @param existing Existing DBIDs * @return New hash set */ public static HashSetModifiableDBIDs newHashSet(DBIDs existing) { return DBIDFactory.FACTORY.newHashSet(existing); } /** * Compute the set intersection of two sets. * * @param first First set * @param second Second set * @return result. */ // TODO: optimize better? public static ModifiableDBIDs intersection(DBIDs first, DBIDs second) { if (first.size() > second.size()) { return intersection(second, first); } ModifiableDBIDs inter = newHashSet(first.size()); for (DBIDIter it = first.iter(); it.valid(); it.advance()) { if (second.contains(it)) { inter.add(it); } } return inter; } /** * Compute the set intersection size of two sets. * * @param first First set * @param second Second set * @return size */ public static int intersectionSize(DBIDs first, DBIDs second) { // If exactly one is a Set, use it as second parameter. if (second instanceof SetDBIDs) { if (!(first instanceof SetDBIDs)) { return internalIntersectionSize(first, second); } } else { if (first instanceof SetDBIDs) { return internalIntersectionSize(second, first); } } // Both are the same type: both set or both non set. // Smaller goes first. if (first.size() <= second.size()) { return internalIntersectionSize(first, second); } else { return internalIntersectionSize(second, first); } } /** * Compute the set intersection size of two sets. * * @param first First set * @param second Second set * @return size */ private static int internalIntersectionSize(DBIDs first, DBIDs second) { int c = 0; for (DBIDIter it = first.iter(); it.valid(); it.advance()) { if (second.contains(it)) { c++; } } return c; } /** * Compute the set symmetric intersection of two sets. * * @param first First set * @param second Second set * @param firstonly OUTPUT: elements only in first. MUST BE EMPTY * @param intersection OUTPUT: elements in intersection. MUST BE EMPTY * @param secondonly OUTPUT: elements only in second. MUST BE EMPTY */ // TODO: optimize? public static void symmetricIntersection(DBIDs first, DBIDs second, HashSetModifiableDBIDs firstonly, HashSetModifiableDBIDs intersection, HashSetModifiableDBIDs secondonly) { if (first.size() > second.size()) { symmetricIntersection(second, first, secondonly, intersection, firstonly); return; } assert (firstonly.size() == 0) : "OUTPUT set should be empty!"; assert (intersection.size() == 0) : "OUTPUT set should be empty!"; assert (secondonly.size() == 0) : "OUTPUT set should be empty!"; // Initialize with second secondonly.addDBIDs(second); for (DBIDIter it = first.iter(); it.valid(); it.advance()) { // Try to remove if (secondonly.remove(it)) { intersection.add(it); } else { firstonly.add(it); } } } /** * Returns the union of the two specified collection of IDs. * * @param ids1 the first collection * @param ids2 the second collection * @return the union of ids1 and ids2 without duplicates */ public static ModifiableDBIDs union(DBIDs ids1, DBIDs ids2) { ModifiableDBIDs result = DBIDUtil.newHashSet(Math.max(ids1.size(), ids2.size())); result.addDBIDs(ids1); result.addDBIDs(ids2); return result; } /** * Returns the difference of the two specified collection of IDs. * * @param ids1 the first collection * @param ids2 the second collection * @return the difference of ids1 minus ids2 */ public static ModifiableDBIDs difference(DBIDs ids1, DBIDs ids2) { ModifiableDBIDs result = DBIDUtil.newHashSet(ids1); result.removeDBIDs(ids2); return result; } /** * Wrap an existing DBIDs collection to be unmodifiable. * * @param existing Existing collection * @return Unmodifiable collection */ public static StaticDBIDs makeUnmodifiable(DBIDs existing) { if (existing instanceof StaticDBIDs) { return (StaticDBIDs) existing; } if (existing instanceof IntegerArrayDBIDs) { return new UnmodifiableIntegerArrayDBIDs((IntegerArrayDBIDs) existing); } if (existing instanceof IntegerDBIDs) { return new UnmodifiableIntegerDBIDs((IntegerDBIDs) existing); } if (existing instanceof ArrayDBIDs) { return new UnmodifiableArrayDBIDs((ArrayDBIDs) existing); } return new UnmodifiableDBIDs(existing); } /** * Ensure that the given DBIDs are array-indexable. * * @param ids IDs * @return Array DBIDs. */ public static ArrayDBIDs ensureArray(DBIDs ids) { if (ids instanceof ArrayDBIDs) { return (ArrayDBIDs) ids; } else { return newArray(ids); } } /** * Ensure that the given DBIDs support fast "contains" operations. * * @param ids IDs * @return Set DBIDs. */ public static SetDBIDs ensureSet(DBIDs ids) { if (ids instanceof SetDBIDs) { return (SetDBIDs) ids; } else { return newHashSet(ids); } } /** * Ensure modifiable. * * @param ids IDs * @return Modifiable DBIDs. */ public static ModifiableDBIDs ensureModifiable(DBIDs ids) { if (ids instanceof ModifiableDBIDs) { return (ModifiableDBIDs) ids; } else { if (ids instanceof ArrayDBIDs) { return newArray(ids); } if (ids instanceof HashSetDBIDs) { return newHashSet(ids); } return newArray(ids); } } /** * Make a DBID pair. * * @param id1 first ID * @param id2 second ID * * @return DBID pair */ public static DBIDPair newPair(DBIDRef id1, DBIDRef id2) { return DBIDFactory.FACTORY.newPair(id1, id2); } /** * Make a DoubleDBIDPair. * * @param val double value * @param id ID * @return new pair */ public static DoubleDBIDPair newPair(double val, DBIDRef id) { return DBIDFactory.FACTORY.newPair(val, id); } /** * Make a DistanceDBIDPair. * * @param dist Distance value * @param id ID * @return new pair */ public static > DistanceDBIDPair newDistancePair(D dist, DBIDRef id) { return DBIDFactory.FACTORY.newDistancePair(dist, id); } /** * Make a DoubleDistanceDBIDPair. * * @param dist Distance value * @param id ID * @return new pair */ public static DoubleDistanceDBIDPair newDistancePair(double dist, DBIDRef id) { return DBIDFactory.FACTORY.newDistancePair(dist, id); } /** * Create an appropriate heap for the distance type. * * This will use a double heap if appropriate. * * @param distancetype distance prototype * @param k K value * @param distance type * @return New heap of size k, appropriate for this distance type. */ public static > KNNHeap newHeap(D distancetype, int k) { return DBIDFactory.FACTORY.newHeap(distancetype, k); } /** * Build a new heap from a given list. * * @param exist Existing result * @param Distance type * @return New heap */ public static > KNNHeap newHeap(KNNList exist) { return DBIDFactory.FACTORY.newHeap(exist); } /** * Produce a random sample of the given DBIDs. * * @param source Original DBIDs * @param k k Parameter * @param rnd Random generator * @return new DBIDs */ public static ModifiableDBIDs randomSample(DBIDs source, int k, RandomFactory rnd) { return randomSample(source, k, rnd.getRandom()); } /** * Produce a random shuffling of the given DBID array. * * @param ids Original DBIDs * @param rnd Random generator */ public static void randomShuffle(ArrayModifiableDBIDs ids, RandomFactory rnd) { randomShuffle(ids, rnd.getRandom(), ids.size()); } /** * Produce a random shuffling of the given DBID array. * * @param ids Original DBIDs * @param random Random generator */ public static void randomShuffle(ArrayModifiableDBIDs ids, Random random) { randomShuffle(ids, random, ids.size()); } /** * Produce a random shuffling of the given DBID array. * * Only the first {@code limit} elements will be randomized. * * @param ids Original DBIDs * @param random Random generator * @param limit Shuffling limit. */ public static void randomShuffle(ArrayModifiableDBIDs ids, Random random, final int limit) { for (int i = 1; i < limit; i++) { ids.swap(i - 1, i + random.nextInt(limit - i)); } } /** * Produce a random sample of the given DBIDs. * * @param source Original DBIDs * @param k k Parameter * @param seed Random generator seed * @return new DBIDs */ public static ModifiableDBIDs randomSample(DBIDs source, int k, int seed) { return randomSample(source, k, new Random(seed)); } /** * Produce a random sample of the given DBIDs. * * @param source Original DBIDs * @param k k Parameter * @param seed Random generator seed * @return new DBIDs */ public static ModifiableDBIDs randomSample(DBIDs source, int k, Long seed) { if (seed != null) { return randomSample(source, k, new Random(seed.longValue())); } else { return randomSample(source, k, new Random()); } } /** * Produce a random sample of the given DBIDs. * * @param source Original DBIDs * @param k k Parameter * @param random Random generator * @return new DBIDs */ public static ModifiableDBIDs randomSample(DBIDs source, int k, Random random) { if (k <= 0 || k > source.size()) { throw new IllegalArgumentException("Illegal value for size of random sample: " + k + " > " + source.size() + " or < 0"); } if (random == null) { random = new Random(); } // TODO: better balancing for different sizes // Two methods: constructive vs. destructive if (k < source.size() >> 1) { ArrayDBIDs aids = DBIDUtil.ensureArray(source); DBIDArrayIter iter = aids.iter(); HashSetModifiableDBIDs sample = DBIDUtil.newHashSet(k); while (sample.size() < k) { iter.seek(random.nextInt(aids.size())); sample.add(iter); } return sample; } else { ArrayModifiableDBIDs sample = DBIDUtil.newArray(source); randomShuffle(sample, random, k); // Delete trailing elements for (int i = sample.size() - 1; i > k; i--) { sample.remove(i); } return sample; } } /** * Get a subset of the KNN result. * * @param list Existing list * @param k k * @param distance type * @return Subset */ @SuppressWarnings("unchecked") public static > KNNList subList(KNNList list, int k) { if (k >= list.size()) { return list; } if (list instanceof DoubleDistanceKNNList) { return (KNNList) new DoubleDistanceKNNSubList((DoubleDistanceKNNList) list, k); } return new KNNSubList<>(list, k); } }