diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/database/ids/DBIDUtil.java')
-rw-r--r-- | src/de/lmu/ifi/dbs/elki/database/ids/DBIDUtil.java | 355 |
1 files changed, 291 insertions, 64 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/database/ids/DBIDUtil.java b/src/de/lmu/ifi/dbs/elki/database/ids/DBIDUtil.java index 000659b9..9cb4082a 100644 --- a/src/de/lmu/ifi/dbs/elki/database/ids/DBIDUtil.java +++ b/src/de/lmu/ifi/dbs/elki/database/ids/DBIDUtil.java @@ -27,7 +27,13 @@ import java.util.Random; import de.lmu.ifi.dbs.elki.database.ids.generic.UnmodifiableArrayDBIDs; import de.lmu.ifi.dbs.elki.database.ids.generic.UnmodifiableDBIDs; +import de.lmu.ifi.dbs.elki.database.ids.integer.IntegerDBIDs; +import de.lmu.ifi.dbs.elki.database.ids.integer.TroveArrayDBIDs; +import de.lmu.ifi.dbs.elki.database.ids.integer.UnmodifiableIntegerArrayDBIDs; +import de.lmu.ifi.dbs.elki.database.ids.integer.UnmodifiableIntegerDBIDs; +import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; import de.lmu.ifi.dbs.elki.persistent.ByteBufferSerializer; +import de.lmu.ifi.dbs.elki.utilities.RandomFactory; /** * DBID Utility functions. @@ -35,7 +41,11 @@ import de.lmu.ifi.dbs.elki.persistent.ByteBufferSerializer; * @author Erich Schubert * * @apiviz.landmark - * @apiviz.composedOf de.lmu.ifi.dbs.elki.database.ids.DBIDFactory + * + * @apiviz.has DBID + * @apiviz.has DBIDs + * @apiviz.uses DBIDRef + * @apiviz.composedOf DBIDFactory */ public final class DBIDUtil { /** @@ -51,9 +61,20 @@ public final class DBIDUtil { public static final EmptyDBIDs EMPTYDBIDS = new EmptyDBIDs(); /** - * Import an Integer DBID. + * Get the invalid special ID. + * + * @return invalid ID value + */ + public static DBIDRef invalid() { + return DBIDFactory.FACTORY.invalid(); + } + + /** + * Import and integer as DBID. * - * @param id Integer ID + * Note: this may not be possible for some factories! + * + * @param id Integer ID to import * @return DBID */ public static DBID importInteger(int id) { @@ -61,25 +82,102 @@ public final class DBIDUtil { } /** - * Get a serializer for DBIDs + * Export a DBID as int. + * + * Note: this may not be possible for some factories! + * + * @param id DBID to export + * @return integer value + */ + public static int asInteger(DBIDRef id) { + return id.internalGetIndex(); + } + + /** + * Compare two DBIDs. + * + * @param id1 First ID + * @param id2 Second ID + * @return Comparison result + */ + public static int compare(DBIDRef id1, DBIDRef id2) { + return DBIDFactory.FACTORY.compare(id1, id2); + } + + /** + * Test two DBIDs for equality. + * + * @param id1 First ID + * @param id2 Second ID + * @return Comparison result + */ + public static boolean equal(DBIDRef id1, DBIDRef id2) { + return DBIDFactory.FACTORY.equal(id1, id2); + } + + /** + * Dereference a DBID reference. + * + * @param ref DBID reference + * @return DBID + */ + public static DBID deref(DBIDRef ref) { + if (ref instanceof DBID) { + return (DBID) ref; + } + return importInteger(ref.internalGetIndex()); + } + + /** + * Format a DBID as string. + * + * @param id DBID + * @return String representation + */ + public static String toString(DBIDRef id) { + return DBIDFactory.FACTORY.toString(id); + } + + /** + * Format a DBID as string. + * + * @param ids DBIDs + * @return String representation + */ + public static String toString(DBIDs ids) { + if (ids instanceof DBID) { + return DBIDFactory.FACTORY.toString((DBID) ids); + } + StringBuilder buf = new StringBuilder(); + for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { + if (buf.length() > 0) { + buf.append(", "); + } + buf.append(DBIDFactory.FACTORY.toString(iter)); + } + return buf.toString(); + } + + /** + * Get a serializer for DBIDs. * * @return DBID serializer */ - public ByteBufferSerializer<DBID> getDBIDSerializer() { + public static ByteBufferSerializer<DBID> getDBIDSerializer() { return DBIDFactory.FACTORY.getDBIDSerializer(); } /** - * Get a serializer for DBIDs with static size + * Get a serializer for DBIDs with static size. * * @return DBID serializer */ - public ByteBufferSerializer<DBID> getDBIDSerializerStatic() { + public static ByteBufferSerializer<DBID> getDBIDSerializerStatic() { return DBIDFactory.FACTORY.getDBIDSerializerStatic(); } /** - * Generate a single DBID + * Generate a single DBID. * * @return A single DBID */ @@ -116,6 +214,25 @@ public final class DBIDUtil { } /** + * Make a new DBID variable. + * + * @param val Initial value. + * @return Variable + */ + public static DBIDVar newVar(DBIDRef val) { + return DBIDFactory.FACTORY.newVar(val); + } + + /** + * Make a new DBID variable. + * + * @return Variable + */ + public static DBIDVar newVar() { + return DBIDFactory.FACTORY.newVar(DBIDFactory.FACTORY.invalid()); + } + + /** * Make a new (modifiable) array of DBIDs. * * @return New array @@ -180,14 +297,14 @@ public final class DBIDUtil { * @param second Second set * @return result. */ - // TODO: optimize? + // TODO: optimize better? public static ModifiableDBIDs intersection(DBIDs first, DBIDs second) { - if(first.size() > second.size()) { + if (first.size() > second.size()) { return intersection(second, first); } ModifiableDBIDs inter = newHashSet(first.size()); - for(DBIDIter it = first.iter(); it.valid(); it.advance()) { - if(second.contains(it)) { + for (DBIDIter it = first.iter(); it.valid(); it.advance()) { + if (second.contains(it)) { inter.add(it); } } @@ -195,6 +312,26 @@ public final class DBIDUtil { } /** + * Compute the set intersection size of two sets. + * + * @param first First set + * @param second Second set + * @return size + */ + public static int intersectionSize(DBIDs first, DBIDs second) { + if (first.size() > second.size()) { + return intersectionSize(second, first); + } + int c = 0; + for (DBIDIter it = first.iter(); it.valid(); it.advance()) { + if (second.contains(it)) { + c++; + } + } + return c; + } + + /** * Compute the set symmetric intersection of two sets. * * @param first First set @@ -205,18 +342,18 @@ public final class DBIDUtil { */ // TODO: optimize? public static void symmetricIntersection(DBIDs first, DBIDs second, HashSetModifiableDBIDs firstonly, HashSetModifiableDBIDs intersection, HashSetModifiableDBIDs secondonly) { - if(first.size() > second.size()) { + if (first.size() > second.size()) { symmetricIntersection(second, first, secondonly, intersection, firstonly); return; } - assert(firstonly.size() == 0) : "OUTPUT set should be empty!"; - assert(intersection.size() == 0) : "OUTPUT set should be empty!"; - assert(secondonly.size() == 0) : "OUTPUT set should be empty!"; + assert (firstonly.size() == 0) : "OUTPUT set should be empty!"; + assert (intersection.size() == 0) : "OUTPUT set should be empty!"; + assert (secondonly.size() == 0) : "OUTPUT set should be empty!"; // Initialize with second secondonly.addDBIDs(second); - for(DBIDIter it = first.iter(); it.valid(); it.advance()) { + for (DBIDIter it = first.iter(); it.valid(); it.advance()) { // Try to remove - if(secondonly.remove(it)) { + if (secondonly.remove(it)) { intersection.add(it); } else { firstonly.add(it); @@ -258,27 +395,31 @@ public final class DBIDUtil { * @return Unmodifiable collection */ public static StaticDBIDs makeUnmodifiable(DBIDs existing) { - if(existing instanceof StaticDBIDs) { + if (existing instanceof StaticDBIDs) { return (StaticDBIDs) existing; } + if (existing instanceof TroveArrayDBIDs) { + return new UnmodifiableIntegerArrayDBIDs((TroveArrayDBIDs) existing); + } + if (existing instanceof IntegerDBIDs) { + return new UnmodifiableIntegerDBIDs((IntegerDBIDs) existing); + } if (existing instanceof ArrayDBIDs) { - return new UnmodifiableArrayDBIDs((ArrayDBIDs)existing); - } else { - return new UnmodifiableDBIDs(existing); + return new UnmodifiableArrayDBIDs((ArrayDBIDs) existing); } + return new UnmodifiableDBIDs(existing); } /** * Ensure that the given DBIDs are array-indexable. * - * @param ids + * @param ids IDs * @return Array DBIDs. */ public static ArrayDBIDs ensureArray(DBIDs ids) { - if(ids instanceof ArrayDBIDs) { + if (ids instanceof ArrayDBIDs) { return (ArrayDBIDs) ids; - } - else { + } else { return newArray(ids); } } @@ -286,33 +427,31 @@ public final class DBIDUtil { /** * Ensure that the given DBIDs support fast "contains" operations. * - * @param ids - * @return Array DBIDs. + * @param ids IDs + * @return Set DBIDs. */ public static SetDBIDs ensureSet(DBIDs ids) { - if(ids instanceof SetDBIDs) { + if (ids instanceof SetDBIDs) { return (SetDBIDs) ids; - } - else { + } else { return newHashSet(ids); } } /** - * Ensure modifiable + * Ensure modifiable. * - * @param ids - * @return Array DBIDs. + * @param ids IDs + * @return Modifiable DBIDs. */ public static ModifiableDBIDs ensureModifiable(DBIDs ids) { - if(ids instanceof ModifiableDBIDs) { + if (ids instanceof ModifiableDBIDs) { return (ModifiableDBIDs) ids; - } - else { - if(ids instanceof ArrayDBIDs) { + } else { + if (ids instanceof ArrayDBIDs) { return newArray(ids); } - if(ids instanceof HashSetDBIDs) { + if (ids instanceof HashSetDBIDs) { return newHashSet(ids); } return newArray(ids); @@ -328,11 +467,91 @@ public final class DBIDUtil { * @return DBID pair */ public static DBIDPair newPair(DBIDRef id1, DBIDRef id2) { - return DBIDFactory.FACTORY.makePair(id1, id2); + return DBIDFactory.FACTORY.newPair(id1, id2); + } + + /** + * Make a DoubleDBIDPair. + * + * @param val double value + * @param id ID + * @return new pair + */ + public static DoubleDBIDPair newPair(double val, DBIDRef id) { + return DBIDFactory.FACTORY.newPair(val, id); + } + + /** + * Make a DistanceDBIDPair. + * + * @param dist Distance value + * @param id ID + * @return new pair + */ + public static <D extends Distance<D>> DistanceDBIDPair<D> newDistancePair(D dist, DBIDRef id) { + return DBIDFactory.FACTORY.newDistancePair(dist, id); + } + + /** + * Make a DoubleDistanceDBIDPair. + * + * @param dist Distance value + * @param id ID + * @return new pair + */ + public static DoubleDistanceDBIDPair newDistancePair(double dist, DBIDRef id) { + return DBIDFactory.FACTORY.newDistancePair(dist, id); } /** - * Produce a random sample of the given DBIDs + * Produce a random sample of the given DBIDs. + * + * @param source Original DBIDs + * @param k k Parameter + * @param rnd Random generator + * @return new DBIDs + */ + public static ModifiableDBIDs randomSample(DBIDs source, int k, RandomFactory rnd) { + return randomSample(source, k, rnd.getRandom()); + } + + /** + * Produce a random shuffling of the given DBID array. + * + * @param ids Original DBIDs + * @param rnd Random generator + */ + public static void randomShuffle(ArrayModifiableDBIDs ids, RandomFactory rnd) { + randomShuffle(ids, rnd.getRandom(), ids.size()); + } + + /** + * Produce a random shuffling of the given DBID array. + * + * @param ids Original DBIDs + * @param random Random generator + */ + public static void randomShuffle(ArrayModifiableDBIDs ids, Random random) { + randomShuffle(ids, random, ids.size()); + } + + /** + * Produce a random shuffling of the given DBID array. + * + * Only the first {@code limit} elements will be randomized. + * + * @param ids Original DBIDs + * @param random Random generator + * @param limit Shuffling limit. + */ + public static void randomShuffle(ArrayModifiableDBIDs ids, Random random, final int limit) { + for (int i = 1; i < limit; i++) { + ids.swap(i - 1, i + random.nextInt(limit - i)); + } + } + + /** + * Produce a random sample of the given DBIDs. * * @param source Original DBIDs * @param k k Parameter @@ -340,11 +559,11 @@ public final class DBIDUtil { * @return new DBIDs */ public static ModifiableDBIDs randomSample(DBIDs source, int k, int seed) { - return randomSample(source, k, (long) seed); + return randomSample(source, k, new Random(seed)); } /** - * Produce a random sample of the given DBIDs + * Produce a random sample of the given DBIDs. * * @param source Original DBIDs * @param k k Parameter @@ -352,39 +571,47 @@ public final class DBIDUtil { * @return new DBIDs */ public static ModifiableDBIDs randomSample(DBIDs source, int k, Long seed) { - if(k <= 0 || k > source.size()) { - throw new IllegalArgumentException("Illegal value for size of random sample: " + k+ " > "+source.size()+" or < 0"); + if (seed != null) { + return randomSample(source, k, new Random(seed.longValue())); + } else { + return randomSample(source, k, new Random()); } - final Random random; - if(seed != null) { - random = new Random(seed); + } + + /** + * Produce a random sample of the given DBIDs. + * + * @param source Original DBIDs + * @param k k Parameter + * @param random Random generator + * @return new DBIDs + */ + public static ModifiableDBIDs randomSample(DBIDs source, int k, Random random) { + if (k <= 0 || k > source.size()) { + throw new IllegalArgumentException("Illegal value for size of random sample: " + k + " > " + source.size() + " or < 0"); } - else { + if (random == null) { random = new Random(); } // TODO: better balancing for different sizes // Two methods: constructive vs. destructive - if(k < source.size() / 2) { + if (k < source.size() >> 1) { ArrayDBIDs aids = DBIDUtil.ensureArray(source); + DBIDArrayIter iter = aids.iter(); HashSetModifiableDBIDs sample = DBIDUtil.newHashSet(k); - while(sample.size() < k) { - sample.add(aids.get(random.nextInt(aids.size()))); + while (sample.size() < k) { + iter.seek(random.nextInt(aids.size())); + sample.add(iter); } return sample; - } - else { + } else { ArrayModifiableDBIDs sample = DBIDUtil.newArray(source); - while(sample.size() > k) { - // Element to remove - int idx = random.nextInt(sample.size()); - // Remove last element - DBID last = sample.remove(sample.size() - 1); - // Replace target element: - if(idx < sample.size()) { - sample.set(idx, last); - } + randomShuffle(sample, random, k); + // Delete trailing elements + for (int i = sample.size() - 1; i > k; i--) { + sample.remove(i); } return sample; } } -}
\ No newline at end of file +} |