summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/database/ids/DBIDUtil.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/database/ids/DBIDUtil.java')
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/ids/DBIDUtil.java355
1 files changed, 291 insertions, 64 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/database/ids/DBIDUtil.java b/src/de/lmu/ifi/dbs/elki/database/ids/DBIDUtil.java
index 000659b9..9cb4082a 100644
--- a/src/de/lmu/ifi/dbs/elki/database/ids/DBIDUtil.java
+++ b/src/de/lmu/ifi/dbs/elki/database/ids/DBIDUtil.java
@@ -27,7 +27,13 @@ import java.util.Random;
import de.lmu.ifi.dbs.elki.database.ids.generic.UnmodifiableArrayDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.generic.UnmodifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.integer.IntegerDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.integer.TroveArrayDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.integer.UnmodifiableIntegerArrayDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.integer.UnmodifiableIntegerDBIDs;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.persistent.ByteBufferSerializer;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
/**
* DBID Utility functions.
@@ -35,7 +41,11 @@ import de.lmu.ifi.dbs.elki.persistent.ByteBufferSerializer;
* @author Erich Schubert
*
* @apiviz.landmark
- * @apiviz.composedOf de.lmu.ifi.dbs.elki.database.ids.DBIDFactory
+ *
+ * @apiviz.has DBID
+ * @apiviz.has DBIDs
+ * @apiviz.uses DBIDRef
+ * @apiviz.composedOf DBIDFactory
*/
public final class DBIDUtil {
/**
@@ -51,9 +61,20 @@ public final class DBIDUtil {
public static final EmptyDBIDs EMPTYDBIDS = new EmptyDBIDs();
/**
- * Import an Integer DBID.
+ * Get the invalid special ID.
+ *
+ * @return invalid ID value
+ */
+ public static DBIDRef invalid() {
+ return DBIDFactory.FACTORY.invalid();
+ }
+
+ /**
+ * Import and integer as DBID.
*
- * @param id Integer ID
+ * Note: this may not be possible for some factories!
+ *
+ * @param id Integer ID to import
* @return DBID
*/
public static DBID importInteger(int id) {
@@ -61,25 +82,102 @@ public final class DBIDUtil {
}
/**
- * Get a serializer for DBIDs
+ * Export a DBID as int.
+ *
+ * Note: this may not be possible for some factories!
+ *
+ * @param id DBID to export
+ * @return integer value
+ */
+ public static int asInteger(DBIDRef id) {
+ return id.internalGetIndex();
+ }
+
+ /**
+ * Compare two DBIDs.
+ *
+ * @param id1 First ID
+ * @param id2 Second ID
+ * @return Comparison result
+ */
+ public static int compare(DBIDRef id1, DBIDRef id2) {
+ return DBIDFactory.FACTORY.compare(id1, id2);
+ }
+
+ /**
+ * Test two DBIDs for equality.
+ *
+ * @param id1 First ID
+ * @param id2 Second ID
+ * @return Comparison result
+ */
+ public static boolean equal(DBIDRef id1, DBIDRef id2) {
+ return DBIDFactory.FACTORY.equal(id1, id2);
+ }
+
+ /**
+ * Dereference a DBID reference.
+ *
+ * @param ref DBID reference
+ * @return DBID
+ */
+ public static DBID deref(DBIDRef ref) {
+ if (ref instanceof DBID) {
+ return (DBID) ref;
+ }
+ return importInteger(ref.internalGetIndex());
+ }
+
+ /**
+ * Format a DBID as string.
+ *
+ * @param id DBID
+ * @return String representation
+ */
+ public static String toString(DBIDRef id) {
+ return DBIDFactory.FACTORY.toString(id);
+ }
+
+ /**
+ * Format a DBID as string.
+ *
+ * @param ids DBIDs
+ * @return String representation
+ */
+ public static String toString(DBIDs ids) {
+ if (ids instanceof DBID) {
+ return DBIDFactory.FACTORY.toString((DBID) ids);
+ }
+ StringBuilder buf = new StringBuilder();
+ for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ if (buf.length() > 0) {
+ buf.append(", ");
+ }
+ buf.append(DBIDFactory.FACTORY.toString(iter));
+ }
+ return buf.toString();
+ }
+
+ /**
+ * Get a serializer for DBIDs.
*
* @return DBID serializer
*/
- public ByteBufferSerializer<DBID> getDBIDSerializer() {
+ public static ByteBufferSerializer<DBID> getDBIDSerializer() {
return DBIDFactory.FACTORY.getDBIDSerializer();
}
/**
- * Get a serializer for DBIDs with static size
+ * Get a serializer for DBIDs with static size.
*
* @return DBID serializer
*/
- public ByteBufferSerializer<DBID> getDBIDSerializerStatic() {
+ public static ByteBufferSerializer<DBID> getDBIDSerializerStatic() {
return DBIDFactory.FACTORY.getDBIDSerializerStatic();
}
/**
- * Generate a single DBID
+ * Generate a single DBID.
*
* @return A single DBID
*/
@@ -116,6 +214,25 @@ public final class DBIDUtil {
}
/**
+ * Make a new DBID variable.
+ *
+ * @param val Initial value.
+ * @return Variable
+ */
+ public static DBIDVar newVar(DBIDRef val) {
+ return DBIDFactory.FACTORY.newVar(val);
+ }
+
+ /**
+ * Make a new DBID variable.
+ *
+ * @return Variable
+ */
+ public static DBIDVar newVar() {
+ return DBIDFactory.FACTORY.newVar(DBIDFactory.FACTORY.invalid());
+ }
+
+ /**
* Make a new (modifiable) array of DBIDs.
*
* @return New array
@@ -180,14 +297,14 @@ public final class DBIDUtil {
* @param second Second set
* @return result.
*/
- // TODO: optimize?
+ // TODO: optimize better?
public static ModifiableDBIDs intersection(DBIDs first, DBIDs second) {
- if(first.size() > second.size()) {
+ if (first.size() > second.size()) {
return intersection(second, first);
}
ModifiableDBIDs inter = newHashSet(first.size());
- for(DBIDIter it = first.iter(); it.valid(); it.advance()) {
- if(second.contains(it)) {
+ for (DBIDIter it = first.iter(); it.valid(); it.advance()) {
+ if (second.contains(it)) {
inter.add(it);
}
}
@@ -195,6 +312,26 @@ public final class DBIDUtil {
}
/**
+ * Compute the set intersection size of two sets.
+ *
+ * @param first First set
+ * @param second Second set
+ * @return size
+ */
+ public static int intersectionSize(DBIDs first, DBIDs second) {
+ if (first.size() > second.size()) {
+ return intersectionSize(second, first);
+ }
+ int c = 0;
+ for (DBIDIter it = first.iter(); it.valid(); it.advance()) {
+ if (second.contains(it)) {
+ c++;
+ }
+ }
+ return c;
+ }
+
+ /**
* Compute the set symmetric intersection of two sets.
*
* @param first First set
@@ -205,18 +342,18 @@ public final class DBIDUtil {
*/
// TODO: optimize?
public static void symmetricIntersection(DBIDs first, DBIDs second, HashSetModifiableDBIDs firstonly, HashSetModifiableDBIDs intersection, HashSetModifiableDBIDs secondonly) {
- if(first.size() > second.size()) {
+ if (first.size() > second.size()) {
symmetricIntersection(second, first, secondonly, intersection, firstonly);
return;
}
- assert(firstonly.size() == 0) : "OUTPUT set should be empty!";
- assert(intersection.size() == 0) : "OUTPUT set should be empty!";
- assert(secondonly.size() == 0) : "OUTPUT set should be empty!";
+ assert (firstonly.size() == 0) : "OUTPUT set should be empty!";
+ assert (intersection.size() == 0) : "OUTPUT set should be empty!";
+ assert (secondonly.size() == 0) : "OUTPUT set should be empty!";
// Initialize with second
secondonly.addDBIDs(second);
- for(DBIDIter it = first.iter(); it.valid(); it.advance()) {
+ for (DBIDIter it = first.iter(); it.valid(); it.advance()) {
// Try to remove
- if(secondonly.remove(it)) {
+ if (secondonly.remove(it)) {
intersection.add(it);
} else {
firstonly.add(it);
@@ -258,27 +395,31 @@ public final class DBIDUtil {
* @return Unmodifiable collection
*/
public static StaticDBIDs makeUnmodifiable(DBIDs existing) {
- if(existing instanceof StaticDBIDs) {
+ if (existing instanceof StaticDBIDs) {
return (StaticDBIDs) existing;
}
+ if (existing instanceof TroveArrayDBIDs) {
+ return new UnmodifiableIntegerArrayDBIDs((TroveArrayDBIDs) existing);
+ }
+ if (existing instanceof IntegerDBIDs) {
+ return new UnmodifiableIntegerDBIDs((IntegerDBIDs) existing);
+ }
if (existing instanceof ArrayDBIDs) {
- return new UnmodifiableArrayDBIDs((ArrayDBIDs)existing);
- } else {
- return new UnmodifiableDBIDs(existing);
+ return new UnmodifiableArrayDBIDs((ArrayDBIDs) existing);
}
+ return new UnmodifiableDBIDs(existing);
}
/**
* Ensure that the given DBIDs are array-indexable.
*
- * @param ids
+ * @param ids IDs
* @return Array DBIDs.
*/
public static ArrayDBIDs ensureArray(DBIDs ids) {
- if(ids instanceof ArrayDBIDs) {
+ if (ids instanceof ArrayDBIDs) {
return (ArrayDBIDs) ids;
- }
- else {
+ } else {
return newArray(ids);
}
}
@@ -286,33 +427,31 @@ public final class DBIDUtil {
/**
* Ensure that the given DBIDs support fast "contains" operations.
*
- * @param ids
- * @return Array DBIDs.
+ * @param ids IDs
+ * @return Set DBIDs.
*/
public static SetDBIDs ensureSet(DBIDs ids) {
- if(ids instanceof SetDBIDs) {
+ if (ids instanceof SetDBIDs) {
return (SetDBIDs) ids;
- }
- else {
+ } else {
return newHashSet(ids);
}
}
/**
- * Ensure modifiable
+ * Ensure modifiable.
*
- * @param ids
- * @return Array DBIDs.
+ * @param ids IDs
+ * @return Modifiable DBIDs.
*/
public static ModifiableDBIDs ensureModifiable(DBIDs ids) {
- if(ids instanceof ModifiableDBIDs) {
+ if (ids instanceof ModifiableDBIDs) {
return (ModifiableDBIDs) ids;
- }
- else {
- if(ids instanceof ArrayDBIDs) {
+ } else {
+ if (ids instanceof ArrayDBIDs) {
return newArray(ids);
}
- if(ids instanceof HashSetDBIDs) {
+ if (ids instanceof HashSetDBIDs) {
return newHashSet(ids);
}
return newArray(ids);
@@ -328,11 +467,91 @@ public final class DBIDUtil {
* @return DBID pair
*/
public static DBIDPair newPair(DBIDRef id1, DBIDRef id2) {
- return DBIDFactory.FACTORY.makePair(id1, id2);
+ return DBIDFactory.FACTORY.newPair(id1, id2);
+ }
+
+ /**
+ * Make a DoubleDBIDPair.
+ *
+ * @param val double value
+ * @param id ID
+ * @return new pair
+ */
+ public static DoubleDBIDPair newPair(double val, DBIDRef id) {
+ return DBIDFactory.FACTORY.newPair(val, id);
+ }
+
+ /**
+ * Make a DistanceDBIDPair.
+ *
+ * @param dist Distance value
+ * @param id ID
+ * @return new pair
+ */
+ public static <D extends Distance<D>> DistanceDBIDPair<D> newDistancePair(D dist, DBIDRef id) {
+ return DBIDFactory.FACTORY.newDistancePair(dist, id);
+ }
+
+ /**
+ * Make a DoubleDistanceDBIDPair.
+ *
+ * @param dist Distance value
+ * @param id ID
+ * @return new pair
+ */
+ public static DoubleDistanceDBIDPair newDistancePair(double dist, DBIDRef id) {
+ return DBIDFactory.FACTORY.newDistancePair(dist, id);
}
/**
- * Produce a random sample of the given DBIDs
+ * Produce a random sample of the given DBIDs.
+ *
+ * @param source Original DBIDs
+ * @param k k Parameter
+ * @param rnd Random generator
+ * @return new DBIDs
+ */
+ public static ModifiableDBIDs randomSample(DBIDs source, int k, RandomFactory rnd) {
+ return randomSample(source, k, rnd.getRandom());
+ }
+
+ /**
+ * Produce a random shuffling of the given DBID array.
+ *
+ * @param ids Original DBIDs
+ * @param rnd Random generator
+ */
+ public static void randomShuffle(ArrayModifiableDBIDs ids, RandomFactory rnd) {
+ randomShuffle(ids, rnd.getRandom(), ids.size());
+ }
+
+ /**
+ * Produce a random shuffling of the given DBID array.
+ *
+ * @param ids Original DBIDs
+ * @param random Random generator
+ */
+ public static void randomShuffle(ArrayModifiableDBIDs ids, Random random) {
+ randomShuffle(ids, random, ids.size());
+ }
+
+ /**
+ * Produce a random shuffling of the given DBID array.
+ *
+ * Only the first {@code limit} elements will be randomized.
+ *
+ * @param ids Original DBIDs
+ * @param random Random generator
+ * @param limit Shuffling limit.
+ */
+ public static void randomShuffle(ArrayModifiableDBIDs ids, Random random, final int limit) {
+ for (int i = 1; i < limit; i++) {
+ ids.swap(i - 1, i + random.nextInt(limit - i));
+ }
+ }
+
+ /**
+ * Produce a random sample of the given DBIDs.
*
* @param source Original DBIDs
* @param k k Parameter
@@ -340,11 +559,11 @@ public final class DBIDUtil {
* @return new DBIDs
*/
public static ModifiableDBIDs randomSample(DBIDs source, int k, int seed) {
- return randomSample(source, k, (long) seed);
+ return randomSample(source, k, new Random(seed));
}
/**
- * Produce a random sample of the given DBIDs
+ * Produce a random sample of the given DBIDs.
*
* @param source Original DBIDs
* @param k k Parameter
@@ -352,39 +571,47 @@ public final class DBIDUtil {
* @return new DBIDs
*/
public static ModifiableDBIDs randomSample(DBIDs source, int k, Long seed) {
- if(k <= 0 || k > source.size()) {
- throw new IllegalArgumentException("Illegal value for size of random sample: " + k+ " > "+source.size()+" or < 0");
+ if (seed != null) {
+ return randomSample(source, k, new Random(seed.longValue()));
+ } else {
+ return randomSample(source, k, new Random());
}
- final Random random;
- if(seed != null) {
- random = new Random(seed);
+ }
+
+ /**
+ * Produce a random sample of the given DBIDs.
+ *
+ * @param source Original DBIDs
+ * @param k k Parameter
+ * @param random Random generator
+ * @return new DBIDs
+ */
+ public static ModifiableDBIDs randomSample(DBIDs source, int k, Random random) {
+ if (k <= 0 || k > source.size()) {
+ throw new IllegalArgumentException("Illegal value for size of random sample: " + k + " > " + source.size() + " or < 0");
}
- else {
+ if (random == null) {
random = new Random();
}
// TODO: better balancing for different sizes
// Two methods: constructive vs. destructive
- if(k < source.size() / 2) {
+ if (k < source.size() >> 1) {
ArrayDBIDs aids = DBIDUtil.ensureArray(source);
+ DBIDArrayIter iter = aids.iter();
HashSetModifiableDBIDs sample = DBIDUtil.newHashSet(k);
- while(sample.size() < k) {
- sample.add(aids.get(random.nextInt(aids.size())));
+ while (sample.size() < k) {
+ iter.seek(random.nextInt(aids.size()));
+ sample.add(iter);
}
return sample;
- }
- else {
+ } else {
ArrayModifiableDBIDs sample = DBIDUtil.newArray(source);
- while(sample.size() > k) {
- // Element to remove
- int idx = random.nextInt(sample.size());
- // Remove last element
- DBID last = sample.remove(sample.size() - 1);
- // Replace target element:
- if(idx < sample.size()) {
- sample.set(idx, last);
- }
+ randomShuffle(sample, random, k);
+ // Delete trailing elements
+ for (int i = sample.size() - 1; i > k; i--) {
+ sample.remove(i);
}
return sample;
}
}
-} \ No newline at end of file
+}