diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/database/ids/DBIDUtil.java')
-rw-r--r-- | src/de/lmu/ifi/dbs/elki/database/ids/DBIDUtil.java | 210 |
1 files changed, 105 insertions, 105 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/database/ids/DBIDUtil.java b/src/de/lmu/ifi/dbs/elki/database/ids/DBIDUtil.java index c60e63ae..9c39fc77 100644 --- a/src/de/lmu/ifi/dbs/elki/database/ids/DBIDUtil.java +++ b/src/de/lmu/ifi/dbs/elki/database/ids/DBIDUtil.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.database.ids; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -25,24 +25,19 @@ package de.lmu.ifi.dbs.elki.database.ids; import java.util.Random; -import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDPair; -import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDPair; -import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNHeap; -import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList; -import de.lmu.ifi.dbs.elki.database.ids.distance.KNNHeap; -import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; -import de.lmu.ifi.dbs.elki.database.ids.generic.DoubleDistanceKNNSubList; import de.lmu.ifi.dbs.elki.database.ids.generic.KNNSubList; import de.lmu.ifi.dbs.elki.database.ids.generic.UnmodifiableArrayDBIDs; import de.lmu.ifi.dbs.elki.database.ids.generic.UnmodifiableDBIDs; import de.lmu.ifi.dbs.elki.database.ids.integer.IntegerArrayDBIDs; +import de.lmu.ifi.dbs.elki.database.ids.integer.IntegerDBIDKNNList; +import de.lmu.ifi.dbs.elki.database.ids.integer.IntegerDBIDKNNSubList; import de.lmu.ifi.dbs.elki.database.ids.integer.IntegerDBIDs; import de.lmu.ifi.dbs.elki.database.ids.integer.UnmodifiableIntegerArrayDBIDs; import de.lmu.ifi.dbs.elki.database.ids.integer.UnmodifiableIntegerDBIDs; -import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; -import de.lmu.ifi.dbs.elki.persistent.ByteBufferSerializer; -import de.lmu.ifi.dbs.elki.utilities.RandomFactory; -import de.lmu.ifi.dbs.elki.utilities.UnsafeRandom; +import de.lmu.ifi.dbs.elki.math.random.FastNonThreadsafeRandom; +import de.lmu.ifi.dbs.elki.math.random.RandomFactory; +import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; +import de.lmu.ifi.dbs.elki.utilities.io.ByteBufferSerializer; /** * DBID Utility functions. @@ -51,9 +46,8 @@ import de.lmu.ifi.dbs.elki.utilities.UnsafeRandom; * * @apiviz.landmark * - * @apiviz.has DBID * @apiviz.has DBIDs - * @apiviz.uses DBIDRef + * @apiviz.has DBIDRef * @apiviz.composedOf DBIDFactory */ public final class DBIDUtil { @@ -131,7 +125,7 @@ public final class DBIDUtil { * @return DBID */ public static DBID deref(DBIDRef ref) { - if (ref instanceof DBID) { + if(ref instanceof DBID) { return (DBID) ref; } return importInteger(ref.internalGetIndex()); @@ -154,12 +148,12 @@ public final class DBIDUtil { * @return String representation */ public static String toString(DBIDs ids) { - if (ids instanceof DBID) { + if(ids instanceof DBID) { return DBIDFactory.FACTORY.toString((DBID) ids); } StringBuilder buf = new StringBuilder(); - for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - if (buf.length() > 0) { + for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { + if(buf.length() > 0) { buf.append(", "); } buf.append(DBIDFactory.FACTORY.toString(iter)); @@ -308,12 +302,12 @@ public final class DBIDUtil { */ // TODO: optimize better? public static ModifiableDBIDs intersection(DBIDs first, DBIDs second) { - if (first.size() > second.size()) { + if(first.size() > second.size()) { return intersection(second, first); } ModifiableDBIDs inter = newHashSet(first.size()); - for (DBIDIter it = first.iter(); it.valid(); it.advance()) { - if (second.contains(it)) { + for(DBIDIter it = first.iter(); it.valid(); it.advance()) { + if(second.contains(it)) { inter.add(it); } } @@ -329,20 +323,22 @@ public final class DBIDUtil { */ public static int intersectionSize(DBIDs first, DBIDs second) { // If exactly one is a Set, use it as second parameter. - if (second instanceof SetDBIDs) { - if (!(first instanceof SetDBIDs)) { + if(second instanceof SetDBIDs) { + if(!(first instanceof SetDBIDs)) { return internalIntersectionSize(first, second); } - } else { - if (first instanceof SetDBIDs) { + } + else { + if(first instanceof SetDBIDs) { return internalIntersectionSize(second, first); } } // Both are the same type: both set or both non set. // Smaller goes first. - if (first.size() <= second.size()) { + if(first.size() <= second.size()) { return internalIntersectionSize(first, second); - } else { + } + else { return internalIntersectionSize(second, first); } } @@ -356,8 +352,8 @@ public final class DBIDUtil { */ private static int internalIntersectionSize(DBIDs first, DBIDs second) { int c = 0; - for (DBIDIter it = first.iter(); it.valid(); it.advance()) { - if (second.contains(it)) { + for(DBIDIter it = first.iter(); it.valid(); it.advance()) { + if(second.contains(it)) { c++; } } @@ -375,7 +371,7 @@ public final class DBIDUtil { */ // TODO: optimize? public static void symmetricIntersection(DBIDs first, DBIDs second, HashSetModifiableDBIDs firstonly, HashSetModifiableDBIDs intersection, HashSetModifiableDBIDs secondonly) { - if (first.size() > second.size()) { + if(first.size() > second.size()) { symmetricIntersection(second, first, secondonly, intersection, firstonly); return; } @@ -384,11 +380,12 @@ public final class DBIDUtil { assert (secondonly.size() == 0) : "OUTPUT set should be empty!"; // Initialize with second secondonly.addDBIDs(second); - for (DBIDIter it = first.iter(); it.valid(); it.advance()) { + for(DBIDIter it = first.iter(); it.valid(); it.advance()) { // Try to remove - if (secondonly.remove(it)) { + if(secondonly.remove(it)) { intersection.add(it); - } else { + } + else { firstonly.add(it); } } @@ -428,16 +425,16 @@ public final class DBIDUtil { * @return Unmodifiable collection */ public static StaticDBIDs makeUnmodifiable(DBIDs existing) { - if (existing instanceof StaticDBIDs) { + if(existing instanceof StaticDBIDs) { return (StaticDBIDs) existing; } - if (existing instanceof IntegerArrayDBIDs) { + if(existing instanceof IntegerArrayDBIDs) { return new UnmodifiableIntegerArrayDBIDs((IntegerArrayDBIDs) existing); } - if (existing instanceof IntegerDBIDs) { + if(existing instanceof IntegerDBIDs) { return new UnmodifiableIntegerDBIDs((IntegerDBIDs) existing); } - if (existing instanceof ArrayDBIDs) { + if(existing instanceof ArrayDBIDs) { return new UnmodifiableArrayDBIDs((ArrayDBIDs) existing); } return new UnmodifiableDBIDs(existing); @@ -450,9 +447,10 @@ public final class DBIDUtil { * @return Array DBIDs. */ public static ArrayDBIDs ensureArray(DBIDs ids) { - if (ids instanceof ArrayDBIDs) { + if(ids instanceof ArrayDBIDs) { return (ArrayDBIDs) ids; - } else { + } + else { return newArray(ids); } } @@ -464,9 +462,10 @@ public final class DBIDUtil { * @return Set DBIDs. */ public static SetDBIDs ensureSet(DBIDs ids) { - if (ids instanceof SetDBIDs) { + if(ids instanceof SetDBIDs) { return (SetDBIDs) ids; - } else { + } + else { return newHashSet(ids); } } @@ -478,13 +477,14 @@ public final class DBIDUtil { * @return Modifiable DBIDs. */ public static ModifiableDBIDs ensureModifiable(DBIDs ids) { - if (ids instanceof ModifiableDBIDs) { + if(ids instanceof ModifiableDBIDs) { return (ModifiableDBIDs) ids; - } else { - if (ids instanceof ArrayDBIDs) { + } + else { + if(ids instanceof ArrayDBIDs) { return newArray(ids); } - if (ids instanceof HashSetDBIDs) { + if(ids instanceof HashSetDBIDs) { return newHashSet(ids); } return newArray(ids); @@ -515,59 +515,24 @@ public final class DBIDUtil { } /** - * Make a DistanceDBIDPair. - * - * @param dist Distance value - * @param id ID - * @return new pair - */ - public static <D extends Distance<D>> DistanceDBIDPair<D> newDistancePair(D dist, DBIDRef id) { - return DBIDFactory.FACTORY.newDistancePair(dist, id); - } - - /** - * Make a DoubleDistanceDBIDPair. - * - * @param dist Distance value - * @param id ID - * @return new pair - */ - public static DoubleDistanceDBIDPair newDistancePair(double dist, DBIDRef id) { - return DBIDFactory.FACTORY.newDistancePair(dist, id); - } - - /** * Create an appropriate heap for the distance type. * * This will use a double heap if appropriate. * - * @param distancetype distance prototype - * @param k K value - * @param <D> distance type - * @return New heap of size k, appropriate for this distance type. - */ - public static <D extends Distance<D>> KNNHeap<D> newHeap(D distancetype, int k) { - return DBIDFactory.FACTORY.newHeap(distancetype, k); - } - - /** - * Create an appropriate heap for double distances. - * * @param k K value * @return New heap of size k, appropriate for this distance type. */ - public static DoubleDistanceKNNHeap newDoubleDistanceHeap(int k) { - return DBIDFactory.FACTORY.newDoubleDistanceHeap(k); + public static KNNHeap newHeap(int k) { + return DBIDFactory.FACTORY.newHeap(k); } /** * Build a new heap from a given list. * * @param exist Existing result - * @param <D> Distance type * @return New heap */ - public static <D extends Distance<D>> KNNHeap<D> newHeap(KNNList<D> exist) { + public static KNNHeap newHeap(KNNList exist) { return DBIDFactory.FACTORY.newHeap(exist); } @@ -601,7 +566,7 @@ public final class DBIDUtil { * @param limit Shuffling limit. */ public static void randomShuffle(ArrayModifiableDBIDs ids, Random random, final int limit) { - for (int i = 1; i < limit; i++) { + for(int i = 1; i < limit; i++) { ids.swap(i - 1, i + random.nextInt(limit - i)); } } @@ -627,9 +592,10 @@ public final class DBIDUtil { * @return new DBIDs */ public static ModifiableDBIDs randomSample(DBIDs source, int k, Long seed) { - if (seed != null) { + if(seed != null) { return randomSample(source, k, new Random(seed.longValue())); - } else { + } + else { return randomSample(source, k, new Random()); } } @@ -655,29 +621,31 @@ public final class DBIDUtil { * @return new DBIDs */ public static ModifiableDBIDs randomSample(DBIDs source, int k, Random random) { - if (k < 0 || k > source.size()) { + if(k < 0 || k > source.size()) { throw new IllegalArgumentException("Illegal value for size of random sample: " + k + " > " + source.size() + " or < 0"); } - if (random == null) { - random = new UnsafeRandom(); // Fast, and we're single-threaded here anyway. + if(random == null) { + // Fast, and we're single-threaded here anyway. + random = new FastNonThreadsafeRandom(); } - + // TODO: better balancing for different sizes // Two methods: constructive vs. destructive - if (k < source.size() >> 1) { + if(k < source.size() >> 1) { ArrayDBIDs aids = DBIDUtil.ensureArray(source); DBIDArrayIter iter = aids.iter(); HashSetModifiableDBIDs sample = DBIDUtil.newHashSet(k); - while (sample.size() < k) { + while(sample.size() < k) { iter.seek(random.nextInt(aids.size())); sample.add(iter); } return sample; - } else { + } + else { ArrayModifiableDBIDs sample = DBIDUtil.newArray(source); randomShuffle(sample, random, k); // Delete trailing elements - for (int i = sample.size() - 1; i > k; i--) { + for(int i = sample.size() - 1; i > k; i--) { sample.remove(i); } return sample; @@ -703,19 +671,20 @@ public final class DBIDUtil { * @param random Random generator */ public static ArrayDBIDs[] randomSplit(DBIDs oids, int p, Random random) { - if (random == null) { - random = new UnsafeRandom(); // Fast, and we're single-threaded here anyway. + if(random == null) { + // Fast, and we're single-threaded here anyway. + random = new FastNonThreadsafeRandom(); } ArrayModifiableDBIDs ids = newArray(oids); final int size = ids.size(); ArrayDBIDs[] split = new ArrayDBIDs[p]; // Shuffle - for (int i = 1; i < size; i++) { + for(int i = 1; i < size; i++) { ids.swap(i - 1, i + random.nextInt(size - i)); } final int minsize = size / p; // Floor. final int extra = size % p; // Remainder - for (int beg = 0, part = 0; part < p; part++) { + for(int beg = 0, part = 0; part < p; part++) { // First partitions are smaller, last partitions are larger. final int psize = minsize + ((part < extra) ? 1 : 0); split[part] = ids.slice(beg, beg + psize); @@ -729,17 +698,48 @@ public final class DBIDUtil { * * @param list Existing list * @param k k - * @param <D> distance type * @return Subset */ - @SuppressWarnings("unchecked") - public static <D extends Distance<D>> KNNList<D> subList(KNNList<D> list, int k) { - if (k >= list.size()) { + public static KNNList subList(KNNList list, int k) { + if(k >= list.size()) { return list; } - if (list instanceof DoubleDistanceKNNList) { - return (KNNList<D>) new DoubleDistanceKNNSubList((DoubleDistanceKNNList) list, k); + if(list instanceof IntegerDBIDKNNList) { + return new IntegerDBIDKNNSubList((IntegerDBIDKNNList) list, k); + } + return new KNNSubList(list, k); + } + + /** + * Create a modifiable list to store distance-DBID pairs. + * + * @param size Estimated upper list size + * @return Empty list + */ + public static ModifiableDoubleDBIDList newDistanceDBIDList(int size) { + return DBIDFactory.FACTORY.newDistanceDBIDList(size); + } + + /** + * Create a modifiable list to store distance-DBID pairs. + * + * @return Empty list + */ + public static ModifiableDoubleDBIDList newDistanceDBIDList() { + return DBIDFactory.FACTORY.newDistanceDBIDList(); + } + + /** + * Assert that the presented ids constitute a continuous {@link DBIDRange}. + * + * @param ids ID range. + * @return DBID range. + * @throws AbortException + */ + public static DBIDRange assertRange(DBIDs ids) { + if(!(ids instanceof DBIDRange)) { + throw new AbortException("This class may currently only be used with static databases and DBID ranges."); } - return new KNNSubList<>(list, k); + return (DBIDRange) ids; } } |