diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/utilities/datastructures')
61 files changed, 6163 insertions, 884 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/QuickSelect.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/QuickSelect.java index a191dde2..bfa7950d 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/QuickSelect.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/QuickSelect.java @@ -5,6 +5,8 @@ import java.util.List; import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs; import de.lmu.ifi.dbs.elki.database.ids.DBID; +import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; /* This file is part of ELKI: @@ -90,14 +92,14 @@ public class QuickSelect { final int length = end - begin; assert (length > 0); // Integer division is "floor" since we are non-negative. - final int left = begin + (length - 1) / 2; + final int left = begin + ((length - 1) >> 1); quickSelect(data, begin, end, left); if(length % 2 == 1) { return data[left]; } else { quickSelect(data, begin, end, left + 1); - return data[left] + (data[left + 1] - data[left]) / 2; + return data[left] + .5 * (data[left + 1] - data[left]); } } @@ -155,59 +157,64 @@ public class QuickSelect { * @param rank rank position we are interested in (starting at 0) */ public static void quickSelect(double[] data, int start, int end, int rank) { - // Optimization for small arrays - // This also ensures a minimum size below - if(start + SMALL > end) { - insertionSort(data, start, end); - return; - } + while(true) { + // Optimization for small arrays + // This also ensures a minimum size below + if(start + SMALL > end) { + insertionSort(data, start, end); + return; + } - // Pick pivot from three candidates: start, middle, end - // Since we compare them, we can also just "bubble sort" them. - final int middle = (start + end) / 2; - if(data[start] > data[middle]) { - swap(data, start, middle); - } - if(data[start] > data[end - 1]) { - swap(data, start, end - 1); - } - if(data[middle] > data[end - 1]) { - swap(data, middle, end - 1); - } - // TODO: use more candidates for larger arrays? + // Pick pivot from three candidates: start, middle, end + // Since we compare them, we can also just "bubble sort" them. + final int middle = (start + end) >> 1; + if(data[start] > data[middle]) { + swap(data, start, middle); + } + if(data[start] > data[end - 1]) { + swap(data, start, end - 1); + } + if(data[middle] > data[end - 1]) { + swap(data, middle, end - 1); + } + // TODO: use more candidates for larger arrays? + + final double pivot = data[middle]; + // Move middle element out of the way, just before end + // (Since we already know that "end" is bigger) + swap(data, middle, end - 2); + + // Begin partitioning + int i = start + 1, j = end - 3; + // This is classic quicksort stuff + while(true) { + while(data[i] <= pivot && i <= j) { + i++; + } + while(data[j] >= pivot && j >= i) { + j--; + } + if(i >= j) { + break; + } + swap(data, i, j); + } - final double pivot = data[middle]; - // Move middle element out of the way, just before end - // (Since we already know that "end" is bigger) - swap(data, middle, end - 2); + // Move pivot (former middle element) back into the appropriate place + swap(data, i, end - 2); - // Begin partitioning - int i = start + 1, j = end - 3; - // This is classic quicksort stuff - while(true) { - while(data[i] <= pivot && i <= j) { - i++; + // In contrast to quicksort, we only need to recurse into the half we are + // interested in. Instead of recursion we now use iteration. + if(rank < i) { + end = i; } - while(data[j] >= pivot && j >= i) { - j--; + else if(rank > i) { + start = i + 1; } - if(i >= j) { + else { break; } - swap(data, i, j); - } - - // Move pivot (former middle element) back into the appropriate place - swap(data, i, end - 2); - - // In contrast to quicksort, we only need to recurse into the half we are - // interested in. - if(rank < i) { - quickSelect(data, start, i, rank); - } - else if(rank > i) { - quickSelect(data, i + 1, end, rank); - } + } // Loop until rank==i } /** @@ -283,7 +290,7 @@ public class QuickSelect { final int length = end - begin; assert (length > 0); // Integer division is "floor" since we are non-negative. - final int left = begin + (length - 1) / 2; + final int left = begin + ((length - 1) >> 1); quickSelect(data, begin, end, left); return data[left]; } @@ -338,59 +345,64 @@ public class QuickSelect { * @param rank rank position we are interested in (starting at 0) */ public static <T extends Comparable<? super T>> void quickSelect(T[] data, int start, int end, int rank) { - // Optimization for small arrays - // This also ensures a minimum size below - if(start + SMALL > end) { - insertionSort(data, start, end); - return; - } + while(true) { + // Optimization for small arrays + // This also ensures a minimum size below + if(start + SMALL > end) { + insertionSort(data, start, end); + return; + } - // Pick pivot from three candidates: start, middle, end - // Since we compare them, we can also just "bubble sort" them. - final int middle = (start + end) / 2; - if(data[start].compareTo(data[middle]) > 0) { - swap(data, start, middle); - } - if(data[start].compareTo(data[end - 1]) > 0) { - swap(data, start, end - 1); - } - if(data[middle].compareTo(data[end - 1]) > 0) { - swap(data, middle, end - 1); - } - // TODO: use more candidates for larger arrays? + // Pick pivot from three candidates: start, middle, end + // Since we compare them, we can also just "bubble sort" them. + final int middle = (start + end) >> 1; + if(data[start].compareTo(data[middle]) > 0) { + swap(data, start, middle); + } + if(data[start].compareTo(data[end - 1]) > 0) { + swap(data, start, end - 1); + } + if(data[middle].compareTo(data[end - 1]) > 0) { + swap(data, middle, end - 1); + } + // TODO: use more candidates for larger arrays? + + final T pivot = data[middle]; + // Move middle element out of the way, just before end + // (Since we already know that "end" is bigger) + swap(data, middle, end - 2); + + // Begin partitioning + int i = start + 1, j = end - 3; + // This is classic quicksort stuff + while(true) { + while(data[i].compareTo(pivot) <= 0 && i <= j) { + i++; + } + while(data[j].compareTo(pivot) >= 0 && j >= i) { + j--; + } + if(i >= j) { + break; + } + swap(data, i, j); + } - final T pivot = data[middle]; - // Move middle element out of the way, just before end - // (Since we already know that "end" is bigger) - swap(data, middle, end - 2); + // Move pivot (former middle element) back into the appropriate place + swap(data, i, end - 2); - // Begin partitioning - int i = start + 1, j = end - 3; - // This is classic quicksort stuff - while(true) { - while(data[i].compareTo(pivot) <= 0 && i <= j) { - i++; + // In contrast to quicksort, we only need to recurse into the half we are + // interested in. Instead of recursion we now use iteration. + if(rank < i) { + end = i; } - while(data[j].compareTo(pivot) >= 0 && j >= i) { - j--; + else if(rank > i) { + start = i + 1; } - if(i >= j) { + else { break; } - swap(data, i, j); - } - - // Move pivot (former middle element) back into the appropriate place - swap(data, i, end - 2); - - // In contrast to quicksort, we only need to recurse into the half we are - // interested in. - if(rank < i) { - quickSelect(data, start, i, rank); - } - else if(rank > i) { - quickSelect(data, i + 1, end, rank); - } + } // Loop until rank==i } /** @@ -469,7 +481,7 @@ public class QuickSelect { final int length = end - begin; assert (length > 0); // Integer division is "floor" since we are non-negative. - final int left = begin + (length - 1) / 2; + final int left = begin + ((length - 1) >> 1); quickSelect(data, begin, end, left); return data.get(left); } @@ -524,59 +536,64 @@ public class QuickSelect { * @param rank rank position we are interested in (starting at 0) */ public static <T extends Comparable<? super T>> void quickSelect(List<? extends T> data, int start, int end, int rank) { - // Optimization for small arrays - // This also ensures a minimum size below - if(start + SMALL > end) { - insertionSort(data, start, end); - return; - } + while(true) { + // Optimization for small arrays + // This also ensures a minimum size below + if(start + SMALL > end) { + insertionSort(data, start, end); + return; + } - // Pick pivot from three candidates: start, middle, end - // Since we compare them, we can also just "bubble sort" them. - final int middle = (start + end) / 2; - if(data.get(start).compareTo(data.get(middle)) > 0) { - swap(data, start, middle); - } - if(data.get(start).compareTo(data.get(end - 1)) > 0) { - swap(data, start, end - 1); - } - if(data.get(middle).compareTo(data.get(end - 1)) > 0) { - swap(data, middle, end - 1); - } - // TODO: use more candidates for larger arrays? + // Pick pivot from three candidates: start, middle, end + // Since we compare them, we can also just "bubble sort" them. + final int middle = (start + end) >> 1; + if(data.get(start).compareTo(data.get(middle)) > 0) { + swap(data, start, middle); + } + if(data.get(start).compareTo(data.get(end - 1)) > 0) { + swap(data, start, end - 1); + } + if(data.get(middle).compareTo(data.get(end - 1)) > 0) { + swap(data, middle, end - 1); + } + // TODO: use more candidates for larger arrays? + + final T pivot = data.get(middle); + // Move middle element out of the way, just before end + // (Since we already know that "end" is bigger) + swap(data, middle, end - 2); + + // Begin partitioning + int i = start + 1, j = end - 3; + // This is classic quicksort stuff + while(true) { + while(data.get(i).compareTo(pivot) <= 0 && i <= j) { + i++; + } + while(data.get(j).compareTo(pivot) >= 0 && j >= i) { + j--; + } + if(i >= j) { + break; + } + swap(data, i, j); + } - final T pivot = data.get(middle); - // Move middle element out of the way, just before end - // (Since we already know that "end" is bigger) - swap(data, middle, end - 2); + // Move pivot (former middle element) back into the appropriate place + swap(data, i, end - 2); - // Begin partitioning - int i = start + 1, j = end - 3; - // This is classic quicksort stuff - while(true) { - while(data.get(i).compareTo(pivot) <= 0 && i <= j) { - i++; + // In contrast to quicksort, we only need to recurse into the half we are + // interested in. Instead of recursion we now use iteration. + if(rank < i) { + end = i; } - while(data.get(j).compareTo(pivot) >= 0 && j >= i) { - j--; + else if(rank > i) { + start = i + 1; } - if(i >= j) { + else { break; } - swap(data, i, j); - } - - // Move pivot (former middle element) back into the appropriate place - swap(data, i, end - 2); - - // In contrast to quicksort, we only need to recurse into the half we are - // interested in. - if(rank < i) { - quickSelect(data, start, i, rank); - } - else if(rank > i) { - quickSelect(data, i + 1, end, rank); - } + } // Loop until rank==i } /** @@ -656,7 +673,7 @@ public class QuickSelect { final int length = end - begin; assert (length > 0); // Integer division is "floor" since we are non-negative. - final int left = begin + (length - 1) / 2; + final int left = begin + ((length - 1) >> 1); quickSelect(data, comparator, begin, end, left); return data.get(left); } @@ -714,59 +731,64 @@ public class QuickSelect { * @param rank rank position we are interested in (starting at 0) */ public static <T> void quickSelect(List<? extends T> data, Comparator<? super T> comparator, int start, int end, int rank) { - // Optimization for small arrays - // This also ensures a minimum size below - if(start + SMALL > end) { - insertionSort(data, comparator, start, end); - return; - } + while(true) { + // Optimization for small arrays + // This also ensures a minimum size below + if(start + SMALL > end) { + insertionSort(data, comparator, start, end); + return; + } - // Pick pivot from three candidates: start, middle, end - // Since we compare them, we can also just "bubble sort" them. - final int middle = (start + end) / 2; - if(comparator.compare(data.get(start), data.get(middle)) > 0) { - swap(data, start, middle); - } - if(comparator.compare(data.get(start), data.get(end - 1)) > 0) { - swap(data, start, end - 1); - } - if(comparator.compare(data.get(middle), data.get(end - 1)) > 0) { - swap(data, middle, end - 1); - } - // TODO: use more candidates for larger arrays? + // Pick pivot from three candidates: start, middle, end + // Since we compare them, we can also just "bubble sort" them. + final int middle = (start + end) >> 1; + if(comparator.compare(data.get(start), data.get(middle)) > 0) { + swap(data, start, middle); + } + if(comparator.compare(data.get(start), data.get(end - 1)) > 0) { + swap(data, start, end - 1); + } + if(comparator.compare(data.get(middle), data.get(end - 1)) > 0) { + swap(data, middle, end - 1); + } + // TODO: use more candidates for larger arrays? + + final T pivot = data.get(middle); + // Move middle element out of the way, just before end + // (Since we already know that "end" is bigger) + swap(data, middle, end - 2); + + // Begin partitioning + int i = start + 1, j = end - 3; + // This is classic quicksort stuff + while(true) { + while(comparator.compare(data.get(i), pivot) <= 0 && i <= j) { + i++; + } + while(comparator.compare(data.get(j), pivot) >= 0 && j >= i) { + j--; + } + if(i >= j) { + break; + } + swap(data, i, j); + } - final T pivot = data.get(middle); - // Move middle element out of the way, just before end - // (Since we already know that "end" is bigger) - swap(data, middle, end - 2); + // Move pivot (former middle element) back into the appropriate place + swap(data, i, end - 2); - // Begin partitioning - int i = start + 1, j = end - 3; - // This is classic quicksort stuff - while(true) { - while(comparator.compare(data.get(i), pivot) <= 0 && i <= j) { - i++; + // In contrast to quicksort, we only need to recurse into the half we are + // interested in. Instead of recursion we now use iteration. + if(rank < i) { + end = i; } - while(comparator.compare(data.get(j), pivot) >= 0 && j >= i) { - j--; + else if(rank > i) { + start = i + 1; } - if(i >= j) { + else { break; } - swap(data, i, j); - } - - // Move pivot (former middle element) back into the appropriate place - swap(data, i, end - 2); - - // In contrast to quicksort, we only need to recurse into the half we are - // interested in. - if(rank < i) { - quickSelect(data, comparator, start, i, rank); - } - else if(rank > i) { - quickSelect(data, comparator, i + 1, end, rank); - } + } // Loop until rank==i } /** @@ -796,7 +818,7 @@ public class QuickSelect { * @param rank Rank position that we are interested in (integer!) * @return Value at the given rank */ - public static DBID quickSelect(ArrayModifiableDBIDs data, Comparator<? super DBID> comparator, int rank) { + public static DBID quickSelect(ArrayModifiableDBIDs data, Comparator<? super DBIDRef> comparator, int rank) { quickSelect(data, comparator, 0, data.size(), rank); return data.get(rank); } @@ -810,7 +832,7 @@ public class QuickSelect { * @param comparator Comparator to use * @return Median value */ - public static DBID median(ArrayModifiableDBIDs data, Comparator<? super DBID> comparator) { + public static DBID median(ArrayModifiableDBIDs data, Comparator<? super DBIDRef> comparator) { return median(data, comparator, 0, data.size()); } @@ -827,11 +849,11 @@ public class QuickSelect { * @param end End of valid values (exclusive!) * @return Median value */ - public static DBID median(ArrayModifiableDBIDs data, Comparator<? super DBID> comparator, int begin, int end) { + public static DBID median(ArrayModifiableDBIDs data, Comparator<? super DBIDRef> comparator, int begin, int end) { final int length = end - begin; assert (length > 0); // Integer division is "floor" since we are non-negative. - final int left = begin + (length - 1) / 2; + final int left = begin + ((length - 1) >> 1); quickSelect(data, comparator, begin, end, left); return data.get(left); } @@ -846,7 +868,7 @@ public class QuickSelect { * @param quant Quantile to compute * @return Value at quantile */ - public static DBID quantile(ArrayModifiableDBIDs data, Comparator<? super DBID> comparator, double quant) { + public static DBID quantile(ArrayModifiableDBIDs data, Comparator<? super DBIDRef> comparator, double quant) { return quantile(data, comparator, 0, data.size(), quant); } @@ -864,7 +886,7 @@ public class QuickSelect { * @param quant Quantile to compute * @return Value at quantile */ - public static DBID quantile(ArrayModifiableDBIDs data, Comparator<? super DBID> comparator, int begin, int end, double quant) { + public static DBID quantile(ArrayModifiableDBIDs data, Comparator<? super DBIDRef> comparator, int begin, int end, double quant) { final int length = end - begin; assert (length > 0) : "Quantile on empty set?"; // Integer division is "floor" since we are non-negative. @@ -885,60 +907,70 @@ public class QuickSelect { * @param end Interval end (inclusive) * @param rank rank position we are interested in (starting at 0) */ - public static void quickSelect(ArrayModifiableDBIDs data, Comparator<? super DBID> comparator, int start, int end, int rank) { - // Optimization for small arrays - // This also ensures a minimum size below - if(start + SMALL > end) { - insertionSort(data, comparator, start, end); - return; - } + public static void quickSelect(ArrayModifiableDBIDs data, Comparator<? super DBIDRef> comparator, int start, int end, int rank) { + while(true) { + // Optimization for small arrays + // This also ensures a minimum size below + if(start + SMALL > end) { + insertionSort(data, comparator, start, end); + return; + } - // Pick pivot from three candidates: start, middle, end - // Since we compare them, we can also just "bubble sort" them. - final int middle = (start + end) / 2; - if(comparator.compare(data.get(start), data.get(middle)) > 0) { - data.swap(start, middle); - } - if(comparator.compare(data.get(start), data.get(end - 1)) > 0) { - data.swap(start, end - 1); - } - if(comparator.compare(data.get(middle), data.get(end - 1)) > 0) { - data.swap(middle, end - 1); - } - // TODO: use more candidates for larger arrays? + // Pick pivot from three candidates: start, middle, end + // Since we compare them, we can also just "bubble sort" them. + final int middle = (start + end) >> 1; + if(comparator.compare(data.get(start), data.get(middle)) > 0) { + data.swap(start, middle); + } + if(comparator.compare(data.get(start), data.get(end - 1)) > 0) { + data.swap(start, end - 1); + } + if(comparator.compare(data.get(middle), data.get(end - 1)) > 0) { + data.swap(middle, end - 1); + } + // TODO: use more candidates for larger arrays? + + final DBID pivot = data.get(middle); + // Move middle element out of the way, just before end + // (Since we already know that "end" is bigger) + data.swap(middle, end - 2); + + // Begin partitioning + int i = start + 1, j = end - 3; + DBIDArrayIter refi = data.iter(), refj = data.iter(); + refi.seek(i); + refj.seek(j); + // This is classic quicksort stuff + while(true) { + while(comparator.compare(refi, pivot) <= 0 && i <= j) { + i++; + refi.advance(); + } + while(comparator.compare(refj, pivot) >= 0 && j >= i) { + j--; + refj.retract(); + } + if(i >= j) { + break; + } + data.swap(i, j); + } - final DBID pivot = data.get(middle); - // Move middle element out of the way, just before end - // (Since we already know that "end" is bigger) - data.swap(middle, end - 2); + // Move pivot (former middle element) back into the appropriate place + data.swap(i, end - 2); - // Begin partitioning - int i = start + 1, j = end - 3; - // This is classic quicksort stuff - while(true) { - while(comparator.compare(data.get(i), pivot) <= 0 && i <= j) { - i++; + // In contrast to quicksort, we only need to recurse into the half we are + // interested in. Instead of recursion we now use iteration. + if(rank < i) { + end = i; } - while(comparator.compare(data.get(j), pivot) >= 0 && j >= i) { - j--; + else if(rank > i) { + start = i + 1; } - if(i >= j) { + else { break; } - data.swap(i, j); - } - - // Move pivot (former middle element) back into the appropriate place - data.swap(i, end - 2); - - // In contrast to quicksort, we only need to recurse into the half we are - // interested in. - if(rank < i) { - quickSelect(data, comparator, start, i, rank); - } - else if(rank > i) { - quickSelect(data, comparator, i + 1, end, rank); - } + } // Loop until rank==i } /** @@ -948,9 +980,15 @@ public class QuickSelect { * @param start Interval start * @param end Interval end */ - private static void insertionSort(ArrayModifiableDBIDs data, Comparator<? super DBID> comparator, int start, int end) { + private static void insertionSort(ArrayModifiableDBIDs data, Comparator<? super DBIDRef> comparator, int start, int end) { + DBIDArrayIter iter1 = data.iter(), iter2 = data.iter(); for(int i = start + 1; i < end; i++) { - for(int j = i; j > start && comparator.compare(data.get(j - 1), data.get(j)) > 0; j--) { + iter1.seek(i - 1); + iter2.seek(i); + for(int j = i; j > start; j--, iter1.retract(), iter2.retract()) { + if(comparator.compare(iter1, iter2) > 0) { + break; + } data.swap(j, j - 1); } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayLikeUtil.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayLikeUtil.java index f485e214..a08feb1a 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayLikeUtil.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayLikeUtil.java @@ -40,29 +40,29 @@ import de.lmu.ifi.dbs.elki.data.NumberVector; */ public final class ArrayLikeUtil { /** - * Static instance for lists + * Static instance for lists. */ private static final ListArrayAdapter<Object> LISTADAPTER = new ListArrayAdapter<Object>(); /** - * Static instance for lists of numbers + * Static instance for lists of numbers. */ private static final NumberListArrayAdapter<Number> NUMBERLISTADAPTER = new NumberListArrayAdapter<Number>(); /** - * Static instance + * Static instance. */ - private final static IdentityArrayAdapter<?> IDENTITYADAPTER = new IdentityArrayAdapter<Object>(); + private static final IdentityArrayAdapter<?> IDENTITYADAPTER = new IdentityArrayAdapter<Object>(); /** - * Static instance + * Static instance. */ - private static final FeatureVectorAdapter<?> FEATUREVECTORADAPTER = new FeatureVectorAdapter<Number>(); + public static final FeatureVectorAdapter<?> FEATUREVECTORADAPTER = new FeatureVectorAdapter<Number>(); /** * Use a number vector in the array API. */ - private static final NumberVectorAdapter<?> NUMBERVECTORADAPTER = new NumberVectorAdapter<Double>(); + public static final NumberVectorAdapter<?> NUMBERVECTORADAPTER = new NumberVectorAdapter<Double>(); /** * Use a double array in the array API. @@ -83,6 +83,13 @@ public final class ArrayLikeUtil { * Use ArrayDBIDs as array. */ public static final ArrayDBIDsAdapter ARRAYDBIDADAPTER = new ArrayDBIDsAdapter(); + + /** + * Fake constructor. Do not instantiate! + */ + private ArrayLikeUtil() { + // Do not instantiate + } /** * Cast the static instance. @@ -124,7 +131,7 @@ public final class ArrayLikeUtil { * @return Instance */ @SuppressWarnings("unchecked") - public static <F> FeatureVectorAdapter<F> featureVectorAdapter(FeatureVector<?, F> prototype) { + public static <F> FeatureVectorAdapter<F> featureVectorAdapter(FeatureVector<F> prototype) { return (FeatureVectorAdapter<F>) FEATUREVECTORADAPTER; } @@ -135,7 +142,7 @@ public final class ArrayLikeUtil { * @return Instance */ @SuppressWarnings("unchecked") - public static <N extends Number> NumberVectorAdapter<N> numberVectorAdapter(NumberVector<?, N> prototype) { + public static <N extends Number> NumberVectorAdapter<N> numberVectorAdapter(NumberVector<N> prototype) { return (NumberVectorAdapter<N>) NUMBERVECTORADAPTER; } @@ -185,7 +192,7 @@ public final class ArrayLikeUtil { } /** - * Convert a numeric array-like to a <code>double[]</code> + * Convert a numeric array-like to a <code>double[]</code>. * * @param array Array-like * @param adapter Adapter @@ -215,12 +222,12 @@ public final class ArrayLikeUtil { * @param obj Object to convert * @return primitive double array */ - public static <N extends Number> double[] toPrimitiveDoubleArray(NumberVector<?, N> obj) { + public static <N extends Number> double[] toPrimitiveDoubleArray(NumberVector<N> obj) { return toPrimitiveDoubleArray(obj, numberVectorAdapter(obj)); } /** - * Convert a numeric array-like to a <code>float[]</code> + * Convert a numeric array-like to a <code>float[]</code>. * * @param array Array-like * @param adapter Adapter @@ -250,7 +257,7 @@ public final class ArrayLikeUtil { * @param obj Object to convert * @return primitive float array */ - public static <N extends Number> float[] toPrimitiveFloatArray(NumberVector<?, N> obj) { + public static <N extends Number> float[] toPrimitiveFloatArray(NumberVector<N> obj) { return toPrimitiveFloatArray(obj, numberVectorAdapter(obj)); } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/DoubleArrayAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/DoubleArrayAdapter.java index 43419103..117f3845 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/DoubleArrayAdapter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/DoubleArrayAdapter.java @@ -45,8 +45,9 @@ class DoubleArrayAdapter implements NumberArrayAdapter<Double, double[]> { } @Override + @Deprecated public Double get(double[] array, int off) throws IndexOutOfBoundsException { - return array[off]; + return Double.valueOf(array[off]); } @Override diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ExtendedArray.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ExtendedArray.java index a195360b..491c4f95 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ExtendedArray.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ExtendedArray.java @@ -68,13 +68,13 @@ public class ExtendedArray<T> implements ArrayAdapter<T, ExtendedArray<T>> { @Override public int size(ExtendedArray<T> array) { - assert (array == this); + assert (this == array); return size; } @Override public T get(ExtendedArray<T> array, int off) throws IndexOutOfBoundsException { - assert (array == this); + assert (this == array); if(off == size - 1) { return extra; } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FeatureVectorAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FeatureVectorAdapter.java index 19c2ec19..38b662e8 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FeatureVectorAdapter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FeatureVectorAdapter.java @@ -34,7 +34,7 @@ import de.lmu.ifi.dbs.elki.data.FeatureVector; * * @param <F> Feature type */ -public class FeatureVectorAdapter<F> implements ArrayAdapter<F, FeatureVector<?, F>> { +public class FeatureVectorAdapter<F> implements ArrayAdapter<F, FeatureVector<F>> { /** * Constructor. * @@ -45,12 +45,12 @@ public class FeatureVectorAdapter<F> implements ArrayAdapter<F, FeatureVector<?, } @Override - public int size(FeatureVector<?, F> array) { + public int size(FeatureVector<F> array) { return array.getDimensionality(); } @Override - public F get(FeatureVector<?, F> array, int off) throws IndexOutOfBoundsException { + public F get(FeatureVector<F> array, int off) throws IndexOutOfBoundsException { return array.getValue(off); } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FloatArrayAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FloatArrayAdapter.java index 14d42dc5..ae501039 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FloatArrayAdapter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FloatArrayAdapter.java @@ -46,8 +46,9 @@ class FloatArrayAdapter implements NumberArrayAdapter<Float, float[]> { } @Override + @Deprecated public Float get(float[] array, int off) throws IndexOutOfBoundsException { - return array[off]; + return Float.valueOf(array[off]); } @Override diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ListArrayAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ListArrayAdapter.java index 792fad0b..cba1e706 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ListArrayAdapter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ListArrayAdapter.java @@ -1,4 +1,26 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike; +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ import java.util.List; diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberVectorAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberVectorAdapter.java index c75acd64..fd1e6636 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberVectorAdapter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberVectorAdapter.java @@ -1,7 +1,5 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike; -import de.lmu.ifi.dbs.elki.data.NumberVector; - /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures @@ -25,6 +23,8 @@ import de.lmu.ifi.dbs.elki.data.NumberVector; along with this program. If not, see <http://www.gnu.org/licenses/>. */ +import de.lmu.ifi.dbs.elki.data.NumberVector; + /** * Adapter to use a feature vector as an array of features. * @@ -34,7 +34,7 @@ import de.lmu.ifi.dbs.elki.data.NumberVector; * * @param <N> Number type */ -public class NumberVectorAdapter<N extends Number> implements NumberArrayAdapter<N, NumberVector<?, N>> { +public class NumberVectorAdapter<N extends Number> implements NumberArrayAdapter<N, NumberVector<N>> { /** * Constructor. * @@ -45,42 +45,43 @@ public class NumberVectorAdapter<N extends Number> implements NumberArrayAdapter } @Override - public int size(NumberVector<?, N> array) { + public int size(NumberVector<N> array) { return array.getDimensionality(); } @Override - public N get(NumberVector<?, N> array, int off) throws IndexOutOfBoundsException { + @Deprecated + public N get(NumberVector<N> array, int off) throws IndexOutOfBoundsException { return array.getValue(off + 1); } @Override - public double getDouble(NumberVector<?, N> array, int off) throws IndexOutOfBoundsException { - return array.doubleValue(off + 1); + public double getDouble(NumberVector<N> array, int off) throws IndexOutOfBoundsException { + return array.doubleValue(off); } @Override - public float getFloat(NumberVector<?, N> array, int off) throws IndexOutOfBoundsException { - return array.floatValue(off + 1); + public float getFloat(NumberVector<N> array, int off) throws IndexOutOfBoundsException { + return array.floatValue(off); } @Override - public int getInteger(NumberVector<?, N> array, int off) throws IndexOutOfBoundsException { - return array.intValue(off + 1); + public int getInteger(NumberVector<N> array, int off) throws IndexOutOfBoundsException { + return array.intValue(off); } @Override - public short getShort(NumberVector<?, N> array, int off) throws IndexOutOfBoundsException { - return array.shortValue(off + 1); + public short getShort(NumberVector<N> array, int off) throws IndexOutOfBoundsException { + return array.shortValue(off); } @Override - public long getLong(NumberVector<?, N> array, int off) throws IndexOutOfBoundsException { - return array.longValue(off + 1); + public long getLong(NumberVector<N> array, int off) throws IndexOutOfBoundsException { + return array.longValue(off); } @Override - public byte getByte(NumberVector<?, N> array, int off) throws IndexOutOfBoundsException { - return array.byteValue(off + 1); + public byte getByte(NumberVector<N> array, int off) throws IndexOutOfBoundsException { + return array.byteValue(off); } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SubsetArrayAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SubsetArrayAdapter.java index b2958358..c607759f 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SubsetArrayAdapter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SubsetArrayAdapter.java @@ -23,7 +23,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike; */ /** - * Subset array adapter (allows reordering and projection) + * Subset array adapter (allows reordering and projection). * * @author Erich Schubert * @@ -32,12 +32,12 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike; */ public class SubsetArrayAdapter<T, A> implements ArrayAdapter<T, A> { /** - * Wrapped adapter + * Wrapped adapter. */ ArrayAdapter<T, ? super A> wrapped; /** - * Offsets to return + * Offsets to return. */ int[] offs; diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/TDoubleListAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/TDoubleListAdapter.java index 889c64e8..cee393ac 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/TDoubleListAdapter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/TDoubleListAdapter.java @@ -45,8 +45,9 @@ public class TDoubleListAdapter implements NumberArrayAdapter<Double, TDoubleLis } @Override + @Deprecated public Double get(TDoubleList array, int off) throws IndexOutOfBoundsException { - return array.get(off); + return Double.valueOf(array.get(off)); } @Override diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/IntegerArrayQuickSort.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/IntegerArrayQuickSort.java new file mode 100644 index 00000000..a0c93997 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/IntegerArrayQuickSort.java @@ -0,0 +1,225 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.arrays; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; + +/** + * Class to sort an int array, using a modified quicksort. + * + * The implementation is closely based on: + * <p> + * Dual-Pivot Quicksort<br /> + * Vladimir Yaroslavskiy + * </p> + * + * and differs mostly in that we sort different kinds of arrays, + * and allow the use of comparators - useful in particular when + * the array references external objects. + * + * @author Erich Schubert + * + * @apiviz.uses IntegerComparator + */ +@Reference(authors = "Vladimir Yaroslavskiy", title = "Dual-Pivot Quicksort", booktitle = "http://iaroslavski.narod.ru/quicksort/", url = "http://iaroslavski.narod.ru/quicksort/") +public class IntegerArrayQuickSort { + /** + * Threshold for using insertion sort. Value taken from Javas QuickSort, + * assuming that it will be similar for our data sets. + */ + private static final int INSERTION_THRESHOLD = 47; + + /** + * Sort the full array using the given comparator. + * + * @param data Data to sort + * @param comp Comparator + */ + public static void sort(int[] data, IntegerComparator comp) { + sort(data, 0, data.length, comp); + } + + /** + * Sort the array using the given comparator. + * + * @param data Data to sort + * @param start First index + * @param end Last index (exclusive) + * @param comp Comparator + */ + public static void sort(int[] data, int start, int end, IntegerComparator comp) { + quickSort(data, start, end - 1, comp); + } + + /** + * Actual recursive sort function. + * + * @param data Data to sort + * @param start First index + * @param end Last index (inclusive!) + * @param comp Comparator + */ + private static void quickSort(int[] data, final int start, final int end, IntegerComparator comp) { + final int len = end - start; + if (len < INSERTION_THRESHOLD) { + // Classic insertion sort. + for (int i = start + 1; i <= end; i++) { + for (int j = i; j > start; j--) { + if (comp.compare(data[j], data[j - 1]) < 0) { + final int tmp = data[j - 1]; + data[j - 1] = data[j]; + data[j] = tmp; + } else { + break; + } + } + } + return; + } + + // Choose pivots by looking at five candidates. + final int seventh = (len >> 3) + (len >> 6) + 1; + final int m3 = (start + end) >> 1; // middle + final int m2 = m3 - seventh; + final int m1 = m2 - seventh; + final int m4 = m3 + seventh; + final int m5 = m4 + seventh; + + // Explicit (and optimal) sorting network for 5 elements + // See Knuth for details. + if (comp.compare(data[m1], data[m2]) > 0) { + final int tmp = data[m2]; + data[m2] = data[m1]; + data[m1] = tmp; + } + if (comp.compare(data[m1], data[m3]) > 0) { + final int tmp = data[m3]; + data[m3] = data[m1]; + data[m1] = tmp; + } + if (comp.compare(data[m2], data[m3]) > 0) { + final int tmp = data[m3]; + data[m3] = data[m2]; + data[m2] = tmp; + } + if (comp.compare(data[m4], data[m5]) > 0) { + final int tmp = data[m5]; + data[m5] = data[m4]; + data[m4] = tmp; + } + if (comp.compare(data[m1], data[m4]) > 0) { + final int tmp = data[m4]; + data[m4] = data[m1]; + data[m1] = tmp; + } + if (comp.compare(data[m3], data[m4]) > 0) { + final int tmp = data[m4]; + data[m4] = data[m3]; + data[m3] = tmp; + } + if (comp.compare(data[m2], data[m5]) > 0) { + final int tmp = data[m5]; + data[m5] = data[m2]; + data[m2] = tmp; + } + if (comp.compare(data[m2], data[m3]) > 0) { + final int tmp = data[m3]; + data[m3] = data[m2]; + data[m2] = tmp; + } + if (comp.compare(data[m4], data[m5]) > 0) { + final int tmp = data[m5]; + data[m5] = data[m4]; + data[m4] = tmp; + } + + // Choose the 2 and 4th as pivots, as we want to get three parts + // Copy to variables v1 and v3, replace them with the start and end + // Note: do not modify v1 or v3 until we put them back! + final int lpivot = data[m2]; + final int rpivot = data[m4]; + data[m2] = data[start]; + data[m4] = data[end]; + + // A tie is when the two chosen pivots are the same + final boolean tied = comp.compare(lpivot, rpivot) == 0; + + // Insertion points for pivot areas. + int left = start + 1; + int right = end - 1; + + // Note: we merged the ties and no ties cases. + // This likely is marginally slower, but not at a macro level + // And you never know with hotspot. + for (int k = left; k <= right; k++) { + final int tmp = data[k]; + final int c = comp.compare(tmp, lpivot); + if (c == 0) { + continue; + } else if (c < 0) { + // Traditional quicksort + data[k] = data[left]; + data[left] = tmp; + left++; + } else if (tied || comp.compare(tmp, rpivot) > 0) { + // Now look at the right. First skip correct entries there, too + while (true) { + final int tmp2 = data[right]; + if (comp.compare(tmp2, rpivot) > 0 && k < right) { + right--; + } else { + break; + } + } + // Now move tmp from k to the right. + data[k] = data[right]; + data[right] = tmp; + right--; + // Test the element we just inserted: left or center? + if (comp.compare(data[k], lpivot) < 0) { + final int tmp2 = data[k]; + data[k] = data[left]; + data[left] = tmp2; + left++; + } // else: center. cannot be on right. + } + } + // Put the pivot elements back in. + // Remember: we must not modify v1 and v3 above. + data[start] = data[left - 1]; + data[left - 1] = lpivot; + data[end] = data[right + 1]; + data[right + 1] = rpivot; + // v1 and v3 are now safe to modify again. Perform recursion: + quickSort(data, start, left - 2, comp); + // Handle the middle part - if necessary: + if (!tied) { + // TODO: the original publication had a special tie handling here. + // It shouldn't affect correctness, but probably improves situations + // with a lot of tied elements. + quickSort(data, left, right, comp); + } + quickSort(data, right + 2, end, comp); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/IntegerComparator.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/IntegerComparator.java new file mode 100644 index 00000000..51164425 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/IntegerComparator.java @@ -0,0 +1,40 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.arrays; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * Interface for comparing two integers. + * + * @author Erich Schubert + */ +public interface IntegerComparator { + /** + * Compare two integers. + * + * @param x First int + * @param y Second int + * @return Comparison result + */ + int compare(int x, int y); +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/package-info.java new file mode 100644 index 00000000..3db78032 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/package-info.java @@ -0,0 +1,26 @@ +/** + * <p>Utilities for arrays: advanced sorting for primitvie arrays.</p> + */ +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2012 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ +package de.lmu.ifi.dbs.elki.utilities.datastructures.arrays;
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/AbstractHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/AbstractHeap.java new file mode 100644 index 00000000..b6f098e6 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/AbstractHeap.java @@ -0,0 +1,103 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * Abstract base class for heaps. + * + * @author Erich Schubert + */ +public class AbstractHeap { + /** + * Default initial capacity + */ + public static final int DEFAULT_INITIAL_CAPACITY = 11; + + /** + * Current number of objects + */ + public int size = 0; + + /** + * Indicate up to where the heap is valid + */ + public int validSize = 0; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + public int modCount = 0; + + /** + * Constructor. + */ + public AbstractHeap() { + super(); + } + + /** + * Query the size + * + * @return Size + */ + public int size() { + return this.size; + } + + /** + * Delete all elements from the heap. + */ + public void clear() { + this.size = 0; + this.validSize = -1; + heapModified(); + } + + /** + * Test whether we need to resize to have the requested capacity. + * + * @param requiredSize required capacity + * @param capacity Current capacity + * @return new capacity + */ + protected final int desiredSize(int requiredSize, int capacity) { + // Double until 64, then increase by 50% each time. + int newCapacity = ((capacity < 64) ? ((capacity + 1) * 2) : ((capacity / 2) * 3)); + // overflow? + if (newCapacity < 0) { + throw new OutOfMemoryError(); + } + if (requiredSize > newCapacity) { + newCapacity = requiredSize; + } + return newCapacity; + } + + /** + * Called at the end of each heap modification. + */ + protected void heapModified() { + modCount++; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMaxHeap.java new file mode 100644 index 00000000..ab8ef1bb --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMaxHeap.java @@ -0,0 +1,63 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * Basic in-memory heap structure. + * + * This heap is built lazily: if you first add many elements, then poll the + * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log + * n). This is implemented via a simple validTo counter. + * + * @author Erich Schubert + */ +public class ComparableMaxHeap<K extends Comparable<K>> extends ObjectHeap<K> { + /** + * Constructor with default capacity. + */ + public ComparableMaxHeap() { + super(DEFAULT_INITIAL_CAPACITY); + } + + /** + * Constructor with initial capacity. + * + * @param size initial capacity + */ + public ComparableMaxHeap(int size) { + super(size); + } + + /** + * Compare two objects + * + * @param o1 First object + * @param o2 Second object + */ + @Override + @SuppressWarnings("unchecked") + protected boolean comp(Object o1, Object o2) { + return ((K) o1).compareTo((K) o2) < 0; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMinHeap.java new file mode 100644 index 00000000..06d2cb32 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMinHeap.java @@ -0,0 +1,63 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * Basic in-memory heap structure. + * + * This heap is built lazily: if you first add many elements, then poll the + * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log + * n). This is implemented via a simple validTo counter. + * + * @author Erich Schubert + */ +public class ComparableMinHeap<K extends Comparable<K>> extends ObjectHeap<K> { + /** + * Constructor with default capacity. + */ + public ComparableMinHeap() { + super(DEFAULT_INITIAL_CAPACITY); + } + + /** + * Constructor with initial capacity. + * + * @param size initial capacity + */ + public ComparableMinHeap(int size) { + super(size); + } + + /** + * Compare two objects + * + * @param o1 First object + * @param o2 Second object + */ + @Override + @SuppressWarnings("unchecked") + protected boolean comp(Object o1, Object o2) { + return ((K) o1).compareTo((K) o2) > 0; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleHeap.java new file mode 100644 index 00000000..f9f928bd --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleHeap.java @@ -0,0 +1,264 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Basic in-memory heap structure. + * + * This heap is built lazily: if you first add many elements, then poll the + * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log + * n). This is implemented via a simple validTo counter. + * + * @author Erich Schubert + */ +public abstract class DoubleHeap extends AbstractHeap { + /** + * Heap storage: queue + */ + protected transient double[] queue; + + /** + * Constructor with initial capacity. + * + * @param size initial capacity + */ + public DoubleHeap(int size) { + super(); + this.size = 0; + this.queue = new double[size]; + } + + /** + * Add a key-value pair to the heap + * + * @param key Key + */ + public void add(double key) { + // resize when needed + if (size + 1 > queue.length) { + resize(size + 1); + } + // final int pos = size; + this.queue[size] = key; + this.size += 1; + heapifyUp(size - 1, key); + validSize += 1; + heapModified(); + } + + /** + * Add a key-value pair to the heap, except if the new element is larger than + * the top, and we are at design size (overflow) + * + * @param key Key + * @param max Maximum size of heap + */ + public void add(double key, int max) { + if (size < max) { + add(key); + } else if (comp(key, peek())) { + replaceTopElement(key); + } + } + + /** + * Combined operation that removes the top element, and inserts a new element + * instead. + * + * @param e New element to insert + * @return Previous top element of the heap + */ + public double replaceTopElement(double e) { + ensureValid(); + double oldroot = queue[0]; + heapifyDown(0, e); + heapModified(); + return oldroot; + } + + /** + * Get the current top key + * + * @return Top key + */ + public double peek() { + if (size == 0) { + throw new ArrayIndexOutOfBoundsException("Peek() on an empty heap!"); + } + ensureValid(); + return queue[0]; + } + + /** + * Remove the first element + * + * @return Top element + */ + public double poll() { + return removeAt(0); + } + + /** + * Repair the heap + */ + protected void ensureValid() { + if (validSize != size) { + if (size > 1) { + // Parent of first invalid + int nextmin = validSize > 0 ? ((validSize - 1) >>> 1) : 0; + int curmin = MathUtil.nextAllOnesInt(nextmin); // Next line + int nextmax = curmin - 1; // End of valid line + int pos = (size - 2) >>> 1; // Parent of last element + // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin+", "+nextmin); + while (pos >= nextmin) { + // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin); + while (pos >= curmin) { + if (!heapifyDown(pos, queue[pos])) { + final int parent = (pos - 1) >>> 1; + if (parent < curmin) { + nextmin = Math.min(nextmin, parent); + nextmax = Math.max(nextmax, parent); + } + } + pos--; + } + curmin = nextmin; + pos = Math.min(pos, nextmax); + nextmax = -1; + } + } + validSize = size; + } + } + + /** + * Remove the element at the given position. + * + * @param pos Element position. + * @return Removed element + */ + protected double removeAt(int pos) { + if (pos < 0 || pos >= size) { + return 0.0; + } + final double top = queue[0]; + // Replacement object: + final double reinkey = queue[size - 1]; + // Keep heap in sync + if (validSize == size) { + size -= 1; + validSize -= 1; + heapifyDown(pos, reinkey); + } else { + size -= 1; + validSize = Math.min(pos >>> 1, validSize); + queue[pos] = reinkey; + } + heapModified(); + return top; + } + + /** + * Execute a "Heapify Upwards" aka "SiftUp". Used in insertions. + * + * @param pos insertion position + * @param curkey Current key + */ + protected void heapifyUp(int pos, double curkey) { + while (pos > 0) { + final int parent = (pos - 1) >>> 1; + double parkey = queue[parent]; + + if (comp(curkey, parkey)) { // Compare + break; + } + queue[pos] = parkey; + pos = parent; + } + queue[pos] = curkey; + } + + /** + * Execute a "Heapify Downwards" aka "SiftDown". Used in deletions. + * + * @param ipos re-insertion position + * @param curkey Current key + * @return true when the order was changed + */ + protected boolean heapifyDown(final int ipos, double curkey) { + int pos = ipos; + final int half = size >>> 1; + while (pos < half) { + // Get left child (must exist!) + int cpos = (pos << 1) + 1; + double chikey = queue[cpos]; + // Test right child, if present + final int rchild = cpos + 1; + if (rchild < size) { + double right = queue[rchild]; + if (comp(chikey, right)) { // Compare + cpos = rchild; + chikey = right; + } + } + + if (comp(chikey, curkey)) { // Compare + break; + } + queue[pos] = chikey; + pos = cpos; + } + queue[pos] = curkey; + return (pos == ipos); + } + + /** + * Test whether we need to resize to have the requested capacity. + * + * @param requiredSize required capacity + */ + protected final void resize(int requiredSize) { + queue = Arrays.copyOf(queue, desiredSize(requiredSize, queue.length)); + } + + /** + * Delete all elements from the heap. + */ + @Override + public void clear() { + super.clear(); + for (int i = 0; i < size; i++) { + queue[i] = 0.0; + } + } + + /** + * Compare two objects + */ + abstract protected boolean comp(double o1, double o2); +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMaxHeap.java new file mode 100644 index 00000000..1b7d6037 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMaxHeap.java @@ -0,0 +1,62 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * Basic in-memory heap structure. + * + * This heap is built lazily: if you first add many elements, then poll the + * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log + * n). This is implemented via a simple validTo counter. + * + * @author Erich Schubert + */ +public class DoubleMaxHeap extends DoubleHeap { + /** + * Constructor with default capacity. + */ + public DoubleMaxHeap() { + super(DEFAULT_INITIAL_CAPACITY); + } + + /** + * Constructor with initial capacity. + * + * @param size initial capacity + */ + public DoubleMaxHeap(int size) { + super(size); + } + + /** + * Compare two objects + * + * @param o1 First object + * @param o2 Second object + */ + @Override + protected boolean comp(double o1, double o2) { + return o1 < o2; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMinHeap.java new file mode 100644 index 00000000..2ce05ff9 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMinHeap.java @@ -0,0 +1,62 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * Basic in-memory heap structure. + * + * This heap is built lazily: if you first add many elements, then poll the + * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log + * n). This is implemented via a simple validTo counter. + * + * @author Erich Schubert + */ +public class DoubleMinHeap extends DoubleHeap { + /** + * Constructor with default capacity. + */ + public DoubleMinHeap() { + super(DEFAULT_INITIAL_CAPACITY); + } + + /** + * Constructor with initial capacity. + * + * @param size initial capacity + */ + public DoubleMinHeap(int size) { + super(size); + } + + /** + * Compare two objects + * + * @param o1 First object + * @param o2 Second object + */ + @Override + protected boolean comp(double o1, double o2) { + return o1 > o2; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjMaxHeap.java new file mode 100644 index 00000000..8417309a --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjMaxHeap.java @@ -0,0 +1,328 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; +import java.util.Comparator; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Basic in-memory heap structure. + * + * This heap is built lazily: if you first add many elements, then poll the + * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log + * n). This is implemented via a simple validTo counter. + * + * @author Erich Schubert + * + * @param <V> value type + */ +public class DoubleObjMaxHeap<V> { + /** + * Heap storage: keys + */ + protected double[] keys; + + /** + * Heap storage: values + */ + protected Object[] values; + + /** + * Current number of objects + */ + protected int size = 0; + + /** + * Indicate up to where the heap is valid + */ + protected int validSize = 0; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + public transient int modCount = 0; + + /** + * Default initial capacity + */ + private static final int DEFAULT_INITIAL_CAPACITY = 11; + + /** + * Default constructor: default capacity, natural ordering. + */ + public DoubleObjMaxHeap() { + this(DEFAULT_INITIAL_CAPACITY); + } + + /** + * Constructor with initial capacity and {@link Comparator}. + * + * @param size initial capacity + */ + public DoubleObjMaxHeap(int size) { + super(); + this.size = 0; + this.keys = new double[size]; + this.values = new Object[size]; + } + + /** + * Add a key-value pair to the heap + * + * @param key Key + * @param val Value + * @return Success code + */ + public boolean add(double key, V val) { + // resize when needed + if(size + 1 > keys.length) { + resize(size + 1); + } + // final int pos = size; + this.keys[size] = key; + this.values[size] = val; + this.size += 1; + heapifyUp(size - 1, key, val); + validSize += 1; + // We have changed - return true according to {@link Collection#put} + modCount++; + return true; + } + + /** + * Get the current top key + * + * @return Top key + */ + public double peekKey() { + if(size == 0) { + throw new ArrayIndexOutOfBoundsException("Peek() on an empty heap!"); + } + ensureValid(); + return keys[0]; + } + + /** + * Get the current top value + * + * @return Value + */ + @SuppressWarnings("unchecked") + public V peekValue() { + if(size == 0) { + throw new ArrayIndexOutOfBoundsException("Peek() on an empty heap!"); + } + ensureValid(); + return (V) values[0]; + } + + /** + * Remove the first element + */ + public void poll() { + removeAt(0); + } + + /** + * Repair the heap + */ + protected void ensureValid() { + if(validSize != size) { + if(size > 1) { + // Parent of first invalid + int nextmin = validSize > 0 ? ((validSize - 1) >>> 1) : 0; + int curmin = MathUtil.nextAllOnesInt(nextmin); // Next line + int nextmax = curmin - 1; // End of valid line + int pos = (size - 2) >>> 1; // Parent of last element + // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin+", "+nextmin); + while(pos >= nextmin) { + // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin); + while(pos >= curmin) { + if(!heapifyDown(pos, keys[pos], values[pos])) { + final int parent = (pos - 1) >>> 1; + if(parent < curmin) { + nextmin = Math.min(nextmin, parent); + nextmax = Math.max(nextmax, parent); + } + } + pos--; + } + curmin = nextmin; + pos = Math.min(pos, nextmax); + nextmax = -1; + } + } + validSize = size; + } + } + + /** + * Remove the element at the given position. + * + * @param pos Element position. + */ + protected void removeAt(int pos) { + if(pos < 0 || pos >= size) { + return; + } + // Replacement object: + final double reinkey = keys[size - 1]; + final Object reinval = values[size - 1]; + values[size - 1] = null; + // Keep heap in sync + if(validSize == size) { + size -= 1; + validSize -= 1; + heapifyDown(pos, reinkey, reinval); + } + else { + size -= 1; + validSize = Math.min(pos >>> 1, validSize); + keys[pos] = reinkey; + values[pos] = reinval; + } + modCount++; + } + + /** + * Execute a "Heapify Upwards" aka "SiftUp". Used in insertions. + * + * @param pos insertion position + * @param curkey Current key + * @param curval Current value + */ + protected void heapifyUp(int pos, double curkey, Object curval) { + while(pos > 0) { + final int parent = (pos - 1) >>> 1; + double parkey = keys[parent]; + + if(curkey <= parkey) { // Compare + break; + } + keys[pos] = parkey; + values[pos] = values[parent]; + pos = parent; + } + keys[pos] = curkey; + values[pos] = curval; + } + + /** + * Execute a "Heapify Downwards" aka "SiftDown". Used in deletions. + * + * @param ipos re-insertion position + * @param curkey Current key + * @param curval Current value + * @return true when the order was changed + */ + protected boolean heapifyDown(final int ipos, double curkey, Object curval) { + int pos = ipos; + final int half = size >>> 1; + while(pos < half) { + // Get left child (must exist!) + int cpos = (pos << 1) + 1; + double chikey = keys[cpos]; + Object chival = values[cpos]; + // Test right child, if present + final int rchild = cpos + 1; + if(rchild < size) { + double right = keys[rchild]; + if(chikey < right) { // Compare + cpos = rchild; + chikey = right; + chival = values[rchild]; + } + } + + if(curkey >= chikey) { // Compare + break; + } + keys[pos] = chikey; + values[pos] = chival; + pos = cpos; + } + keys[pos] = curkey; + values[pos] = curval; + return (pos == ipos); + } + + /** + * Query the size + * + * @return Size + */ + public int size() { + return this.size; + } + + /** + * Test whether we need to resize to have the requested capacity. + * + * @param requiredSize required capacity + */ + protected final void resize(int requiredSize) { + // Double until 64, then increase by 50% each time. + int newCapacity = ((keys.length < 64) ? ((keys.length + 1) << 1) : ((keys.length >> 1) * 3)); + // overflow? + if(newCapacity < 0) { + throw new OutOfMemoryError(); + } + if(requiredSize > newCapacity) { + newCapacity = requiredSize; + } + keys = Arrays.copyOf(keys, newCapacity); + values = Arrays.copyOf(values, newCapacity); + } + + /** + * Delete all elements from the heap. + */ + public void clear() { + // clean up references in the array for memory management + Arrays.fill(values, null); + this.size = 0; + this.validSize = -1; + modCount++; + } + + /** + * Test whether the heap is still valid. + * + * Debug method. + * + * @return {@code null} when the heap is correct + */ + protected String checkHeap() { + ensureValid(); + for(int i = 1; i < size; i++) { + final int parent = (i - 1) >>> 1; + if(keys[parent] < keys[i]) { // Compare + return "@" + parent + ": " + keys[parent] + " < @" + i + ": " + keys[i]; + } + } + return null; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjMinHeap.java new file mode 100644 index 00000000..244277e8 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjMinHeap.java @@ -0,0 +1,328 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; +import java.util.Comparator; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Basic in-memory heap structure. + * + * This heap is built lazily: if you first add many elements, then poll the + * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log + * n). This is implemented via a simple validTo counter. + * + * @author Erich Schubert + * + * @param <V> value type + */ +public class DoubleObjMinHeap<V> { + /** + * Heap storage: keys + */ + protected double[] keys; + + /** + * Heap storage: values + */ + protected Object[] values; + + /** + * Current number of objects + */ + protected int size = 0; + + /** + * Indicate up to where the heap is valid + */ + protected int validSize = 0; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + public transient int modCount = 0; + + /** + * Default initial capacity + */ + private static final int DEFAULT_INITIAL_CAPACITY = 11; + + /** + * Default constructor: default capacity, natural ordering. + */ + public DoubleObjMinHeap() { + this(DEFAULT_INITIAL_CAPACITY); + } + + /** + * Constructor with initial capacity and {@link Comparator}. + * + * @param size initial capacity + */ + public DoubleObjMinHeap(int size) { + super(); + this.size = 0; + this.keys = new double[size]; + this.values = new Object[size]; + } + + /** + * Add a key-value pair to the heap + * + * @param key Key + * @param val Value + * @return Success code + */ + public boolean add(double key, V val) { + // resize when needed + if(size + 1 > keys.length) { + resize(size + 1); + } + // final int pos = size; + this.keys[size] = key; + this.values[size] = val; + this.size += 1; + heapifyUp(size - 1, key, val); + validSize += 1; + // We have changed - return true according to {@link Collection#put} + modCount++; + return true; + } + + /** + * Get the current top key + * + * @return Top key + */ + public double peekKey() { + if(size == 0) { + throw new ArrayIndexOutOfBoundsException("Peek() on an empty heap!"); + } + ensureValid(); + return keys[0]; + } + + /** + * Get the current top value + * + * @return Value + */ + @SuppressWarnings("unchecked") + public V peekValue() { + if(size == 0) { + throw new ArrayIndexOutOfBoundsException("Peek() on an empty heap!"); + } + ensureValid(); + return (V) values[0]; + } + + /** + * Remove the first element + */ + public void poll() { + removeAt(0); + } + + /** + * Repair the heap + */ + protected void ensureValid() { + if(validSize != size) { + if(size > 1) { + // Parent of first invalid + int nextmin = validSize > 0 ? ((validSize - 1) >>> 1) : 0; + int curmin = MathUtil.nextAllOnesInt(nextmin); // Next line + int nextmax = curmin - 1; // End of valid line + int pos = (size - 2) >>> 1; // Parent of last element + // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin+", "+nextmin); + while(pos >= nextmin) { + // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin); + while(pos >= curmin) { + if(!heapifyDown(pos, keys[pos], values[pos])) { + final int parent = (pos - 1) >>> 1; + if(parent < curmin) { + nextmin = Math.min(nextmin, parent); + nextmax = Math.max(nextmax, parent); + } + } + pos--; + } + curmin = nextmin; + pos = Math.min(pos, nextmax); + nextmax = -1; + } + } + validSize = size; + } + } + + /** + * Remove the element at the given position. + * + * @param pos Element position. + */ + protected void removeAt(int pos) { + if(pos < 0 || pos >= size) { + return; + } + // Replacement object: + final double reinkey = keys[size - 1]; + final Object reinval = values[size - 1]; + values[size - 1] = null; + // Keep heap in sync + if(validSize == size) { + size -= 1; + validSize -= 1; + heapifyDown(pos, reinkey, reinval); + } + else { + size -= 1; + validSize = Math.min(pos >>> 1, validSize); + keys[pos] = reinkey; + values[pos] = reinval; + } + modCount++; + } + + /** + * Execute a "Heapify Upwards" aka "SiftUp". Used in insertions. + * + * @param pos insertion position + * @param curkey Current key + * @param curval Current value + */ + protected void heapifyUp(int pos, double curkey, Object curval) { + while(pos > 0) { + final int parent = (pos - 1) >>> 1; + double parkey = keys[parent]; + + if(curkey >= parkey) { // Compare + break; + } + keys[pos] = parkey; + values[pos] = values[parent]; + pos = parent; + } + keys[pos] = curkey; + values[pos] = curval; + } + + /** + * Execute a "Heapify Downwards" aka "SiftDown". Used in deletions. + * + * @param ipos re-insertion position + * @param curkey Current key + * @param curval Current value + * @return true when the order was changed + */ + protected boolean heapifyDown(final int ipos, double curkey, Object curval) { + int pos = ipos; + final int half = size >>> 1; + while(pos < half) { + // Get left child (must exist!) + int cpos = (pos << 1) + 1; + double chikey = keys[cpos]; + Object chival = values[cpos]; + // Test right child, if present + final int rchild = cpos + 1; + if(rchild < size) { + double right = keys[rchild]; + if(chikey > right) { // Compare + cpos = rchild; + chikey = right; + chival = values[rchild]; + } + } + + if(curkey <= chikey) { // Compare + break; + } + keys[pos] = chikey; + values[pos] = chival; + pos = cpos; + } + keys[pos] = curkey; + values[pos] = curval; + return (pos == ipos); + } + + /** + * Query the size + * + * @return Size + */ + public int size() { + return this.size; + } + + /** + * Test whether we need to resize to have the requested capacity. + * + * @param requiredSize required capacity + */ + protected final void resize(int requiredSize) { + // Double until 64, then increase by 50% each time. + int newCapacity = ((keys.length < 64) ? ((keys.length + 1) << 1) : ((keys.length >> 1) * 3)); + // overflow? + if(newCapacity < 0) { + throw new OutOfMemoryError(); + } + if(requiredSize > newCapacity) { + newCapacity = requiredSize; + } + keys = Arrays.copyOf(keys, newCapacity); + values = Arrays.copyOf(values, newCapacity); + } + + /** + * Delete all elements from the heap. + */ + public void clear() { + // clean up references in the array for memory management + Arrays.fill(values, null); + this.size = 0; + this.validSize = -1; + modCount++; + } + + /** + * Test whether the heap is still valid. + * + * Debug method. + * + * @return {@code null} when the heap is correct + */ + protected String checkHeap() { + ensureValid(); + for(int i = 1; i < size; i++) { + final int parent = (i - 1) >>> 1; + if(keys[parent] > keys[i]) { // Compare + return "@" + parent + ": " + keys[parent] + " < @" + i + ": " + keys[i]; + } + } + return null; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoublePriorityObject.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoublePriorityObject.java index 976a4d0c..92d548cb 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoublePriorityObject.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoublePriorityObject.java @@ -79,8 +79,9 @@ public class DoublePriorityObject<O> implements PairInterface<Double, O>, Compar } @Override + @Deprecated public Double getFirst() { - return priority; + return Double.valueOf(priority); } @Override @@ -120,8 +121,8 @@ public class DoublePriorityObject<O> implements PairInterface<Double, O>, Compar @Override public String toString() { - StringBuffer buf = new StringBuffer(); - buf.append(priority).append(":").append(object.toString()); + StringBuilder buf = new StringBuilder(); + buf.append(priority).append(':').append(object.toString()); return buf.toString(); } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/Heap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/Heap.java index 307a0807..86d3ae08 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/Heap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/Heap.java @@ -23,11 +23,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import java.io.Serializable; -import java.util.AbstractQueue; -import java.util.ArrayList; import java.util.Arrays; -import java.util.Collection; import java.util.Comparator; import java.util.ConcurrentModificationException; import java.util.Iterator; @@ -49,39 +45,34 @@ import de.lmu.ifi.dbs.elki.math.MathUtil; * @param <E> Element type. Should be {@link java.lang.Comparable} or a * {@link java.util.Comparator} needs to be given. */ -public class Heap<E> extends AbstractQueue<E> implements Serializable { +public class Heap<E> implements Iterable<E> { /** - * Serial version - */ - private static final long serialVersionUID = 1L; - - /** - * Heap storage + * Heap storage. */ protected transient Object[] queue; /** - * Current number of objects + * Current number of objects. */ protected int size = 0; /** - * Indicate up to where the heap is valid + * Indicate up to where the heap is valid. */ protected int validSize = 0; /** - * The comparator or {@code null} + * The comparator or {@code null}. */ protected final Comparator<Object> comparator; /** * (Structural) modification counter. Used to invalidate iterators. */ - public transient int modCount = 0; + private transient int modCount = 0; /** - * Default initial capacity + * Default initial capacity. */ private static final int DEFAULT_INITIAL_CAPACITY = 11; @@ -124,16 +115,14 @@ public class Heap<E> extends AbstractQueue<E> implements Serializable { this.comparator = (Comparator<Object>) comparator; } - @Override - public boolean add(E e) { - // Never full - overriding probably slightly faster - return offer(e); - } - - @Override - public boolean offer(E e) { + /** + * Add an element to the heap. + * + * @param e Element to add + */ + public void add(E e) { // resize when needed - if(size + 1 > queue.length) { + if (size + 1 > queue.length) { resize(size + 1); } // final int pos = size; @@ -141,46 +130,69 @@ public class Heap<E> extends AbstractQueue<E> implements Serializable { this.size += 1; heapifyUp(size - 1, e); validSize += 1; - // We have changed - return true according to {@link Collection#put} - modCount++; - return true; + heapModified(); } - @Override + /** + * Combined operation that removes the top element, and inserts a new element + * instead. + * + * @param e New element to insert + * @return Previous top element of the heap + */ + @SuppressWarnings("unchecked") + public E replaceTopElement(E e) { + ensureValid(); + E oldroot = (E) queue[0]; + heapifyDown(0, e); + heapModified(); + return oldroot; + } + + /** + * Peek at the top element. + * + * @return Top element. + */ + @SuppressWarnings("unchecked") public E peek() { - if(size == 0) { + if (size == 0) { return null; } ensureValid(); - return castQueueElement(0); + return (E) queue[0]; } - @Override + /** + * Remove the top element. + * + * @return Top element. + */ public E poll() { ensureValid(); return removeAt(0); } /** - * Repair the heap + * Perform pending heap repair operations in a single bulk operation. */ protected void ensureValid() { - if(validSize != size) { - if(size > 1) { + if (validSize != size) { + if (size > 1) { // Bottom up heap update. - if(comparator != null) { + if (comparator != null) { // Parent of first invalid int nextmin = validSize > 0 ? ((validSize - 1) >>> 1) : 0; int curmin = MathUtil.nextAllOnesInt(nextmin); // Next line int nextmax = curmin - 1; // End of valid line int pos = (size - 2) >>> 1; // Parent of last element // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin+", "+nextmin); - while(pos >= nextmin) { + while (pos >= nextmin) { // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin); - while(pos >= curmin) { - if(!heapifyDownComparator(pos, queue[pos])) { + while (pos >= curmin) { + if (!heapifyDownComparator(pos, queue[pos])) { final int parent = (pos - 1) >>> 1; - if(parent < curmin) { + if (parent < curmin) { nextmin = Math.min(nextmin, parent); nextmax = Math.max(nextmax, parent); } @@ -191,20 +203,19 @@ public class Heap<E> extends AbstractQueue<E> implements Serializable { pos = Math.min(pos, nextmax); nextmax = -1; } - } - else { + } else { // Parent of first invalid int nextmin = validSize > 0 ? ((validSize - 1) >>> 1) : 0; int curmin = MathUtil.nextAllOnesInt(nextmin); // Next line int nextmax = curmin - 1; // End of valid line int pos = (size - 2) >>> 1; // Parent of last element // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin+", "+nextmin); - while(pos >= nextmin) { + while (pos >= nextmin) { // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin); - while(pos >= curmin) { - if(!heapifyDownComparable(pos, queue[pos])) { + while (pos >= curmin) { + if (!heapifyDownComparable(pos, queue[pos])) { final int parent = (pos - 1) >>> 1; - if(parent < curmin) { + if (parent < curmin) { nextmin = Math.min(nextmin, parent); nextmax = Math.max(nextmax, parent); } @@ -225,27 +236,28 @@ public class Heap<E> extends AbstractQueue<E> implements Serializable { * Remove the element at the given position. * * @param pos Element position. + * @return Element that was at this position. */ + @SuppressWarnings("unchecked") protected E removeAt(int pos) { - if(pos < 0 || pos >= size) { + if (pos < 0 || pos >= size) { return null; } - final E ret = castQueueElement(pos); + final E ret = (E) queue[pos]; // Replacement object: final Object reinsert = queue[size - 1]; queue[size - 1] = null; // Keep heap in sync - if(validSize == size) { + if (validSize == size) { size -= 1; validSize -= 1; heapifyDown(pos, reinsert); - } - else { + } else { size -= 1; validSize = Math.min(pos >>> 1, validSize); queue[pos] = reinsert; } - modCount++; + heapModified(); return ret; } @@ -257,10 +269,9 @@ public class Heap<E> extends AbstractQueue<E> implements Serializable { */ protected void heapifyUp(int pos, E elem) { assert (pos < size && pos >= 0); - if(comparator != null) { + if (comparator != null) { heapifyUpComparator(pos, elem); - } - else { + } else { heapifyUpComparable(pos, elem); } } @@ -274,11 +285,11 @@ public class Heap<E> extends AbstractQueue<E> implements Serializable { @SuppressWarnings("unchecked") protected void heapifyUpComparable(int pos, Object elem) { final Comparable<Object> cur = (Comparable<Object>) elem; // queue[pos]; - while(pos > 0) { + while (pos > 0) { final int parent = (pos - 1) >>> 1; Object par = queue[parent]; - if(cur.compareTo(par) >= 0) { + if (cur.compareTo(par) >= 0) { break; } queue[pos] = par; @@ -294,11 +305,11 @@ public class Heap<E> extends AbstractQueue<E> implements Serializable { * @param cur Element to insert */ protected void heapifyUpComparator(int pos, Object cur) { - while(pos > 0) { + while (pos > 0) { final int parent = (pos - 1) >>> 1; Object par = queue[parent]; - if(comparator.compare(cur, par) >= 0) { + if (comparator.compare(cur, par) >= 0) { break; } queue[pos] = par; @@ -316,10 +327,9 @@ public class Heap<E> extends AbstractQueue<E> implements Serializable { */ protected boolean heapifyDown(int pos, Object reinsert) { assert (pos >= 0); - if(comparator != null) { + if (comparator != null) { return heapifyDownComparator(pos, reinsert); - } - else { + } else { return heapifyDownComparable(pos, reinsert); } } @@ -328,6 +338,7 @@ public class Heap<E> extends AbstractQueue<E> implements Serializable { * Execute a "Heapify Downwards" aka "SiftDown". Used in deletions. * * @param ipos re-insertion position + * @param reinsert Object to reinsert * @return true when the order was changed */ @SuppressWarnings("unchecked") @@ -335,21 +346,21 @@ public class Heap<E> extends AbstractQueue<E> implements Serializable { Comparable<Object> cur = (Comparable<Object>) reinsert; int pos = ipos; final int half = size >>> 1; - while(pos < half) { + while (pos < half) { // Get left child (must exist!) int cpos = (pos << 1) + 1; Object child = queue[cpos]; // Test right child, if present final int rchild = cpos + 1; - if(rchild < size) { + if (rchild < size) { Object right = queue[rchild]; - if(((Comparable<Object>) child).compareTo(right) > 0) { + if (((Comparable<Object>) child).compareTo(right) > 0) { cpos = rchild; child = right; } } - if(cur.compareTo(child) <= 0) { + if (cur.compareTo(child) <= 0) { break; } queue[pos] = child; @@ -363,30 +374,31 @@ public class Heap<E> extends AbstractQueue<E> implements Serializable { * Execute a "Heapify Downwards" aka "SiftDown". Used in deletions. * * @param ipos re-insertion position + * @param cur Object to reinsert * @return true when the order was changed */ protected boolean heapifyDownComparator(final int ipos, Object cur) { int pos = ipos; final int half = size >>> 1; - while(pos < half) { + while (pos < half) { int min = pos; Object best = cur; final int lchild = (pos << 1) + 1; Object left = queue[lchild]; - if(comparator.compare(best, left) > 0) { + if (comparator.compare(best, left) > 0) { min = lchild; best = left; } final int rchild = lchild + 1; - if(rchild < size) { + if (rchild < size) { Object right = queue[rchild]; - if(comparator.compare(best, right) > 0) { + if (comparator.compare(best, right) > 0) { min = rchild; best = right; } } - if(min == pos) { + if (min == pos) { break; } queue[pos] = best; @@ -396,56 +408,53 @@ public class Heap<E> extends AbstractQueue<E> implements Serializable { return (pos == ipos); } - @SuppressWarnings("unchecked") - protected final E castQueueElement(int n) { - return (E) queue[n]; - } - - @Override + /** + * Get the heap size. + * + * @return Heap size + */ public int size() { return this.size; } /** + * Test for emptiness. + * + * @return true when the heap is empty + */ + public boolean isEmpty() { + return size == 0; + } + + /** * Test whether we need to resize to have the requested capacity. * * @param requiredSize required capacity */ protected final void resize(int requiredSize) { // Double until 64, then increase by 50% each time. - int newCapacity = ((queue.length < 64) ? ((queue.length + 1) * 2) : ((queue.length / 2) * 3)); + int newCapacity = ((queue.length < 64) ? ((queue.length + 1) << 1) : ((queue.length >> 1) * 3)); // overflow? - if(newCapacity < 0) { + if (newCapacity < 0) { throw new OutOfMemoryError(); } - if(requiredSize > newCapacity) { + if (requiredSize > newCapacity) { newCapacity = requiredSize; } queue = Arrays.copyOf(queue, newCapacity); } - @Override + /** + * Clear the heap. + */ public void clear() { // clean up references in the array for memory management - for(int i = 0; i < size; i++) { + for (int i = 0; i < size; i++) { queue[i] = null; } this.size = 0; this.validSize = -1; - modCount++; - } - - @Override - public boolean contains(Object o) { - if(o != null) { - // TODO: use order to reduce search space? - for(int i = 0; i < size; i++) { - if(o.equals(queue[i])) { - return true; - } - } - } - return false; + heapModified(); } @Override @@ -453,19 +462,11 @@ public class Heap<E> extends AbstractQueue<E> implements Serializable { return new Itr(); } - @Override - public boolean addAll(Collection<? extends E> c) { - final int addsize = c.size(); - if(addsize <= 0) { - return false; - } - if(size + addsize > queue.length) { - resize(size + addsize); - } - for(E elem : c) { - add(elem); - } - return true; + /** + * Called at the end of each heap modification. + */ + protected void heapModified() { + modCount++; } /** @@ -477,7 +478,7 @@ public class Heap<E> extends AbstractQueue<E> implements Serializable { */ protected final class Itr implements Iterator<E> { /** - * Cursor position + * Cursor position. */ private int cursor = 0; @@ -491,26 +492,26 @@ public class Heap<E> extends AbstractQueue<E> implements Serializable { return cursor < size; } + @SuppressWarnings("unchecked") @Override public E next() { - if(expectedModCount != modCount) { + if (expectedModCount != modCount) { throw new ConcurrentModificationException(); } - if(cursor < size) { - return castQueueElement(cursor++); + if (cursor < size) { + return (E) queue[cursor++]; } throw new NoSuchElementException(); } @Override public void remove() { - if(expectedModCount != modCount) { + if (expectedModCount != modCount) { throw new ConcurrentModificationException(); } - if(cursor > 0) { + if (cursor > 0) { cursor--; - } - else { + } else { throw new IllegalStateException(); } expectedModCount = modCount; @@ -518,20 +519,6 @@ public class Heap<E> extends AbstractQueue<E> implements Serializable { } /** - * Return the heap as a sorted array list, by repeated polling. This will - * empty the heap! - * - * @return new array list - */ - public ArrayList<E> toSortedArrayList() { - ArrayList<E> ret = new ArrayList<E>(size()); - while(!isEmpty()) { - ret.add(poll()); - } - return ret; - } - - /** * Test whether the heap is still valid. * * Debug method. @@ -540,24 +527,23 @@ public class Heap<E> extends AbstractQueue<E> implements Serializable { */ protected String checkHeap() { ensureValid(); - if(comparator == null) { - for(int i = 1; i < size; i++) { + if (comparator == null) { + for (int i = 1; i < size; i++) { final int parent = (i - 1) >>> 1; @SuppressWarnings("unchecked") Comparable<Object> po = (Comparable<Object>) queue[parent]; - if(po.compareTo(queue[i]) > 0) { + if (po.compareTo(queue[i]) > 0) { return "@" + parent + ": " + queue[parent] + " < @" + i + ": " + queue[i]; } } - } - else { - for(int i = 1; i < size; i++) { + } else { + for (int i = 1; i < size; i++) { final int parent = (i - 1) >>> 1; - if(comparator.compare(queue[parent], queue[i]) > 0) { + if (comparator.compare(queue[parent], queue[i]) > 0) { return "@" + parent + ": " + queue[parent] + " < @" + i + ": " + queue[i]; } } } return null; } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerHeap.java new file mode 100644 index 00000000..6203ad96 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerHeap.java @@ -0,0 +1,264 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Basic in-memory heap structure. + * + * This heap is built lazily: if you first add many elements, then poll the + * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log + * n). This is implemented via a simple validTo counter. + * + * @author Erich Schubert + */ +public abstract class IntegerHeap extends AbstractHeap { + /** + * Heap storage: queue + */ + protected transient int[] queue; + + /** + * Constructor with initial capacity. + * + * @param size initial capacity + */ + public IntegerHeap(int size) { + super(); + this.size = 0; + this.queue = new int[size]; + } + + /** + * Add a key-value pair to the heap + * + * @param key Key + */ + public void add(int key) { + // resize when needed + if (size + 1 > queue.length) { + resize(size + 1); + } + // final int pos = size; + this.queue[size] = key; + this.size += 1; + heapifyUp(size - 1, key); + validSize += 1; + heapModified(); + } + + /** + * Add a key-value pair to the heap, except if the new element is larger than + * the top, and we are at design size (overflow) + * + * @param key Key + * @param max Maximum size of heap + */ + public void add(int key, int max) { + if (size < max) { + add(key); + } else if (comp(key, peek())) { + replaceTopElement(key); + } + } + + /** + * Combined operation that removes the top element, and inserts a new element + * instead. + * + * @param e New element to insert + * @return Previous top element of the heap + */ + public int replaceTopElement(int e) { + ensureValid(); + int oldroot = queue[0]; + heapifyDown(0, e); + heapModified(); + return oldroot; + } + + /** + * Get the current top key + * + * @return Top key + */ + public int peek() { + if (size == 0) { + throw new ArrayIndexOutOfBoundsException("Peek() on an empty heap!"); + } + ensureValid(); + return queue[0]; + } + + /** + * Remove the first element + * + * @return Top element + */ + public int poll() { + return removeAt(0); + } + + /** + * Repair the heap + */ + protected void ensureValid() { + if (validSize != size) { + if (size > 1) { + // Parent of first invalid + int nextmin = validSize > 0 ? ((validSize - 1) >>> 1) : 0; + int curmin = MathUtil.nextAllOnesInt(nextmin); // Next line + int nextmax = curmin - 1; // End of valid line + int pos = (size - 2) >>> 1; // Parent of last element + // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin+", "+nextmin); + while (pos >= nextmin) { + // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin); + while (pos >= curmin) { + if (!heapifyDown(pos, queue[pos])) { + final int parent = (pos - 1) >>> 1; + if (parent < curmin) { + nextmin = Math.min(nextmin, parent); + nextmax = Math.max(nextmax, parent); + } + } + pos--; + } + curmin = nextmin; + pos = Math.min(pos, nextmax); + nextmax = -1; + } + } + validSize = size; + } + } + + /** + * Remove the element at the given position. + * + * @param pos Element position. + * @return Removed element + */ + protected int removeAt(int pos) { + if (pos < 0 || pos >= size) { + return 0; + } + final int top = queue[0]; + // Replacement object: + final int reinkey = queue[size - 1]; + // Keep heap in sync + if (validSize == size) { + size -= 1; + validSize -= 1; + heapifyDown(pos, reinkey); + } else { + size -= 1; + validSize = Math.min(pos >>> 1, validSize); + queue[pos] = reinkey; + } + heapModified(); + return top; + } + + /** + * Execute a "Heapify Upwards" aka "SiftUp". Used in insertions. + * + * @param pos insertion position + * @param curkey Current key + */ + protected void heapifyUp(int pos, int curkey) { + while (pos > 0) { + final int parent = (pos - 1) >>> 1; + int parkey = queue[parent]; + + if (comp(curkey, parkey)) { // Compare + break; + } + queue[pos] = parkey; + pos = parent; + } + queue[pos] = curkey; + } + + /** + * Execute a "Heapify Downwards" aka "SiftDown". Used in deletions. + * + * @param ipos re-insertion position + * @param curkey Current key + * @return true when the order was changed + */ + protected boolean heapifyDown(final int ipos, int curkey) { + int pos = ipos; + final int half = size >>> 1; + while (pos < half) { + // Get left child (must exist!) + int cpos = (pos << 1) + 1; + int chikey = queue[cpos]; + // Test right child, if present + final int rchild = cpos + 1; + if (rchild < size) { + int right = queue[rchild]; + if (comp(chikey, right)) { // Compare + cpos = rchild; + chikey = right; + } + } + + if (comp(chikey, curkey)) { // Compare + break; + } + queue[pos] = chikey; + pos = cpos; + } + queue[pos] = curkey; + return (pos == ipos); + } + + /** + * Test whether we need to resize to have the requested capacity. + * + * @param requiredSize required capacity + */ + protected final void resize(int requiredSize) { + queue = Arrays.copyOf(queue, desiredSize(requiredSize, queue.length)); + } + + /** + * Delete all elements from the heap. + */ + @Override + public void clear() { + super.clear(); + for (int i = 0; i < size; i++) { + queue[i] = 0; + } + } + + /** + * Compare two objects + */ + abstract protected boolean comp(int o1, int o2); +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMaxHeap.java new file mode 100644 index 00000000..383eb727 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMaxHeap.java @@ -0,0 +1,62 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * Basic in-memory heap structure. + * + * This heap is built lazily: if you first add many elements, then poll the + * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log + * n). This is implemented via a simple validTo counter. + * + * @author Erich Schubert + */ +public class IntegerMaxHeap extends IntegerHeap { + /** + * Constructor with default capacity. + */ + public IntegerMaxHeap() { + super(DEFAULT_INITIAL_CAPACITY); + } + + /** + * Constructor with initial capacity. + * + * @param size initial capacity + */ + public IntegerMaxHeap(int size) { + super(size); + } + + /** + * Compare two objects + * + * @param o1 First object + * @param o2 Second object + */ + @Override + protected boolean comp(int o1, int o2) { + return o1 < o2; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMinHeap.java new file mode 100644 index 00000000..f81fe275 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMinHeap.java @@ -0,0 +1,62 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * Basic in-memory heap structure. + * + * This heap is built lazily: if you first add many elements, then poll the + * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log + * n). This is implemented via a simple validTo counter. + * + * @author Erich Schubert + */ +public class IntegerMinHeap extends IntegerHeap { + /** + * Constructor with default capacity. + */ + public IntegerMinHeap() { + super(DEFAULT_INITIAL_CAPACITY); + } + + /** + * Constructor with initial capacity. + * + * @param size initial capacity + */ + public IntegerMinHeap(int size) { + super(size); + } + + /** + * Compare two objects + * + * @param o1 First object + * @param o2 Second object + */ + @Override + protected boolean comp(int o1, int o2) { + return o1 > o2; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerPriorityObject.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerPriorityObject.java index 9dd941ec..2014de65 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerPriorityObject.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerPriorityObject.java @@ -79,8 +79,9 @@ public class IntegerPriorityObject<O> implements PairInterface<Integer, O>, Comp } @Override + @Deprecated public Integer getFirst() { - return priority; + return Integer.valueOf(priority); } @Override @@ -120,8 +121,8 @@ public class IntegerPriorityObject<O> implements PairInterface<Integer, O>, Comp @Override public String toString() { - StringBuffer buf = new StringBuffer(); - buf.append(priority).append(":").append(object.toString()); + StringBuilder buf = new StringBuilder(); + buf.append(priority).append(':').append(object.toString()); return buf.toString(); } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/KNNHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/KNNHeap.java deleted file mode 100644 index 6c59123b..00000000 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/KNNHeap.java +++ /dev/null @@ -1,153 +0,0 @@ -package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; - -/* - This file is part of ELKI: - Environment for Developing KDD-Applications Supported by Index-Structures - - Copyright (C) 2012 - Ludwig-Maximilians-Universität München - Lehr- und Forschungseinheit für Datenbanksysteme - ELKI Development Team - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; - -import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; -import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair; -import de.lmu.ifi.dbs.elki.database.query.GenericDistanceResultPair; -import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; - -/** - * Heap used for KNN management. - * - * @author Erich Schubert - * - * @apiviz.has KNNList oneway - - serializes to - * - * @param <D> distance type - */ -public class KNNHeap<D extends Distance<D>> extends TiedTopBoundedHeap<DistanceResultPair<D>> { - /** - * Serial version - */ - private static final long serialVersionUID = 1L; - - /** - * Maximum distance, usually infiniteDistance - */ - private final D maxdist; - - /** - * Constructor. - * - * @param k k Parameter - * @param maxdist k-distance to return for less than k neighbors - usually - * infiniteDistance - */ - public KNNHeap(int k, D maxdist) { - super(k, new Comp<D>()); - this.maxdist = maxdist; - } - - /** - * Simplified constructor. Will return {@code null} as kNN distance with less - * than k entries. - * - * @param k k Parameter - */ - public KNNHeap(int k) { - this(k, null); - } - - @Override - public ArrayList<DistanceResultPair<D>> toSortedArrayList() { - ArrayList<DistanceResultPair<D>> list = super.toSortedArrayList(); - Collections.reverse(list); - return list; - } - - /** - * Serialize to a {@link KNNList}. This empties the heap! - * - * @return KNNList with the heaps contents. - */ - public KNNList<D> toKNNList() { - return new KNNList<D>(this); - } - - /** - * Get the K parameter ("maxsize" internally). - * - * @return K - */ - public int getK() { - return super.getMaxSize(); - } - - /** - * Get the distance to the k nearest neighbor, or maxdist otherwise. - * - * @return Maximum distance - */ - public D getKNNDistance() { - if(size() < getK()) { - return maxdist; - } - return peek().getDistance(); - } - - /** - * Get maximum distance in heap - */ - public D getMaximumDistance() { - if(isEmpty()) { - return maxdist; - } - return peek().getDistance(); - } - - /** - * Add a distance-id pair to the heap unless the distance is too large. - * - * Compared to the super.add() method, this often saves the pair construction. - * - * @param distance Distance value - * @param id ID number - * @return success code - */ - public boolean add(D distance, DBIDRef id) { - if(size() < maxsize || peek().getDistance().compareTo(distance) >= 0) { - return super.add(new GenericDistanceResultPair<D>(distance, id.getDBID())); - } - return true; /* "success" */ - } - - /** - * Comparator to use. - * - * @author Erich Schubert - * - * @apiviz.exclude - */ - public static class Comp<D extends Distance<D>> implements Comparator<DistanceResultPair<D>> { - @Override - public int compare(DistanceResultPair<D> o1, DistanceResultPair<D> o2) { - return -o1.compareByDistance(o2); - } - } -}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/KNNList.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/KNNList.java deleted file mode 100644 index 3e6ecc9d..00000000 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/KNNList.java +++ /dev/null @@ -1,181 +0,0 @@ -package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; - -/* - This file is part of ELKI: - Environment for Developing KDD-Applications Supported by Index-Structures - - Copyright (C) 2012 - Ludwig-Maximilians-Universität München - Lehr- und Forschungseinheit für Datenbanksysteme - ELKI Development Team - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -import java.util.AbstractList; -import java.util.Iterator; -import java.util.List; -import java.util.Queue; - -import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs; -import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair; -import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult; -import de.lmu.ifi.dbs.elki.database.query.knn.KNNUtil; -import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; - -/** - * Finalized KNN List. - * - * @author Erich Schubert - * - * @param <D> Distance type - */ -public class KNNList<D extends Distance<D>> extends AbstractList<DistanceResultPair<D>> implements KNNResult<D> { - /** - * The value of k this was materialized for. - */ - private final int k; - - /** - * The actual data array. - */ - private final Object[] data; - - /** - * Constructor, to be called from KNNHeap only. Use {@link KNNHeap#toKNNList} - * instead! - * - * @param heap Calling heap - */ - protected KNNList(KNNHeap<D> heap) { - super(); - this.data = new Object[heap.size()]; - this.k = heap.getK(); - assert(heap.size() >= this.k) : "Heap doesn't contain enough objects!"; - // Get sorted data from heap; but in reverse. - int i = heap.size(); - while(!heap.isEmpty()) { - i--; - assert (i >= 0); - data[i] = heap.poll(); - } - assert (data.length == 0 || data[0] != null); - assert (heap.size() == 0); - } - - /** - * Constructor. With a KNNHeap, use {@link KNNHeap#toKNNList} instead! - * - * @param heap Calling heap - * @param k K value - */ - public KNNList(Queue<D> heap, int k) { - super(); - this.data = new Object[heap.size()]; - this.k = k; - assert(heap.size() >= this.k) : "Heap doesn't contain enough objects!"; - // Get sorted data from heap; but in reverse. - int i = heap.size(); - while(!heap.isEmpty()) { - i--; - assert (i >= 0); - data[i] = heap.poll(); - } - assert (data.length == 0 || data[0] != null); - assert (heap.size() == 0); - } - - @Override - public int getK() { - return k; - } - - @Override - public D getKNNDistance() { - return get(getK() - 1).getDistance(); - } - - @Override - public ArrayDBIDs asDBIDs() { - return KNNUtil.asDBIDs(this); - } - - @Override - public List<D> asDistanceList() { - return KNNUtil.asDistanceList(this); - } - - @Override - public String toString() { - StringBuffer buf = new StringBuffer(); - buf.append("kNNList["); - Iterator<DistanceResultPair<D>> iter = this.iterator(); - while(iter.hasNext()) { - DistanceResultPair<D> pair = iter.next(); - buf.append(pair.getDistance()).append(":").append(pair.getDBID()); - if(iter.hasNext()) { - buf.append(","); - } - } - buf.append("]"); - return buf.toString(); - } - - @SuppressWarnings("unchecked") - @Override - public DistanceResultPair<D> get(int index) { - return (DistanceResultPair<D>) data[index]; - } - - @Override - public Iterator<DistanceResultPair<D>> iterator() { - return new Itr(); - } - - @Override - public int size() { - return data.length; - } - - /** - * Iterator - * - * @author Erich Schubert - * - * @apiviz.exclude - */ - private class Itr implements Iterator<DistanceResultPair<D>> { - /** - * Cursor position - */ - private int pos = -1; - - @Override - public boolean hasNext() { - return pos + 1 < data.length; - } - - @SuppressWarnings("unchecked") - @Override - public DistanceResultPair<D> next() { - pos++; - return (DistanceResultPair<D>) data[pos]; - } - - @Override - public void remove() { - throw new UnsupportedOperationException("kNN results are unmodifiable."); - } - } -}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ObjectHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ObjectHeap.java new file mode 100644 index 00000000..2e20ed56 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ObjectHeap.java @@ -0,0 +1,267 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Basic in-memory heap structure. + * + * This heap is built lazily: if you first add many elements, then poll the + * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log + * n). This is implemented via a simple validTo counter. + * + * @author Erich Schubert + */ +public abstract class ObjectHeap<K> extends AbstractHeap { + /** + * Heap storage: queue + */ + protected transient Object[] queue; + + /** + * Constructor with initial capacity. + * + * @param size initial capacity + */ + public ObjectHeap(int size) { + super(); + this.size = 0; + this.queue = new Object[size]; + } + + /** + * Add a key-value pair to the heap + * + * @param key Key + */ + public void add(Object key) { + // resize when needed + if (size + 1 > queue.length) { + resize(size + 1); + } + // final int pos = size; + this.queue[size] = key; + this.size += 1; + heapifyUp(size - 1, key); + validSize += 1; + heapModified(); + } + + /** + * Add a key-value pair to the heap, except if the new element is larger than + * the top, and we are at design size (overflow) + * + * @param key Key + * @param max Maximum size of heap + */ + public void add(Object key, int max) { + if (size < max) { + add(key); + } else if (comp(key, peek())) { + replaceTopElement(key); + } + } + + /** + * Combined operation that removes the top element, and inserts a new element + * instead. + * + * @param e New element to insert + * @return Previous top element of the heap + */ + @SuppressWarnings("unchecked") + public Object replaceTopElement(Object e) { + ensureValid(); + Object oldroot = (K) queue[0]; + heapifyDown(0, e); + heapModified(); + return oldroot; + } + + /** + * Get the current top key + * + * @return Top key + */ + @SuppressWarnings("unchecked") + public Object peek() { + if (size == 0) { + throw new ArrayIndexOutOfBoundsException("Peek() on an empty heap!"); + } + ensureValid(); + return (K) queue[0]; + } + + /** + * Remove the first element + * + * @return Top element + */ + public Object poll() { + return removeAt(0); + } + + /** + * Repair the heap + */ + protected void ensureValid() { + if (validSize != size) { + if (size > 1) { + // Parent of first invalid + int nextmin = validSize > 0 ? ((validSize - 1) >>> 1) : 0; + int curmin = MathUtil.nextAllOnesInt(nextmin); // Next line + int nextmax = curmin - 1; // End of valid line + int pos = (size - 2) >>> 1; // Parent of last element + // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin+", "+nextmin); + while (pos >= nextmin) { + // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin); + while (pos >= curmin) { + if (!heapifyDown(pos, queue[pos])) { + final int parent = (pos - 1) >>> 1; + if (parent < curmin) { + nextmin = Math.min(nextmin, parent); + nextmax = Math.max(nextmax, parent); + } + } + pos--; + } + curmin = nextmin; + pos = Math.min(pos, nextmax); + nextmax = -1; + } + } + validSize = size; + } + } + + /** + * Remove the element at the given position. + * + * @param pos Element position. + * @return Removed element + */ + @SuppressWarnings("unchecked") + protected Object removeAt(int pos) { + if (pos < 0 || pos >= size) { + return null; + } + final Object top = (K) queue[0]; + // Replacement object: + final Object reinkey = queue[size - 1]; + // Keep heap in sync + if (validSize == size) { + size -= 1; + validSize -= 1; + heapifyDown(pos, reinkey); + } else { + size -= 1; + validSize = Math.min(pos >>> 1, validSize); + queue[pos] = reinkey; + } + heapModified(); + return top; + } + + /** + * Execute a "Heapify Upwards" aka "SiftUp". Used in insertions. + * + * @param pos insertion position + * @param curkey Current key + */ + protected void heapifyUp(int pos, Object curkey) { + while (pos > 0) { + final int parent = (pos - 1) >>> 1; + Object parkey = queue[parent]; + + if (comp(curkey, parkey)) { // Compare + break; + } + queue[pos] = parkey; + pos = parent; + } + queue[pos] = curkey; + } + + /** + * Execute a "Heapify Downwards" aka "SiftDown". Used in deletions. + * + * @param ipos re-insertion position + * @param curkey Current key + * @return true when the order was changed + */ + protected boolean heapifyDown(final int ipos, Object curkey) { + int pos = ipos; + final int half = size >>> 1; + while (pos < half) { + // Get left child (must exist!) + int cpos = (pos << 1) + 1; + Object chikey = queue[cpos]; + // Test right child, if present + final int rchild = cpos + 1; + if (rchild < size) { + Object right = queue[rchild]; + if (comp(chikey, right)) { // Compare + cpos = rchild; + chikey = right; + } + } + + if (comp(chikey, curkey)) { // Compare + break; + } + queue[pos] = chikey; + pos = cpos; + } + queue[pos] = curkey; + return (pos == ipos); + } + + /** + * Test whether we need to resize to have the requested capacity. + * + * @param requiredSize required capacity + */ + protected final void resize(int requiredSize) { + queue = Arrays.copyOf(queue, desiredSize(requiredSize, queue.length)); + } + + /** + * Delete all elements from the heap. + */ + @Override + public void clear() { + super.clear(); + for (int i = 0; i < size; i++) { + queue[i] = null; + } + } + + /** + * Compare two objects + */ + abstract protected boolean comp(Object o1, Object o2); +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedHeap.java index c0ce3acf..2daaafa4 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedHeap.java @@ -41,11 +41,6 @@ import de.lmu.ifi.dbs.elki.utilities.iterator.MergedIterator; */ public class TiedTopBoundedHeap<E> extends TopBoundedHeap<E> { /** - * Serial version - */ - private static final long serialVersionUID = 1L; - - /** * List to keep ties in. */ private List<E> ties = new ArrayList<E>(); @@ -80,11 +75,6 @@ public class TiedTopBoundedHeap<E> extends TopBoundedHeap<E> { ties.clear(); } - @Override - public boolean contains(Object o) { - return ties.contains(o) || super.contains(o); - } - @SuppressWarnings("unchecked") @Override public Iterator<E> iterator() { @@ -93,45 +83,52 @@ public class TiedTopBoundedHeap<E> extends TopBoundedHeap<E> { @Override public E peek() { - if(ties.isEmpty()) { + if (ties.isEmpty()) { return super.peek(); - } - else { + } else { return ties.get(ties.size() - 1); } } @Override public E poll() { - if(ties.isEmpty()) { + if (ties.isEmpty()) { return super.poll(); - } - else { + } else { return ties.remove(ties.size() - 1); } } @Override + public E replaceTopElement(E e) { + if (ties.isEmpty()) { + return super.replaceTopElement(e); + } + // Fall back to classic emulation via poll and offer: + E prev = poll(); + add(e); + return prev; + } + + @Override protected void handleOverflow(E e) { boolean tied = false; - if(comparator == null) { + if (comparator == null) { @SuppressWarnings("unchecked") Comparable<Object> c = (Comparable<Object>) e; - if(c.compareTo(queue[0]) == 0) { + if (c.compareTo(queue[0]) == 0) { tied = true; } - } - else { - if(comparator.compare(e, queue[0]) == 0) { + } else { + if (comparator.compare(e, queue[0]) == 0) { tied = true; } } - if(tied) { + if (tied) { ties.add(e); - } - else { + } else { // Also remove old ties. ties.clear(); } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedUpdatableHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedUpdatableHeap.java index 4fbc852e..8e39af1d 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedUpdatableHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedUpdatableHeap.java @@ -42,11 +42,6 @@ import de.lmu.ifi.dbs.elki.utilities.iterator.MergedIterator; */ public class TiedTopBoundedUpdatableHeap<E> extends TopBoundedUpdatableHeap<E> { /** - * Serial version - */ - private static final long serialVersionUID = 1L; - - /** * List to keep ties in. */ private List<E> ties = new ArrayList<E>(); @@ -81,11 +76,6 @@ public class TiedTopBoundedUpdatableHeap<E> extends TopBoundedUpdatableHeap<E> { ties.clear(); } - @Override - public boolean contains(Object o) { - return ties.contains(o) || super.contains(o); - } - @SuppressWarnings("unchecked") @Override public Iterator<E> iterator() { @@ -93,7 +83,7 @@ public class TiedTopBoundedUpdatableHeap<E> extends TopBoundedUpdatableHeap<E> { } @Override - public boolean offerAt(int pos, E e) { + public void offerAt(int pos, E e) { if(pos == IN_TIES) { for(Iterator<E> i = ties.iterator(); i.hasNext();) { E e2 = i.next(); @@ -102,8 +92,7 @@ public class TiedTopBoundedUpdatableHeap<E> extends TopBoundedUpdatableHeap<E> { i.remove(); index.remove(e2); } - // while we did not change, this still was "successful". - return true; + return; } } throw new AbortException("Heap corrupt - should not be reached"); @@ -117,9 +106,9 @@ public class TiedTopBoundedUpdatableHeap<E> extends TopBoundedUpdatableHeap<E> { final E e2 = ties.remove(ties.size() - 1); // index.remove(e2); super.offerAt(NO_VALUE, e2); - return true; + return; } - return super.offerAt(pos, e); + super.offerAt(pos, e); } @Override diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedHeap.java index 5b61e6f3..07b595f6 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedHeap.java @@ -36,12 +36,7 @@ import java.util.Comparator; */ public class TopBoundedHeap<E> extends Heap<E> { /** - * Serial version - */ - private static final long serialVersionUID = 1L; - - /** - * Maximum size + * Maximum size. */ protected int maxsize; @@ -67,31 +62,32 @@ public class TopBoundedHeap<E> extends Heap<E> { } @Override - public boolean offer(E e) { - // don't add if we hit maxsize and are worse - if(super.size() >= maxsize) { - ensureValid(); - if(comparator == null) { - @SuppressWarnings("unchecked") - Comparable<Object> c = (Comparable<Object>) e; - if(c.compareTo(queue[0]) < 0) { - // while we did not change, this still was "successful". - return true; - } - } - else { - if(comparator.compare(e, queue[0]) < 0) { - // while we did not change, this still was "successful". - return true; - } - } + public void add(E e) { + if (super.size() < maxsize) { + // Just offer. + super.add(e); + return; } - boolean result = super.offer(e); - // purge unneeded entry(s) - while(super.size() > maxsize) { - handleOverflow(super.poll()); + // Peek at the top element, return if we are worse. + ensureValid(); + final int comp; + if (comparator == null) { + @SuppressWarnings("unchecked") + Comparable<Object> c = (Comparable<Object>) e; + comp = c.compareTo(queue[0]); + } else { + comp = comparator.compare(e, queue[0]); + } + if (comp < 0) { + return; + } + if (comp == 0) { + handleOverflow(e); + } else { + // Otherwise, replace and repair: + E prev = super.replaceTopElement(e); + handleOverflow(prev); } - return result; } /** @@ -105,9 +101,11 @@ public class TopBoundedHeap<E> extends Heap<E> { } /** + * Get the maximum size. + * * @return the maximum size */ public int getMaxSize() { return maxsize; } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedUpdatableHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedUpdatableHeap.java index 2b22eba2..75f2abcf 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedUpdatableHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedUpdatableHeap.java @@ -36,12 +36,7 @@ import java.util.Comparator; */ public class TopBoundedUpdatableHeap<E> extends UpdatableHeap<E> { /** - * Serial version - */ - private static final long serialVersionUID = 1L; - - /** - * Maximum size + * Maximum size. */ protected int maxsize; @@ -67,22 +62,19 @@ public class TopBoundedUpdatableHeap<E> extends UpdatableHeap<E> { } @Override - public boolean offerAt(int pos, E e) { + public void offerAt(int pos, E e) { // don't add if we hit maxsize and are worse - if(pos == NO_VALUE && super.size() >= maxsize) { - ensureValid(); - if(compare(e, queue[0]) < 0) { - // while we did not change, this still was "successful". - return true; - } - // pos = index.get(e); // Should not be needed. + if (pos != NO_VALUE || super.size() < maxsize) { + super.offerAt(pos, e); + return; } - boolean result = super.offerAt(pos, e); - // purge unneeded entry(s) - while(super.size() > maxsize) { - handleOverflow(super.poll()); + ensureValid(); + if (compare(e, queue[0]) < 0) { + // while we did not change, this still was "successful". + return; } - return result; + E prev = super.replaceTopElement(e); + handleOverflow(prev); } /** @@ -93,12 +85,11 @@ public class TopBoundedUpdatableHeap<E> extends UpdatableHeap<E> { * @return True when an update is needed */ protected int compare(Object e, Object object) { - if(comparator == null) { + if (comparator == null) { @SuppressWarnings("unchecked") Comparable<Object> c = (Comparable<Object>) e; return c.compareTo(queue[0]); - } - else { + } else { return comparator.compare(e, queue[0]); } } @@ -114,9 +105,11 @@ public class TopBoundedUpdatableHeap<E> extends UpdatableHeap<E> { } /** + * Get the maximum size. + * * @return the maximum size */ public int getMaxSize() { return maxsize; } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/UpdatableHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/UpdatableHeap.java index 05858981..1ab5f4df 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/UpdatableHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/UpdatableHeap.java @@ -37,7 +37,7 @@ import java.util.Comparator; */ public class UpdatableHeap<O> extends Heap<O> { /** - * Constant for "not in heap" + * Constant for "not in heap". */ protected static final int NO_VALUE = Integer.MIN_VALUE; @@ -47,11 +47,6 @@ public class UpdatableHeap<O> extends Heap<O> { protected static final int IN_TIES = -1; /** - * Serial version - */ - private static final long serialVersionUID = 1L; - - /** * Holds the indices in the heap of each element. */ protected final TObjectIntMap<Object> index = new TObjectIntHashMap<Object>(100, 0.5f, NO_VALUE); @@ -73,7 +68,7 @@ public class UpdatableHeap<O> extends Heap<O> { } /** - * Constructor with comparator + * Constructor with comparator. * * @param comparator Comparator */ @@ -82,7 +77,7 @@ public class UpdatableHeap<O> extends Heap<O> { } /** - * Constructor with predefined size and comparator + * Constructor with predefined size and comparator. * * @param size Size * @param comparator Comparator @@ -98,12 +93,18 @@ public class UpdatableHeap<O> extends Heap<O> { } @Override - public boolean offer(O e) { + public void add(O e) { final int pos = index.get(e); - return offerAt(pos, e); + offerAt(pos, e); } - protected boolean offerAt(final int pos, O e) { + /** + * Offer element at the given position. + * + * @param pos Position + * @param e Element + */ + protected void offerAt(final int pos, O e) { if(pos == NO_VALUE) { // resize when needed if(size + 1 > queue.length) { @@ -114,9 +115,8 @@ public class UpdatableHeap<O> extends Heap<O> { index.put(e, size); size += 1; // We do NOT YET update the heap. This is done lazily. - // We have changed - return true according to {@link Collection#put} - modCount++; - return true; + heapModified(); + return; } else { assert (pos >= 0) : "Unexpected negative position."; @@ -126,14 +126,12 @@ public class UpdatableHeap<O> extends Heap<O> { @SuppressWarnings("unchecked") Comparable<Object> c = (Comparable<Object>) e; if(c.compareTo(queue[pos]) >= 0) { - // Ignore, but return true according to {@link Collection#put} - return true; + return; } } else { if(comparator.compare(e, queue[pos]) >= 0) { - // Ignore, but return true according to {@link Collection#put} - return true; + return; } } if(pos >= validSize) { @@ -144,9 +142,8 @@ public class UpdatableHeap<O> extends Heap<O> { // ensureValid(); heapifyUp(pos, e); } - modCount++; - // We have changed - return true according to {@link Collection#put} - return true; + heapModified(); + return; } } @@ -155,7 +152,8 @@ public class UpdatableHeap<O> extends Heap<O> { if(pos < 0 || pos >= size) { return null; } - final O ret = castQueueElement(pos); + @SuppressWarnings("unchecked") + final O ret = (O) queue[pos]; // Replacement object: final Object reinsert = queue[size - 1]; queue[size - 1] = null; @@ -188,7 +186,7 @@ public class UpdatableHeap<O> extends Heap<O> { queue[pos] = reinsert; index.put(reinsert, pos); } - modCount++; + heapModified(); // Keep index up to date index.remove(ret); return ret; @@ -216,6 +214,13 @@ public class UpdatableHeap<O> extends Heap<O> { index.remove(node); return node; } + + @Override + public O replaceTopElement(O e) { + O node = super.replaceTopElement(e); + index.remove(node); + return node; + } /** * Execute a "Heapify Upwards" aka "SiftUp". Used in insertions. diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/HierarchyHashmapList.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/HierarchyHashmapList.java index 76bee0f9..bd6d67bf 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/HierarchyHashmapList.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/HierarchyHashmapList.java @@ -71,7 +71,7 @@ public class HierarchyHashmapList<O> implements ModifiableHierarchy<O> { if(!pchi.contains(child)) { pchi.add(child); } else { - LoggingUtil.warning("Result added twice: "+parent+" -> "+child); + LoggingUtil.warning("Result added twice: "+parent+" -> "+child, new Throwable()); } } // Add child to parent @@ -84,7 +84,7 @@ public class HierarchyHashmapList<O> implements ModifiableHierarchy<O> { if(!cpar.contains(parent)) { cpar.add(parent); } else { - LoggingUtil.warning("Result added twice: "+parent+" <- "+child); + LoggingUtil.warning("Result added twice: "+parent+" <- "+child, new Throwable()); } } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractObjDynamicHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractObjDynamicHistogram.java new file mode 100644 index 00000000..9d0dba0d --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractObjDynamicHistogram.java @@ -0,0 +1,271 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.scales.LinearScale; +import de.lmu.ifi.dbs.elki.utilities.BitsUtil; + +/** + * A dynamic histogram can dynamically adapt the number of bins to the data fed + * into the histogram. + * + * @author Erich Schubert + * + * @param <T> Data type + */ +public abstract class AbstractObjDynamicHistogram<T> extends AbstractObjStaticHistogram<T> { + /** + * Cache for positions to be inserted. + */ + private double[] cacheposs; + + /** + * Cache for data to be inserted. + */ + private Object[] cachevals; + + /** + * Cache fill size + */ + private int cachefill; + + /** + * Destination (minimum) size of the structure. At most 2*destsize bins are + * allowed. + */ + private int destsize; + + /** + * Constructor. + * + * @param bins Design number of bins - may become twice as large! + */ + public AbstractObjDynamicHistogram(int bins) { + super(-1, 0.0, 1.0); + this.destsize = bins; + cacheposs = new double[this.destsize << 1]; + cachevals = new Object[this.destsize << 1]; + cachefill = 0; + } + + /** + * Materialize the histogram from the cache. + */ + @SuppressWarnings("unchecked") + void materialize() { + // already materialized? + if (cachefill <= 0) { + return; + } + // Compute minimum and maximum + double min = Double.MAX_VALUE, max = Double.MIN_VALUE; + for (int i = 0; i < cachefill; i++) { + min = Math.min(min, cacheposs[i]); + max = Math.max(max, cacheposs[i]); + } + // use the LinearScale magic to round to "likely suiteable" step sizes. + // TODO: extract into a reusable function? + LinearScale scale = new LinearScale(min, max); + min = scale.getMin(); + max = scale.getMax(); + this.base = min; + this.max = max; + this.binsize = (max - min) / this.destsize; + // initialize array + this.data = new Object[this.destsize << 1]; + for (int i = 0; i < this.destsize; i++) { + this.data[i] = makeObject(); + } + size = destsize; + // re-insert data we have + final int end = cachefill; + cachefill = -1; // So reinsert works! + for (int i = 0; i < end; i++) { + putData(cacheposs[i], (T) cachevals[i]); + } + // delete cache, signal that we're initialized + cacheposs = null; + cachevals = null; + } + + @Override + public T get(double coord) { + materialize(); + testResample(coord); + T ret = super.get(coord); + return ret; + } + + /** + * Put fresh data into the histogram (or into the cache) + * + * @param coord Coordinate + * @param value Value + */ + @Override + public void putData(double coord, T value) { + // Store in cache + if (cachefill >= 0) { + if (cachefill < cacheposs.length) { + + cacheposs[cachefill] = coord; + cachevals[cachefill] = cloneForCache(value); + cachefill++; + return; + } else { + materialize(); + // But continue below! + } + } + // Check if we need to resample to accomodate this bin. + testResample(coord); + // super class will handle histogram resizing / shifting + T exist = get(coord); + data[getBinNr(coord)] = aggregate(exist, value); + } + + /** + * Test (and perform) downsampling when neede. + * + * @param coord coordinate to accomodate. + */ + private void testResample(double coord) { + final int bin = getBinNr(coord); + final int sizereq, off; + if (bin < 0) { + sizereq = size - bin; + off = -bin; + } else if (bin >= data.length) { + sizereq = bin + 1; + off = 0; + } else { + // Within the designated size - nothing to do. + return; + } + if (sizereq < data.length) { + // Accomodate by shifting. Let super do the job in {@link #get} + return; + } + // Resampling, eventually by multiple levels. + final int levels = BitsUtil.magnitude(sizereq / this.destsize) - 1; + assert (levels > 0) : "No resampling required?!?"; + final int step = 1 << levels; + + final int fixpoint = off / (step - 1); + { + // Start positions for in-place bottom-up downsampling. + int oup = fixpoint; + int inp = (oup << levels) - off; + assert (-step < inp && inp <= oup && oup < inp + step) : (inp + " -> " + oup + " s=" + step + " o=" + off + " l=" + levels); + for (; inp < size; inp += step, oup++) { + assert (oup < inp + step && oup < data.length); + data[oup] = downsample(data, Math.max(0, inp), Math.min(size, inp + step), step); + } + // Clean upwards + for (; oup < data.length; oup++) { + data[oup] = null; + } + } + if (off >= step) { + // Start positions for in-place downsampling top-down: + int oup = fixpoint - 1; + int inp = (oup << levels) - off; + assert (oup > inp) : (inp + " -> " + oup + " s=" + step + " o=" + off + " l=" + levels); + for (; inp > -step; inp -= step, oup--) { + assert (oup >= inp && oup >= 0); + data[oup] = downsample(data, Math.max(0, inp), Math.min(size, inp + step), step); + } + for (; oup >= 0; oup--) { + data[oup] = makeObject(); + } + } + // recalculate histogram base. + base = base - (offset + off) * binsize; + offset = 0; + size = (size + 1) >> levels; + binsize = binsize * (1 << levels); + max = base + binsize * size; + } + + @Override + public Iter iter() { + materialize(); + return super.iter(); + } + + @Override + public int getNumBins() { + materialize(); + return super.getNumBins(); + } + + @Override + public double getBinsize() { + materialize(); + return super.getBinsize(); + } + + @Override + public double getCoverMinimum() { + materialize(); + return super.getCoverMinimum(); + } + + @Override + public double getCoverMaximum() { + materialize(); + return super.getCoverMaximum(); + } + + /** + * Perform downsampling on a number of bins. + * + * @param data Data array (needs cast!) + * @param start Interval start + * @param end Interval end (exclusive) + * @param size Intended size - extra bins are assumed to be empty, should be a + * power of two + * @return New bin value + */ + protected abstract T downsample(Object[] data, int start, int end, int size); + + /** + * Rule to combine two bins or entries into one. + * + * Note: first and second MAY be modified and returned, they will not be used + * afterwards. + * + * @param first First bin value + * @param second Second bin value + * @return combined bin value + */ + protected abstract T aggregate(T first, T second); + + /** + * Clone a data passed to the algorithm for computing the initial size. + * + * @param data Data to be cloned + * @return cloned data + */ + protected abstract T cloneForCache(T data); +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractObjStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractObjStaticHistogram.java new file mode 100644 index 00000000..c1882302 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractObjStaticHistogram.java @@ -0,0 +1,129 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * Histogram class storing double values. + * + * The histogram will start with "bin" bins, but it can grow dynamicall to the + * left and right. + * + * @author Erich Schubert + * + * @param <T> Data type + */ +public abstract class AbstractObjStaticHistogram<T> extends AbstractStaticHistogram implements ObjHistogram<T> { + /** + * Constructor. + * + * @param bins Number of bins + * @param min Cover minimum + * @param max Cover maximum + */ + public AbstractObjStaticHistogram(int bins, double min, double max) { + super(bins, min, max); + if (bins >= 0) { + // -1 will be used by FlexiHistogram to delay initialization. + data = new Object[bins]; + } + } + + /** + * Data store + */ + Object[] data; + + /** + * Access the value of a bin with new data. + * + * @param coord Coordinate + * @return bin contents + */ + @SuppressWarnings("unchecked") + public T get(double coord) { + int bin = getBinNr(coord); + if (bin < 0) { + if (size - bin > data.length) { + // Reallocate. TODO: use an arraylist-like grow strategy! + Object[] tmpdata = new Object[growSize(data.length, size - bin)]; + System.arraycopy(data, 0, tmpdata, -bin, size); + data = tmpdata; + } else { + // Shift in place + System.arraycopy(data, 0, data, -bin, size); + } + for (int i = 0; i < -bin; i++) { + data[i] = makeObject(); + } + // Note that bin is negative, -bin is the shift offset! + offset -= bin; + size -= bin; + // TODO: modCounter++; and have iterators fast-fail + // Unset max value when resizing + max = Double.MAX_VALUE; + return (T) data[0]; + } else if (bin >= size) { + if (bin >= data.length) { + Object[] tmpdata = new Object[growSize(data.length, bin + 1)]; + System.arraycopy(data, 0, tmpdata, 0, size); + data = tmpdata; + } + for (int i = size; i <= bin; i++) { + data[i] = makeObject(); + } + size = bin + 1; + // TODO: modCounter++; and have iterators fast-fail + // Unset max value when resizing + max = Double.MAX_VALUE; + return (T) data[bin]; + } else { + return (T) data[bin]; + } + } + + /** + * Class to make a new object for the data store. + * + * @return New instance. + */ + protected abstract T makeObject(); + + @Override + public Iter iter() { + return new Iter(); + } + + /** + * Iterator class. + * + * @author Erich Schubert + */ + public class Iter extends AbstractStaticHistogram.Iter implements ObjHistogram.Iter<T> { + @SuppressWarnings("unchecked") + @Override + public T getValue() { + return (T) data[bin]; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractStaticHistogram.java new file mode 100644 index 00000000..799ac009 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractStaticHistogram.java @@ -0,0 +1,220 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + + +/** + * Abstract base class for histograms. + * + * Note that this is abstracted from the actual data storage, so it can be + * adapted for multiple use cases. + * + * @author Erich Schubert + */ +public abstract class AbstractStaticHistogram implements Histogram { + /** + * Array shift to account for negative indices. + */ + protected int offset = 0; + + /** + * Size of array storage. + */ + protected int size; + + /** + * Array 'base', i.e. the point of 0.0. Usually the minimum. + */ + protected double base; + + /** + * To avoid introducing an extra bucket for the maximum value. + */ + protected double max; + + /** + * Width of a bin. + */ + protected double binsize; + + /** + * Histogram constructor + * + * @param bins Number of bins to use. + * @param min Minimum Value + * @param max Maximum Value + */ + public AbstractStaticHistogram(int bins, double min, double max) { + this.base = min; + this.max = max; + this.binsize = (max - min) / bins; + this.size = bins; + } + + /** + * Compute the bin number. Has a special case for rounding max down to the + * last bin. + * + * @param coord Coordinate + * @return bin number + */ + protected int getBinNr(double coord) { + if (Double.isInfinite(coord) || Double.isNaN(coord)) { + throw new UnsupportedOperationException("Encountered non-finite value in Histogram: " + coord); + } + if (coord == max) { + // System.err.println("Triggered special case: "+ (Math.floor((coord - + // base) / binsize) + offset) + " vs. " + (size - 1)); + return size - 1; + } + return (int) Math.floor((coord - base) / binsize) + offset; + } + + /** + * Compute the size to grow to. + * + * @param current Current size + * @param requiredSize Required size + * @return Size to allocate + */ + protected static int growSize(int current, int requiredSize) { + // Double until 64, then increase by 50% each time. + int newCapacity = ((current < 64) ? ((current + 1) << 1) : ((current >> 1) * 3)); + // overflow? + if (newCapacity < 0) { + throw new OutOfMemoryError(); + } + if (requiredSize > newCapacity) { + newCapacity = requiredSize; + } + return requiredSize; + } + + /** + * Get the number of bins actually in use. + * + * @return number of bins + */ + @Override + public int getNumBins() { + return size; + } + + /** + * Get the size (width) of a bin. + * + * @return bin size + */ + @Override + public double getBinsize() { + return binsize; + } + + /** + * Get minimum (covered by bins, not data!) + * + * @return minimum + */ + @Override + public double getCoverMinimum() { + return base - offset * binsize; + } + + /** + * Get maximum (covered by bins, not data!) + * + * @return maximum + */ + @Override + public double getCoverMaximum() { + return base + (size - offset) * binsize; + } + + /** + * Get an iterator over all histogram bins. + * + * @return Iterator + */ + @Override + public abstract Iter iter(); + + /** + * Iterator class to iterate over all bins. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public abstract class Iter implements Histogram.Iter { + /** + * Current bin number + */ + int bin = 0; + + @Override + public double getCenter() { + return base + (bin + 0.5 - offset) * binsize; + } + + @Override + public double getLeft() { + return base + (bin - offset) * binsize; + } + + @Override + public double getRight() { + return base + (bin + 1 - offset) * binsize; + } + + @Override + public boolean valid() { + return bin >= 0 && bin < size; + } + + @Override + public void advance() { + bin++; + } + + @Override + public int getOffset() { + return bin; + } + + @Override + public void advance(int count) { + bin += count; + } + + @Override + public void retract() { + bin--; + } + + @Override + public void seek(int off) { + bin = off; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleArrayStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleArrayStaticHistogram.java new file mode 100644 index 00000000..aeba3c4b --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleArrayStaticHistogram.java @@ -0,0 +1,85 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; + +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2012 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * Histogram containing an array of doubles (i.e. {@code double[]}). This is + * actually one of the simpler specializations, as arrays are objects not + * primitive in Java. + * + * @author Erich Schubert + */ +public final class DoubleArrayStaticHistogram extends AbstractObjStaticHistogram<double[]> { + /** + * Desired number of columns in each bin. + */ + private final int cols; + + /** + * Constructor. + * + * @param bins Number of bins + * @param min Minimum value for the coordinates + * @param max Maximum value for the coordinates + * @param cols Number of columns in each bin. + */ + public DoubleArrayStaticHistogram(int bins, double min, double max, int cols) { + super(bins, min, max); + this.cols = cols; + for (int i = 0; i < bins; i++) { + data[i] = new double[cols]; + } + } + + /** + * Increment histogram by a double[]. + * + * @param coord Coordinate + * @param data Data to increment by. + */ + public void increment(double coord, double[] data) { + double[] existing = get(coord); + for (int i = 0; i < existing.length; i++) { + existing[i] += data[i]; + } + } + + /** + * {@inheritDoc} + * + * Data is combined by incrementing. + * + * @deprecated use the explicit {@link #increment} instead. + */ + @Deprecated + @Override + public void putData(double coord, double[] data) { + increment(coord, data); + } + + @Override + protected double[] makeObject() { + return new double[cols]; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleDynamicHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleDynamicHistogram.java new file mode 100644 index 00000000..77a1f9e4 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleDynamicHistogram.java @@ -0,0 +1,248 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.scales.LinearScale; +import de.lmu.ifi.dbs.elki.utilities.BitsUtil; + +/** + * A flexible histogram storing double, that can dynamically adapt the number of + * bins to the data fed into the histogram. + * + * @author Erich Schubert + */ +public class DoubleDynamicHistogram extends DoubleStaticHistogram { + /** + * Cache for data to be inserted. + */ + private double[] cachec; + + /** + * Cache for data to be inserted. + */ + private double[] cachev; + + /** + * Cache fill size + */ + private int cachefill; + + /** + * Destination (minimum) size of the structure. + * At most destsize * 2 bins are allowed. + */ + private int destsize; + + /** + * Constructor. + * + * @param bins Design number of bins - may become twice as large! + */ + public DoubleDynamicHistogram(int bins) { + super(-1, 0.0, 1.0); + this.destsize = bins; + cachec = new double[this.destsize << CACHE_SHIFT]; + cachev = new double[this.destsize << CACHE_SHIFT]; + cachefill = 0; + } + + /** + * Materialize the histogram from the cache. + */ + void materialize() { + // already materialized? + if (cachefill < 0) { + return; + } + // Compute minimum and maximum + double min = Double.MAX_VALUE, max = Double.MIN_VALUE; + for (int i = 0; i < cachefill; i++) { + min = Math.min(min, cachec[i]); + max = Math.max(max, cachec[i]); + } + // use the LinearScale magic to round to "likely suiteable" step sizes. + // TODO: extract into a reusable function? + LinearScale scale = new LinearScale(min, max); + min = scale.getMin(); + max = scale.getMax(); + this.base = min; + this.max = max; + this.binsize = (max - min) / this.destsize; + // initialize array + this.data = new double[this.destsize << 1]; + size = destsize; + // re-insert data we have + final int end = cachefill; + cachefill = -1; // So reinsert works! + for (int i = 0; i < end; i++) { + increment(cachec[i], cachev[i]); + } + // delete cache, signal that we're initialized + cachec = null; + cachev = null; + } + + @Override + public double get(double coord) { + materialize(); + testResample(coord); + return super.get(coord); + } + + /** + * Put fresh data into the histogram (or into the cache) + * + * @param coord Coordinate + * @param value Value + */ + @Override + public void increment(double coord, double value) { + // Store in cache + if (cachefill >= 0) { + if (cachefill < cachec.length) { + cachec[cachefill] = coord; + cachev[cachefill] = value; + cachefill ++; + return; + } else { + materialize(); + // But continue below! + } + } + // Check if we need to resample to accomodate this bin. + testResample(coord); + // super class will handle histogram resizing / shifting + super.increment(coord, value); + } + + /** + * Test (and perform) downsampling when neede. + * + * @param coord coordinate to accomodate. + */ + private void testResample(double coord) { + final int bin = getBinNr(coord); + final int sizereq, off; + if (bin < 0) { + sizereq = size - bin; + off = -bin; + } else if (bin >= data.length) { + sizereq = bin + 1; + off = 0; + } else { + // Within the designated size - nothing to do. + return; + } + if (sizereq < data.length) { + // Accomodate by shifting. Let super do the job in {@link #get} + return; + } + // Resampling, eventually by multiple levels. + final int levels = BitsUtil.magnitude(sizereq / this.destsize) - 1; + assert (levels > 0) : "No resampling required?!? sizereq=" + sizereq + " destsize=" + destsize + " array=" + data.length; + final int step = 1 << levels; + + final int fixpoint = off / (step - 1); + { + // Start positions for in-place bottom-up downsampling. + int oup = fixpoint; + int inp = (oup << levels) - off; + assert (-step < inp && inp <= oup && oup < inp + step) : (inp + " -> " + oup + " s=" + step + " o=" + off + " l=" + levels); + for (; inp < size; inp += step, oup++) { + assert (oup < inp + step && oup < data.length); + data[oup] = downsample(data, Math.max(0, inp), Math.min(size, inp + step), step); + } + // Clean upwards + for (; oup < data.length; oup++) { + data[oup] = 0; + } + } + if (off > 0) { + // Start positions for in-place downsampling top-down: + int oup = fixpoint - 1; + int inp = (oup << levels) - off; + assert (oup > inp) : (inp + " -> " + oup + " s=" + step + " o=" + off + " l=" + levels); + for (; inp > -step; inp -= step, oup--) { + assert (oup >= inp && oup >= 0); + data[oup] = downsample(data, Math.max(0, inp), Math.min(size, inp + step), step); + } + for (; oup >= 0; oup--) { + data[oup] = 0; + } + } + // recalculate histogram base. + base = base - (offset + off) * binsize; + offset = 0; + size = (size + 1) >> levels; + binsize = binsize * (1 << levels); + max = base + binsize * size; + } + + @Override + public Iter iter() { + materialize(); + return super.iter(); + } + + @Override + public int getNumBins() { + materialize(); + return super.getNumBins(); + } + + @Override + public double getBinsize() { + materialize(); + return super.getBinsize(); + } + + @Override + public double getCoverMinimum() { + materialize(); + return super.getCoverMinimum(); + } + + @Override + public double getCoverMaximum() { + materialize(); + return super.getCoverMaximum(); + } + + /** + * Perform downsampling on a number of bins. + * + * @param data Data array (needs cast!) + * @param start Interval start + * @param end Interval end (exclusive) + * @param size Intended size - extra bins are assumed to be empty, should be a + * power of two + * @return New bin value + */ + protected double downsample(double[] data, int start, int end, int size) { + double sum = 0; + for (int i = start; i < end; i++) { + sum += data[i]; + } + return sum; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleHistogram.java new file mode 100644 index 00000000..d5cee785 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleHistogram.java @@ -0,0 +1,58 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * Histogram storing double values. + * + * @author Erich Schubert + * + * @apiviz.has Iter + */ +public interface DoubleHistogram extends Histogram { + /** + * Increment the histogram at a given position. + * + * @param coord Coordinate + * @param value Value to increment by + */ + public void increment(double coord, double value); + + @Override + public Iter iter(); + + /** + * Iterator interface. + * + * @author Erich Schubert + */ + public static interface Iter extends Histogram.Iter { + /** + * Get the value of the bin. + * + * @return Bin value + */ + public double getValue(); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleStaticHistogram.java new file mode 100644 index 00000000..db839d10 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleStaticHistogram.java @@ -0,0 +1,132 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; + +/** + * Histogram class storing double values. + * + * The histogram will start with "bin" bins, but it can grow dynamically to the + * left and right. + * + * @author Erich Schubert + */ +public class DoubleStaticHistogram extends AbstractStaticHistogram implements DoubleHistogram { + /** + * Constructor. + * + * @param bins Number of bins + * @param min Cover minimum + * @param max Cover maximum + */ + public DoubleStaticHistogram(int bins, double min, double max) { + super(bins, min, max); + if (bins >= 0) { + data = new double[bins]; + } else { + data = null; + } + } + + /** + * Data store + */ + double[] data; + + /** + * Increment the value of a bin. + * + * @param coord Coordinate + * @param val Value + */ + @Override + public void increment(double coord, double val) { + int bin = getBinNr(coord); + if (bin < 0) { + if (size - bin > data.length) { + // Reallocate. TODO: use an arraylist-like grow strategy! + double[] tmpdata = new double[growSize(data.length, size - bin)]; + System.arraycopy(data, 0, tmpdata, -bin, size); + data = tmpdata; + } else { + // Shift in place and clear head + System.arraycopy(data, 0, data, -bin, size); + Arrays.fill(data, 0, -bin, (double) 0); + } + data[0] = val; + // Note that bin is negative, -bin is the shift offset! + assert (data.length >= size - bin); + offset -= bin; + size -= bin; + // TODO: modCounter++; and have iterators fast-fail + } else if (bin >= data.length) { + double[] tmpdata = new double[growSize(data.length, bin + 1)]; + System.arraycopy(data, 0, tmpdata, 0, size); + tmpdata[bin] = val; + data = tmpdata; + size = bin + 1; + // TODO: modCounter++; and have iterators fast-fail + // Unset max value when resizing + max = Double.MAX_VALUE; + } else { + if (bin >= size) { + // TODO: reset bins to 0 first? + size = bin + 1; + } + data[bin] += val; + } + } + + /** + * Get the value at a particular position. + * + * @param coord Coordinate + * @return Value + */ + public double get(double coord) { + int bin = getBinNr(coord); + if (bin < 0 || bin >= size) { + return 0; + } + return data[bin]; + } + + @Override + public Iter iter() { + return new Iter(); + } + + /** + * Iterator class. + * + * @author Erich Schubert + */ + public class Iter extends AbstractStaticHistogram.Iter implements DoubleHistogram.Iter { + @Override + public double getValue() { + return data[bin]; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatDynamicHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatDynamicHistogram.java new file mode 100644 index 00000000..a14ed00a --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatDynamicHistogram.java @@ -0,0 +1,248 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.scales.LinearScale; +import de.lmu.ifi.dbs.elki.utilities.BitsUtil; + +/** + * A flexible histogram storing float, that can dynamically adapt the number of + * bins to the data fed into the histogram. + * + * @author Erich Schubert + */ +public class FloatDynamicHistogram extends FloatStaticHistogram { + /** + * Cache for data to be inserted. + */ + private double[] cachec; + + /** + * Cache for data to be inserted. + */ + private float[] cachev; + + /** + * Cache fill size + */ + private int cachefill; + + /** + * Destination (minimum) size of the structure. + * At most destsize * 2 bins are allowed. + */ + private int destsize; + + /** + * Constructor. + * + * @param bins Design number of bins - may become twice as large! + */ + public FloatDynamicHistogram(int bins) { + super(-1, 0.0, 1.0); + this.destsize = bins; + cachec = new double[this.destsize << CACHE_SHIFT]; + cachev = new float[this.destsize << CACHE_SHIFT]; + cachefill = 0; + } + + /** + * Materialize the histogram from the cache. + */ + void materialize() { + // already materialized? + if (cachefill < 0) { + return; + } + // Compute minimum and maximum + double min = Double.MAX_VALUE, max = Double.MIN_VALUE; + for (int i = 0; i < cachefill; i++) { + min = Math.min(min, cachec[i]); + max = Math.max(max, cachec[i]); + } + // use the LinearScale magic to round to "likely suiteable" step sizes. + // TODO: extract into a reusable function? + LinearScale scale = new LinearScale(min, max); + min = scale.getMin(); + max = scale.getMax(); + this.base = min; + this.max = max; + this.binsize = (max - min) / this.destsize; + // initialize array + this.data = new float[this.destsize << 1]; + size = destsize; + // re-insert data we have + final int end = cachefill; + cachefill = -1; // So reinsert works! + for (int i = 0; i < end; i++) { + increment(cachec[i], cachev[i]); + } + // delete cache, signal that we're initialized + cachec = null; + cachev = null; + } + + @Override + public float get(double coord) { + materialize(); + testResample(coord); + return super.get(coord); + } + + /** + * Put fresh data into the histogram (or into the cache) + * + * @param coord Coordinate + * @param value Value + */ + @Override + public void increment(double coord, float value) { + // Store in cache + if (cachefill >= 0) { + if (cachefill < cachec.length) { + cachec[cachefill] = coord; + cachev[cachefill] = value; + cachefill ++; + return; + } else { + materialize(); + // But continue below! + } + } + // Check if we need to resample to accomodate this bin. + testResample(coord); + // super class will handle histogram resizing / shifting + super.increment(coord, value); + } + + /** + * Test (and perform) downsampling when neede. + * + * @param coord coordinate to accomodate. + */ + private void testResample(double coord) { + final int bin = getBinNr(coord); + final int sizereq, off; + if (bin < 0) { + sizereq = size - bin; + off = -bin; + } else if (bin >= data.length) { + sizereq = bin + 1; + off = 0; + } else { + // Within the designated size - nothing to do. + return; + } + if (sizereq < data.length) { + // Accomodate by shifting. Let super do the job in {@link #get} + return; + } + // Resampling, eventually by multiple levels. + final int levels = BitsUtil.magnitude(sizereq / this.destsize) - 1; + assert (levels > 0) : "No resampling required?!? sizereq=" + sizereq + " destsize=" + destsize + " array=" + data.length; + final int step = 1 << levels; + + final int fixpoint = off / (step - 1); + { + // Start positions for in-place bottom-up downsampling. + int oup = fixpoint; + int inp = (oup << levels) - off; + assert (-step < inp && inp <= oup && oup < inp + step) : (inp + " -> " + oup + " s=" + step + " o=" + off + " l=" + levels); + for (; inp < size; inp += step, oup++) { + assert (oup < inp + step && oup < data.length); + data[oup] = downsample(data, Math.max(0, inp), Math.min(size, inp + step), step); + } + // Clean upwards + for (; oup < data.length; oup++) { + data[oup] = 0; + } + } + if (off > 0) { + // Start positions for in-place downsampling top-down: + int oup = fixpoint - 1; + int inp = (oup << levels) - off; + assert (oup > inp) : (inp + " -> " + oup + " s=" + step + " o=" + off + " l=" + levels); + for (; inp > -step; inp -= step, oup--) { + assert (oup >= inp && oup >= 0); + data[oup] = downsample(data, Math.max(0, inp), Math.min(size, inp + step), step); + } + for (; oup >= 0; oup--) { + data[oup] = 0; + } + } + // recalculate histogram base. + base = base - (offset + off) * binsize; + offset = 0; + size = (size + 1) >> levels; + binsize = binsize * (1 << levels); + max = base + binsize * size; + } + + @Override + public Iter iter() { + materialize(); + return super.iter(); + } + + @Override + public int getNumBins() { + materialize(); + return super.getNumBins(); + } + + @Override + public double getBinsize() { + materialize(); + return super.getBinsize(); + } + + @Override + public double getCoverMinimum() { + materialize(); + return super.getCoverMinimum(); + } + + @Override + public double getCoverMaximum() { + materialize(); + return super.getCoverMaximum(); + } + + /** + * Perform downsampling on a number of bins. + * + * @param data Data array (needs cast!) + * @param start Interval start + * @param end Interval end (exclusive) + * @param size Intended size - extra bins are assumed to be empty, should be a + * power of two + * @return New bin value + */ + protected float downsample(float[] data, int start, int end, int size) { + float sum = 0; + for (int i = start; i < end; i++) { + sum += data[i]; + } + return sum; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatHistogram.java new file mode 100644 index 00000000..f5a65bfa --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatHistogram.java @@ -0,0 +1,58 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * Histogram storing float values. + * + * @author Erich Schubert + * + * @apiviz.has Iter + */ +public interface FloatHistogram extends Histogram { + /** + * Increment the histogram at a given position. + * + * @param coord Coordinate + * @param value Value to increment by + */ + public void increment(double coord, float value); + + @Override + public Iter iter(); + + /** + * Iterator interface. + * + * @author Erich Schubert + */ + public static interface Iter extends Histogram.Iter { + /** + * Get the value of the bin. + * + * @return Bin value + */ + public float getValue(); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatStaticHistogram.java new file mode 100644 index 00000000..b3f41994 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatStaticHistogram.java @@ -0,0 +1,132 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; + +/** + * Histogram class storing float values. + * + * The histogram will start with "bin" bins, but it can grow dynamically to the + * left and right. + * + * @author Erich Schubert + */ +public class FloatStaticHistogram extends AbstractStaticHistogram implements FloatHistogram { + /** + * Constructor. + * + * @param bins Number of bins + * @param min Cover minimum + * @param max Cover maximum + */ + public FloatStaticHistogram(int bins, double min, double max) { + super(bins, min, max); + if (bins >= 0) { + data = new float[bins]; + } else { + data = null; + } + } + + /** + * Data store + */ + float[] data; + + /** + * Increment the value of a bin. + * + * @param coord Coordinate + * @param val Value + */ + @Override + public void increment(double coord, float val) { + int bin = getBinNr(coord); + if (bin < 0) { + if (size - bin > data.length) { + // Reallocate. TODO: use an arraylist-like grow strategy! + float[] tmpdata = new float[growSize(data.length, size - bin)]; + System.arraycopy(data, 0, tmpdata, -bin, size); + data = tmpdata; + } else { + // Shift in place and clear head + System.arraycopy(data, 0, data, -bin, size); + Arrays.fill(data, 0, -bin, (float) 0); + } + data[0] = val; + // Note that bin is negative, -bin is the shift offset! + assert (data.length >= size - bin); + offset -= bin; + size -= bin; + // TODO: modCounter++; and have iterators fast-fail + } else if (bin >= data.length) { + float[] tmpdata = new float[growSize(data.length, bin + 1)]; + System.arraycopy(data, 0, tmpdata, 0, size); + tmpdata[bin] = val; + data = tmpdata; + size = bin + 1; + // TODO: modCounter++; and have iterators fast-fail + // Unset max value when resizing + max = Double.MAX_VALUE; + } else { + if (bin >= size) { + // TODO: reset bins to 0 first? + size = bin + 1; + } + data[bin] += val; + } + } + + /** + * Get the value at a particular position. + * + * @param coord Coordinate + * @return Value + */ + public float get(double coord) { + int bin = getBinNr(coord); + if (bin < 0 || bin >= size) { + return 0; + } + return data[bin]; + } + + @Override + public Iter iter() { + return new Iter(); + } + + /** + * Iterator class. + * + * @author Erich Schubert + */ + public class Iter extends AbstractStaticHistogram.Iter implements FloatHistogram.Iter { + @Override + public float getValue() { + return data[bin]; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/Histogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/Histogram.java new file mode 100644 index 00000000..75be6830 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/Histogram.java @@ -0,0 +1,105 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.utilities.iterator.ArrayIter; + +/** + * Abstract API for histograms. Without specific type information, to allow this + * to be shared for primitive types, too! + * + * @author Erich Schubert + * + * @apiviz.has Iter + */ +public interface Histogram { + /** + * This parameter controls the cache size used for dynamic histograms before + * setting the initial thresholds. + */ + public final static int CACHE_SHIFT = 2; + + /** + * Get the number of bins actually in use. + * + * @return number of bins + */ + public abstract int getNumBins(); + + /** + * Get the size (width) of a bin. + * + * @return bin size + */ + public abstract double getBinsize(); + + /** + * Get minimum (covered by bins, not data!) + * + * @return minimum + */ + public abstract double getCoverMinimum(); + + /** + * Get maximum (covered by bins, not data!) + * + * @return maximum + */ + public abstract double getCoverMaximum(); + + /** + * Get an iterator over all histogram bins. + * + * @return Iterator + */ + public abstract Iter iter(); + + /** + * Array iterator. + * + * @author Erich Schubert + */ + public static interface Iter extends ArrayIter { + /** + * Get the bin center. + * + * @return bin center value + */ + public double getCenter(); + + /** + * Get the bin minimum. + * + * @return bin left value + */ + public double getLeft(); + + /** + * Get the bin maximum. + * + * @return bin right value + */ + public double getRight(); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntArrayStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntArrayStaticHistogram.java new file mode 100644 index 00000000..8d00604b --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntArrayStaticHistogram.java @@ -0,0 +1,85 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; + +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2012 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/** + * Histogram containing an array of ints (i.e. {@code int[]}). This is + * actually one of the simpler specializations, as arrays are objects not + * primitive in Java. + * + * @author Erich Schubert + */ +public final class IntArrayStaticHistogram extends AbstractObjStaticHistogram<int[]> { + /** + * Desired number of columns in each bin. + */ + private final int cols; + + /** + * Constructor. + * + * @param bins Number of bins + * @param min Minimum value for the coordinates + * @param max Maximum value for the coordinates + * @param cols Number of columns in each bin. + */ + public IntArrayStaticHistogram(int bins, double min, double max, int cols) { + super(bins, min, max); + this.cols = cols; + for (int i = 0; i < bins; i++) { + data[i] = new int[cols]; + } + } + + /** + * Increment histogram by a double[]. + * + * @param coord Coordinate + * @param data Data to increment by. + */ + public void increment(double coord, int[] data) { + int[] existing = get(coord); + for (int i = 0; i < existing.length; i++) { + existing[i] += data[i]; + } + } + + /** + * {@inheritDoc} + * + * Data is combined by incrementing. + * + * @deprecated use the explicit {@link #increment} instead. + */ + @Deprecated + @Override + public void putData(double coord, int[] data) { + increment(coord, data); + } + + @Override + protected int[] makeObject() { + return new int[cols]; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntDynamicHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntDynamicHistogram.java new file mode 100644 index 00000000..0967ebd5 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntDynamicHistogram.java @@ -0,0 +1,248 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.scales.LinearScale; +import de.lmu.ifi.dbs.elki.utilities.BitsUtil; + +/** + * A flexible histogram storing int, that can dynamically adapt the number of + * bins to the data fed into the histogram. + * + * @author Erich Schubert + */ +public class IntDynamicHistogram extends IntStaticHistogram { + /** + * Cache for data to be inserted. + */ + private double[] cachec; + + /** + * Cache for data to be inserted. + */ + private int[] cachev; + + /** + * Cache fill size + */ + private int cachefill; + + /** + * Destination (minimum) size of the structure. + * At most destsize * 2 bins are allowed. + */ + private int destsize; + + /** + * Constructor. + * + * @param bins Design number of bins - may become twice as large! + */ + public IntDynamicHistogram(int bins) { + super(-1, 0.0, 1.0); + this.destsize = bins; + cachec = new double[this.destsize << CACHE_SHIFT]; + cachev = new int[this.destsize << CACHE_SHIFT]; + cachefill = 0; + } + + /** + * Materialize the histogram from the cache. + */ + void materialize() { + // already materialized? + if (cachefill < 0) { + return; + } + // Compute minimum and maximum + double min = Double.MAX_VALUE, max = Double.MIN_VALUE; + for (int i = 0; i < cachefill; i++) { + min = Math.min(min, cachec[i]); + max = Math.max(max, cachec[i]); + } + // use the LinearScale magic to round to "likely suiteable" step sizes. + // TODO: extract into a reusable function? + LinearScale scale = new LinearScale(min, max); + min = scale.getMin(); + max = scale.getMax(); + this.base = min; + this.max = max; + this.binsize = (max - min) / this.destsize; + // initialize array + this.data = new int[this.destsize << 1]; + size = destsize; + // re-insert data we have + final int end = cachefill; + cachefill = -1; // So reinsert works! + for (int i = 0; i < end; i++) { + increment(cachec[i], cachev[i]); + } + // delete cache, signal that we're initialized + cachec = null; + cachev = null; + } + + @Override + public int get(double coord) { + materialize(); + testResample(coord); + return super.get(coord); + } + + /** + * Put fresh data into the histogram (or into the cache) + * + * @param coord Coordinate + * @param value Value + */ + @Override + public void increment(double coord, int value) { + // Store in cache + if (cachefill >= 0) { + if (cachefill < cachec.length) { + cachec[cachefill] = coord; + cachev[cachefill] = value; + cachefill ++; + return; + } else { + materialize(); + // But continue below! + } + } + // Check if we need to resample to accomodate this bin. + testResample(coord); + // super class will handle histogram resizing / shifting + super.increment(coord, value); + } + + /** + * Test (and perform) downsampling when neede. + * + * @param coord coordinate to accomodate. + */ + private void testResample(double coord) { + final int bin = getBinNr(coord); + final int sizereq, off; + if (bin < 0) { + sizereq = size - bin; + off = -bin; + } else if (bin >= data.length) { + sizereq = bin + 1; + off = 0; + } else { + // Within the designated size - nothing to do. + return; + } + if (sizereq < data.length) { + // Accomodate by shifting. Let super do the job in {@link #get} + return; + } + // Resampling, eventually by multiple levels. + final int levels = BitsUtil.magnitude(sizereq / this.destsize) - 1; + assert (levels > 0) : "No resampling required?!? sizereq=" + sizereq + " destsize=" + destsize + " array=" + data.length; + final int step = 1 << levels; + + final int fixpoint = off / (step - 1); + { + // Start positions for in-place bottom-up downsampling. + int oup = fixpoint; + int inp = (oup << levels) - off; + assert (-step < inp && inp <= oup && oup < inp + step) : (inp + " -> " + oup + " s=" + step + " o=" + off + " l=" + levels); + for (; inp < size; inp += step, oup++) { + assert (oup < inp + step && oup < data.length); + data[oup] = downsample(data, Math.max(0, inp), Math.min(size, inp + step), step); + } + // Clean upwards + for (; oup < data.length; oup++) { + data[oup] = 0; + } + } + if (off > 0) { + // Start positions for in-place downsampling top-down: + int oup = fixpoint - 1; + int inp = (oup << levels) - off; + assert (oup > inp) : (inp + " -> " + oup + " s=" + step + " o=" + off + " l=" + levels); + for (; inp > -step; inp -= step, oup--) { + assert (oup >= inp && oup >= 0); + data[oup] = downsample(data, Math.max(0, inp), Math.min(size, inp + step), step); + } + for (; oup >= 0; oup--) { + data[oup] = 0; + } + } + // recalculate histogram base. + base = base - (offset + off) * binsize; + offset = 0; + size = (size + 1) >> levels; + binsize = binsize * (1 << levels); + max = base + binsize * size; + } + + @Override + public Iter iter() { + materialize(); + return super.iter(); + } + + @Override + public int getNumBins() { + materialize(); + return super.getNumBins(); + } + + @Override + public double getBinsize() { + materialize(); + return super.getBinsize(); + } + + @Override + public double getCoverMinimum() { + materialize(); + return super.getCoverMinimum(); + } + + @Override + public double getCoverMaximum() { + materialize(); + return super.getCoverMaximum(); + } + + /** + * Perform downsampling on a number of bins. + * + * @param data Data array (needs cast!) + * @param start Interval start + * @param end Interval end (exclusive) + * @param size Intended size - extra bins are assumed to be empty, should be a + * power of two + * @return New bin value + */ + protected int downsample(int[] data, int start, int end, int size) { + int sum = 0; + for (int i = start; i < end; i++) { + sum += data[i]; + } + return sum; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntHistogram.java new file mode 100644 index 00000000..9ec4ec56 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntHistogram.java @@ -0,0 +1,58 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * Histogram storing int values. + * + * @author Erich Schubert + * + * @apiviz.has Iter + */ +public interface IntHistogram extends Histogram { + /** + * Increment the histogram at a given position. + * + * @param coord Coordinate + * @param value Value to increment by + */ + public void increment(double coord, int value); + + @Override + public Iter iter(); + + /** + * Iterator interface. + * + * @author Erich Schubert + */ + public static interface Iter extends Histogram.Iter { + /** + * Get the value of the bin. + * + * @return Bin value + */ + public int getValue(); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntStaticHistogram.java new file mode 100644 index 00000000..d4de36d7 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntStaticHistogram.java @@ -0,0 +1,132 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; + +/** + * Histogram class storing int values. + * + * The histogram will start with "bin" bins, but it can grow dynamically to the + * left and right. + * + * @author Erich Schubert + */ +public class IntStaticHistogram extends AbstractStaticHistogram implements IntHistogram { + /** + * Constructor. + * + * @param bins Number of bins + * @param min Cover minimum + * @param max Cover maximum + */ + public IntStaticHistogram(int bins, double min, double max) { + super(bins, min, max); + if (bins >= 0) { + data = new int[bins]; + } else { + data = null; + } + } + + /** + * Data store + */ + int[] data; + + /** + * Increment the value of a bin. + * + * @param coord Coordinate + * @param val Value + */ + @Override + public void increment(double coord, int val) { + int bin = getBinNr(coord); + if (bin < 0) { + if (size - bin > data.length) { + // Reallocate. TODO: use an arraylist-like grow strategy! + int[] tmpdata = new int[growSize(data.length, size - bin)]; + System.arraycopy(data, 0, tmpdata, -bin, size); + data = tmpdata; + } else { + // Shift in place and clear head + System.arraycopy(data, 0, data, -bin, size); + Arrays.fill(data, 0, -bin, (int) 0); + } + data[0] = val; + // Note that bin is negative, -bin is the shift offset! + assert (data.length >= size - bin); + offset -= bin; + size -= bin; + // TODO: modCounter++; and have iterators fast-fail + } else if (bin >= data.length) { + int[] tmpdata = new int[growSize(data.length, bin + 1)]; + System.arraycopy(data, 0, tmpdata, 0, size); + tmpdata[bin] = val; + data = tmpdata; + size = bin + 1; + // TODO: modCounter++; and have iterators fast-fail + // Unset max value when resizing + max = Double.MAX_VALUE; + } else { + if (bin >= size) { + // TODO: reset bins to 0 first? + size = bin + 1; + } + data[bin] += val; + } + } + + /** + * Get the value at a particular position. + * + * @param coord Coordinate + * @return Value + */ + public int get(double coord) { + int bin = getBinNr(coord); + if (bin < 0 || bin >= size) { + return 0; + } + return data[bin]; + } + + @Override + public Iter iter() { + return new Iter(); + } + + /** + * Iterator class. + * + * @author Erich Schubert + */ + public class Iter extends AbstractStaticHistogram.Iter implements IntHistogram.Iter { + @Override + public int getValue() { + return data[bin]; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongArrayStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongArrayStaticHistogram.java new file mode 100644 index 00000000..efbf751f --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongArrayStaticHistogram.java @@ -0,0 +1,85 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * Histogram containing an array of longs (i.e. {@code long[]}). This is + * actually one of the simpler specializations, as arrays are objects not + * primitive in Java. + * + * @author Erich Schubert + */ +public final class LongArrayStaticHistogram extends AbstractObjStaticHistogram<long[]> { + /** + * Desired number of columns in each bin. + */ + private final int cols; + + /** + * Constructor. + * + * @param bins Number of bins + * @param min Minimum value for the coordinates + * @param max Maximum value for the coordinates + * @param cols Number of columns in each bin. + */ + public LongArrayStaticHistogram(int bins, double min, double max, int cols) { + super(bins, min, max); + this.cols = cols; + for (int i = 0; i < bins; i++) { + data[i] = new long[cols]; + } + } + + /** + * Increment histogram by a double[]. + * + * @param coord Coordinate + * @param data Data to increment by. + */ + public void increment(double coord, long[] data) { + long[] existing = get(coord); + for (int i = 0; i < existing.length; i++) { + existing[i] += data[i]; + } + } + + /** + * {@inheritDoc} + * + * Data is combined by incrementing. + * + * @deprecated use the explicit {@link #increment} instead. + */ + @Deprecated + @Override + public void putData(double coord, long[] data) { + increment(coord, data); + } + + @Override + protected long[] makeObject() { + return new long[cols]; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongDynamicHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongDynamicHistogram.java new file mode 100644 index 00000000..676a5e8f --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongDynamicHistogram.java @@ -0,0 +1,248 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.scales.LinearScale; +import de.lmu.ifi.dbs.elki.utilities.BitsUtil; + +/** + * A flexible histogram storing long, that can dynamically adapt the number of + * bins to the data fed into the histogram. + * + * @author Erich Schubert + */ +public class LongDynamicHistogram extends LongStaticHistogram { + /** + * Cache for data to be inserted. + */ + private double[] cachec; + + /** + * Cache for data to be inserted. + */ + private long[] cachev; + + /** + * Cache fill size + */ + private int cachefill; + + /** + * Destination (minimum) size of the structure. + * At most destsize * 2 bins are allowed. + */ + private int destsize; + + /** + * Constructor. + * + * @param bins Design number of bins - may become twice as large! + */ + public LongDynamicHistogram(int bins) { + super(-1, 0.0, 1.0); + this.destsize = bins; + cachec = new double[this.destsize << CACHE_SHIFT]; + cachev = new long[this.destsize << CACHE_SHIFT]; + cachefill = 0; + } + + /** + * Materialize the histogram from the cache. + */ + void materialize() { + // already materialized? + if (cachefill < 0) { + return; + } + // Compute minimum and maximum + double min = Double.MAX_VALUE, max = Double.MIN_VALUE; + for (int i = 0; i < cachefill; i++) { + min = Math.min(min, cachec[i]); + max = Math.max(max, cachec[i]); + } + // use the LinearScale magic to round to "likely suiteable" step sizes. + // TODO: extract into a reusable function? + LinearScale scale = new LinearScale(min, max); + min = scale.getMin(); + max = scale.getMax(); + this.base = min; + this.max = max; + this.binsize = (max - min) / this.destsize; + // initialize array + this.data = new long[this.destsize << 1]; + size = destsize; + // re-insert data we have + final int end = cachefill; + cachefill = -1; // So reinsert works! + for (int i = 0; i < end; i++) { + increment(cachec[i], cachev[i]); + } + // delete cache, signal that we're initialized + cachec = null; + cachev = null; + } + + @Override + public long get(double coord) { + materialize(); + testResample(coord); + return super.get(coord); + } + + /** + * Put fresh data into the histogram (or into the cache) + * + * @param coord Coordinate + * @param value Value + */ + @Override + public void increment(double coord, long value) { + // Store in cache + if (cachefill >= 0) { + if (cachefill < cachec.length) { + cachec[cachefill] = coord; + cachev[cachefill] = value; + cachefill ++; + return; + } else { + materialize(); + // But continue below! + } + } + // Check if we need to resample to accomodate this bin. + testResample(coord); + // super class will handle histogram resizing / shifting + super.increment(coord, value); + } + + /** + * Test (and perform) downsampling when neede. + * + * @param coord coordinate to accomodate. + */ + private void testResample(double coord) { + final int bin = getBinNr(coord); + final int sizereq, off; + if (bin < 0) { + sizereq = size - bin; + off = -bin; + } else if (bin >= data.length) { + sizereq = bin + 1; + off = 0; + } else { + // Within the designated size - nothing to do. + return; + } + if (sizereq < data.length) { + // Accomodate by shifting. Let super do the job in {@link #get} + return; + } + // Resampling, eventually by multiple levels. + final int levels = BitsUtil.magnitude(sizereq / this.destsize) - 1; + assert (levels > 0) : "No resampling required?!? sizereq=" + sizereq + " destsize=" + destsize + " array=" + data.length; + final int step = 1 << levels; + + final int fixpoint = off / (step - 1); + { + // Start positions for in-place bottom-up downsampling. + int oup = fixpoint; + int inp = (oup << levels) - off; + assert (-step < inp && inp <= oup && oup < inp + step) : (inp + " -> " + oup + " s=" + step + " o=" + off + " l=" + levels); + for (; inp < size; inp += step, oup++) { + assert (oup < inp + step && oup < data.length); + data[oup] = downsample(data, Math.max(0, inp), Math.min(size, inp + step), step); + } + // Clean upwards + for (; oup < data.length; oup++) { + data[oup] = 0; + } + } + if (off > 0) { + // Start positions for in-place downsampling top-down: + int oup = fixpoint - 1; + int inp = (oup << levels) - off; + assert (oup > inp) : (inp + " -> " + oup + " s=" + step + " o=" + off + " l=" + levels); + for (; inp > -step; inp -= step, oup--) { + assert (oup >= inp && oup >= 0); + data[oup] = downsample(data, Math.max(0, inp), Math.min(size, inp + step), step); + } + for (; oup >= 0; oup--) { + data[oup] = 0; + } + } + // recalculate histogram base. + base = base - (offset + off) * binsize; + offset = 0; + size = (size + 1) >> levels; + binsize = binsize * (1 << levels); + max = base + binsize * size; + } + + @Override + public Iter iter() { + materialize(); + return super.iter(); + } + + @Override + public int getNumBins() { + materialize(); + return super.getNumBins(); + } + + @Override + public double getBinsize() { + materialize(); + return super.getBinsize(); + } + + @Override + public double getCoverMinimum() { + materialize(); + return super.getCoverMinimum(); + } + + @Override + public double getCoverMaximum() { + materialize(); + return super.getCoverMaximum(); + } + + /** + * Perform downsampling on a number of bins. + * + * @param data Data array (needs cast!) + * @param start Interval start + * @param end Interval end (exclusive) + * @param size Intended size - extra bins are assumed to be empty, should be a + * power of two + * @return New bin value + */ + protected long downsample(long[] data, int start, int end, int size) { + long sum = 0; + for (int i = start; i < end; i++) { + sum += data[i]; + } + return sum; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongHistogram.java new file mode 100644 index 00000000..9be15e65 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongHistogram.java @@ -0,0 +1,58 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * Histogram storing long values. + * + * @author Erich Schubert + * + * @apiviz.has Iter + */ +public interface LongHistogram extends Histogram { + /** + * Increment the histogram at a given position. + * + * @param coord Coordinate + * @param value Value to increment by + */ + public void increment(double coord, long value); + + @Override + public Iter iter(); + + /** + * Iterator interface. + * + * @author Erich Schubert + */ + public static interface Iter extends Histogram.Iter { + /** + * Get the value of the bin. + * + * @return Bin value + */ + public long getValue(); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongStaticHistogram.java new file mode 100644 index 00000000..63e15599 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongStaticHistogram.java @@ -0,0 +1,132 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; + +/** + * Histogram class storing long values. + * + * The histogram will start with "bin" bins, but it can grow dynamically to the + * left and right. + * + * @author Erich Schubert + */ +public class LongStaticHistogram extends AbstractStaticHistogram implements LongHistogram { + /** + * Constructor. + * + * @param bins Number of bins + * @param min Cover minimum + * @param max Cover maximum + */ + public LongStaticHistogram(int bins, double min, double max) { + super(bins, min, max); + if (bins >= 0) { + data = new long[bins]; + } else { + data = null; + } + } + + /** + * Data store + */ + long[] data; + + /** + * Increment the value of a bin. + * + * @param coord Coordinate + * @param val Value + */ + @Override + public void increment(double coord, long val) { + int bin = getBinNr(coord); + if (bin < 0) { + if (size - bin > data.length) { + // Reallocate. TODO: use an arraylist-like grow strategy! + long[] tmpdata = new long[growSize(data.length, size - bin)]; + System.arraycopy(data, 0, tmpdata, -bin, size); + data = tmpdata; + } else { + // Shift in place and clear head + System.arraycopy(data, 0, data, -bin, size); + Arrays.fill(data, 0, -bin, (long) 0); + } + data[0] = val; + // Note that bin is negative, -bin is the shift offset! + assert (data.length >= size - bin); + offset -= bin; + size -= bin; + // TODO: modCounter++; and have iterators fast-fail + } else if (bin >= data.length) { + long[] tmpdata = new long[growSize(data.length, bin + 1)]; + System.arraycopy(data, 0, tmpdata, 0, size); + tmpdata[bin] = val; + data = tmpdata; + size = bin + 1; + // TODO: modCounter++; and have iterators fast-fail + // Unset max value when resizing + max = Double.MAX_VALUE; + } else { + if (bin >= size) { + // TODO: reset bins to 0 first? + size = bin + 1; + } + data[bin] += val; + } + } + + /** + * Get the value at a particular position. + * + * @param coord Coordinate + * @return Value + */ + public long get(double coord) { + int bin = getBinNr(coord); + if (bin < 0 || bin >= size) { + return 0; + } + return data[bin]; + } + + @Override + public Iter iter() { + return new Iter(); + } + + /** + * Iterator class. + * + * @author Erich Schubert + */ + public class Iter extends AbstractStaticHistogram.Iter implements LongHistogram.Iter { + @Override + public long getValue() { + return data[bin]; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/MeanVarianceStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/MeanVarianceStaticHistogram.java new file mode 100644 index 00000000..2a464382 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/MeanVarianceStaticHistogram.java @@ -0,0 +1,80 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.math.MeanVariance; + +/** + * Histogram class storing MeanVaraince object. + * + * The histogram will start with "bin" bins, but it can grow dynamically to the + * left and right. + * + * @author Erich Schubert + * + * @apiviz.composedOf MeanVariance + */ +public class MeanVarianceStaticHistogram extends AbstractObjStaticHistogram<MeanVariance> { + /** + * Constructor. + * + * @param bins Number of bins + * @param min Cover minimum + * @param max Cover maximum + */ + public MeanVarianceStaticHistogram(int bins, double min, double max) { + super(bins, min, max); + } + + /** + * Data store + */ + MeanVariance[] data; + + /** + * Update the value of a bin with new data. + * + * @param coord Coordinate + * @param val Value + */ + public void put(double coord, double val) { + get(coord).put(val); + } + + /** + * {@inheritDoc} + * + * Data is combined by using {@link MeanVariance#put(Mean)}. + */ + @Override + public void putData(double coord, MeanVariance data) { + get(coord).put(data); + } + + + @Override + protected MeanVariance makeObject() { + return new MeanVariance(); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ObjHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ObjHistogram.java new file mode 100644 index 00000000..ac4d4e4b --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ObjHistogram.java @@ -0,0 +1,69 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * Basic interface for object based histograms (static and flexible). + * + * @author Erich Schubert + * + * @apiviz.has Iter + * + * @param <T> data type + */ +public interface ObjHistogram<T> extends Histogram { + /** + * Get a histogram iterator. + * + * @return Iterator + */ + @Override + public Iter<T> iter(); + + /** + * Aggregate new data into the histogram. + * + * Note that the actual definition of "aggregate" depends on the application. + * While a static histogram will likely perform this immediately, a flexible + * histogram will cache a number of observations. + * + * @param coord Coordinate + * @param data Data + */ + public void putData(double coord, T data); + + /** + * Histogram iterator. + * + * @author Erich Schubert + */ + public static interface Iter<T> extends Histogram.Iter { + /** + * Get the value of the bin. + * + * @return Bin value + */ + public T getValue(); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortDynamicHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortDynamicHistogram.java new file mode 100644 index 00000000..ff94928b --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortDynamicHistogram.java @@ -0,0 +1,248 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.scales.LinearScale; +import de.lmu.ifi.dbs.elki.utilities.BitsUtil; + +/** + * A flexible histogram storing short, that can dynamically adapt the number of + * bins to the data fed into the histogram. + * + * @author Erich Schubert + */ +public class ShortDynamicHistogram extends ShortStaticHistogram { + /** + * Cache for data to be inserted. + */ + private double[] cachec; + + /** + * Cache for data to be inserted. + */ + private short[] cachev; + + /** + * Cache fill size + */ + private int cachefill; + + /** + * Destination (minimum) size of the structure. + * At most destsize * 2 bins are allowed. + */ + private int destsize; + + /** + * Constructor. + * + * @param bins Design number of bins - may become twice as large! + */ + public ShortDynamicHistogram(int bins) { + super(-1, 0.0, 1.0); + this.destsize = bins; + cachec = new double[this.destsize << CACHE_SHIFT]; + cachev = new short[this.destsize << CACHE_SHIFT]; + cachefill = 0; + } + + /** + * Materialize the histogram from the cache. + */ + void materialize() { + // already materialized? + if (cachefill < 0) { + return; + } + // Compute minimum and maximum + double min = Double.MAX_VALUE, max = Double.MIN_VALUE; + for (int i = 0; i < cachefill; i++) { + min = Math.min(min, cachec[i]); + max = Math.max(max, cachec[i]); + } + // use the LinearScale magic to round to "likely suiteable" step sizes. + // TODO: extract into a reusable function? + LinearScale scale = new LinearScale(min, max); + min = scale.getMin(); + max = scale.getMax(); + this.base = min; + this.max = max; + this.binsize = (max - min) / this.destsize; + // initialize array + this.data = new short[this.destsize << 1]; + size = destsize; + // re-insert data we have + final int end = cachefill; + cachefill = -1; // So reinsert works! + for (int i = 0; i < end; i++) { + increment(cachec[i], cachev[i]); + } + // delete cache, signal that we're initialized + cachec = null; + cachev = null; + } + + @Override + public short get(double coord) { + materialize(); + testResample(coord); + return super.get(coord); + } + + /** + * Put fresh data into the histogram (or into the cache) + * + * @param coord Coordinate + * @param value Value + */ + @Override + public void increment(double coord, short value) { + // Store in cache + if (cachefill >= 0) { + if (cachefill < cachec.length) { + cachec[cachefill] = coord; + cachev[cachefill] = value; + cachefill ++; + return; + } else { + materialize(); + // But continue below! + } + } + // Check if we need to resample to accomodate this bin. + testResample(coord); + // super class will handle histogram resizing / shifting + super.increment(coord, value); + } + + /** + * Test (and perform) downsampling when neede. + * + * @param coord coordinate to accomodate. + */ + private void testResample(double coord) { + final int bin = getBinNr(coord); + final int sizereq, off; + if (bin < 0) { + sizereq = size - bin; + off = -bin; + } else if (bin >= data.length) { + sizereq = bin + 1; + off = 0; + } else { + // Within the designated size - nothing to do. + return; + } + if (sizereq < data.length) { + // Accomodate by shifting. Let super do the job in {@link #get} + return; + } + // Resampling, eventually by multiple levels. + final int levels = BitsUtil.magnitude(sizereq / this.destsize) - 1; + assert (levels > 0) : "No resampling required?!? sizereq=" + sizereq + " destsize=" + destsize + " array=" + data.length; + final int step = 1 << levels; + + final int fixpoint = off / (step - 1); + { + // Start positions for in-place bottom-up downsampling. + int oup = fixpoint; + int inp = (oup << levels) - off; + assert (-step < inp && inp <= oup && oup < inp + step) : (inp + " -> " + oup + " s=" + step + " o=" + off + " l=" + levels); + for (; inp < size; inp += step, oup++) { + assert (oup < inp + step && oup < data.length); + data[oup] = downsample(data, Math.max(0, inp), Math.min(size, inp + step), step); + } + // Clean upwards + for (; oup < data.length; oup++) { + data[oup] = 0; + } + } + if (off > 0) { + // Start positions for in-place downsampling top-down: + int oup = fixpoint - 1; + int inp = (oup << levels) - off; + assert (oup > inp) : (inp + " -> " + oup + " s=" + step + " o=" + off + " l=" + levels); + for (; inp > -step; inp -= step, oup--) { + assert (oup >= inp && oup >= 0); + data[oup] = downsample(data, Math.max(0, inp), Math.min(size, inp + step), step); + } + for (; oup >= 0; oup--) { + data[oup] = 0; + } + } + // recalculate histogram base. + base = base - (offset + off) * binsize; + offset = 0; + size = (size + 1) >> levels; + binsize = binsize * (1 << levels); + max = base + binsize * size; + } + + @Override + public Iter iter() { + materialize(); + return super.iter(); + } + + @Override + public int getNumBins() { + materialize(); + return super.getNumBins(); + } + + @Override + public double getBinsize() { + materialize(); + return super.getBinsize(); + } + + @Override + public double getCoverMinimum() { + materialize(); + return super.getCoverMinimum(); + } + + @Override + public double getCoverMaximum() { + materialize(); + return super.getCoverMaximum(); + } + + /** + * Perform downsampling on a number of bins. + * + * @param data Data array (needs cast!) + * @param start Interval start + * @param end Interval end (exclusive) + * @param size Intended size - extra bins are assumed to be empty, should be a + * power of two + * @return New bin value + */ + protected short downsample(short[] data, int start, int end, int size) { + short sum = 0; + for (int i = start; i < end; i++) { + sum += data[i]; + } + return sum; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortHistogram.java new file mode 100644 index 00000000..699df896 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortHistogram.java @@ -0,0 +1,58 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * Histogram storing short values. + * + * @author Erich Schubert + * + * @apiviz.has Iter + */ +public interface ShortHistogram extends Histogram { + /** + * Increment the histogram at a given position. + * + * @param coord Coordinate + * @param value Value to increment by + */ + public void increment(double coord, short value); + + @Override + public Iter iter(); + + /** + * Iterator interface. + * + * @author Erich Schubert + */ + public static interface Iter extends Histogram.Iter { + /** + * Get the value of the bin. + * + * @return Bin value + */ + public short getValue(); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortStaticHistogram.java new file mode 100644 index 00000000..b2809e1e --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortStaticHistogram.java @@ -0,0 +1,132 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; + +/** + * Histogram class storing short values. + * + * The histogram will start with "bin" bins, but it can grow dynamically to the + * left and right. + * + * @author Erich Schubert + */ +public class ShortStaticHistogram extends AbstractStaticHistogram implements ShortHistogram { + /** + * Constructor. + * + * @param bins Number of bins + * @param min Cover minimum + * @param max Cover maximum + */ + public ShortStaticHistogram(int bins, double min, double max) { + super(bins, min, max); + if (bins >= 0) { + data = new short[bins]; + } else { + data = null; + } + } + + /** + * Data store + */ + short[] data; + + /** + * Increment the value of a bin. + * + * @param coord Coordinate + * @param val Value + */ + @Override + public void increment(double coord, short val) { + int bin = getBinNr(coord); + if (bin < 0) { + if (size - bin > data.length) { + // Reallocate. TODO: use an arraylist-like grow strategy! + short[] tmpdata = new short[growSize(data.length, size - bin)]; + System.arraycopy(data, 0, tmpdata, -bin, size); + data = tmpdata; + } else { + // Shift in place and clear head + System.arraycopy(data, 0, data, -bin, size); + Arrays.fill(data, 0, -bin, (short) 0); + } + data[0] = val; + // Note that bin is negative, -bin is the shift offset! + assert (data.length >= size - bin); + offset -= bin; + size -= bin; + // TODO: modCounter++; and have iterators fast-fail + } else if (bin >= data.length) { + short[] tmpdata = new short[growSize(data.length, bin + 1)]; + System.arraycopy(data, 0, tmpdata, 0, size); + tmpdata[bin] = val; + data = tmpdata; + size = bin + 1; + // TODO: modCounter++; and have iterators fast-fail + // Unset max value when resizing + max = Double.MAX_VALUE; + } else { + if (bin >= size) { + // TODO: reset bins to 0 first? + size = bin + 1; + } + data[bin] += val; + } + } + + /** + * Get the value at a particular position. + * + * @param coord Coordinate + * @return Value + */ + public short get(double coord) { + int bin = getBinNr(coord); + if (bin < 0 || bin >= size) { + return 0; + } + return data[bin]; + } + + @Override + public Iter iter() { + return new Iter(); + } + + /** + * Iterator class. + * + * @author Erich Schubert + */ + public class Iter extends AbstractStaticHistogram.Iter implements ShortHistogram.Iter { + @Override + public short getValue() { + return data[bin]; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/package-info.java new file mode 100644 index 00000000..65dd6446 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/package-info.java @@ -0,0 +1,34 @@ +/** + * <p>Classes for computing histograms.</p> + * + * This package contains two families of histograms. Static histograms have a fixed initial number of bins. + * When encountering values outside of their range, they will grow similar to an ArrayList by adding additional bins. + * + * "Dynamic" histograms are more useful when you do not know the value range of the data: + * they start by collecting a number of sample data, then use this to estimate the initial histogram range. + * If they grow to twice their initial size they will downsample, to keep the histogram size in the bounds n to 2n-1, + * which effectively limits the memory use and histogram complexity. + */ +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
\ No newline at end of file |