summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/utilities/datastructures
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/utilities/datastructures')
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/QuickSelect.java526
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayLikeUtil.java33
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/DoubleArrayAdapter.java3
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ExtendedArray.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FeatureVectorAdapter.java8
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FloatArrayAdapter.java3
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ListArrayAdapter.java22
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberVectorAdapter.java37
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SubsetArrayAdapter.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/TDoubleListAdapter.java3
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/IntegerArrayQuickSort.java225
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/IntegerComparator.java40
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/package-info.java26
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/AbstractHeap.java103
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMaxHeap.java63
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMinHeap.java63
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleHeap.java264
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMaxHeap.java62
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMinHeap.java62
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjMaxHeap.java328
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjMinHeap.java328
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoublePriorityObject.java7
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/Heap.java264
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerHeap.java264
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMaxHeap.java62
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMinHeap.java62
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerPriorityObject.java7
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/KNNHeap.java153
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/KNNList.java181
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ObjectHeap.java267
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedHeap.java47
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedUpdatableHeap.java19
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedHeap.java58
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedUpdatableHeap.java39
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/UpdatableHeap.java51
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/HierarchyHashmapList.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractObjDynamicHistogram.java271
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractObjStaticHistogram.java129
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractStaticHistogram.java220
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleArrayStaticHistogram.java85
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleDynamicHistogram.java248
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleHistogram.java58
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleStaticHistogram.java132
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatDynamicHistogram.java248
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatHistogram.java58
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatStaticHistogram.java132
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/Histogram.java105
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntArrayStaticHistogram.java85
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntDynamicHistogram.java248
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntHistogram.java58
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntStaticHistogram.java132
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongArrayStaticHistogram.java85
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongDynamicHistogram.java248
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongHistogram.java58
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongStaticHistogram.java132
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/MeanVarianceStaticHistogram.java80
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ObjHistogram.java69
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortDynamicHistogram.java248
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortHistogram.java58
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortStaticHistogram.java132
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/package-info.java34
61 files changed, 6163 insertions, 884 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/QuickSelect.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/QuickSelect.java
index a191dde2..bfa7950d 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/QuickSelect.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/QuickSelect.java
@@ -5,6 +5,8 @@ import java.util.List;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
/*
This file is part of ELKI:
@@ -90,14 +92,14 @@ public class QuickSelect {
final int length = end - begin;
assert (length > 0);
// Integer division is "floor" since we are non-negative.
- final int left = begin + (length - 1) / 2;
+ final int left = begin + ((length - 1) >> 1);
quickSelect(data, begin, end, left);
if(length % 2 == 1) {
return data[left];
}
else {
quickSelect(data, begin, end, left + 1);
- return data[left] + (data[left + 1] - data[left]) / 2;
+ return data[left] + .5 * (data[left + 1] - data[left]);
}
}
@@ -155,59 +157,64 @@ public class QuickSelect {
* @param rank rank position we are interested in (starting at 0)
*/
public static void quickSelect(double[] data, int start, int end, int rank) {
- // Optimization for small arrays
- // This also ensures a minimum size below
- if(start + SMALL > end) {
- insertionSort(data, start, end);
- return;
- }
+ while(true) {
+ // Optimization for small arrays
+ // This also ensures a minimum size below
+ if(start + SMALL > end) {
+ insertionSort(data, start, end);
+ return;
+ }
- // Pick pivot from three candidates: start, middle, end
- // Since we compare them, we can also just "bubble sort" them.
- final int middle = (start + end) / 2;
- if(data[start] > data[middle]) {
- swap(data, start, middle);
- }
- if(data[start] > data[end - 1]) {
- swap(data, start, end - 1);
- }
- if(data[middle] > data[end - 1]) {
- swap(data, middle, end - 1);
- }
- // TODO: use more candidates for larger arrays?
+ // Pick pivot from three candidates: start, middle, end
+ // Since we compare them, we can also just "bubble sort" them.
+ final int middle = (start + end) >> 1;
+ if(data[start] > data[middle]) {
+ swap(data, start, middle);
+ }
+ if(data[start] > data[end - 1]) {
+ swap(data, start, end - 1);
+ }
+ if(data[middle] > data[end - 1]) {
+ swap(data, middle, end - 1);
+ }
+ // TODO: use more candidates for larger arrays?
+
+ final double pivot = data[middle];
+ // Move middle element out of the way, just before end
+ // (Since we already know that "end" is bigger)
+ swap(data, middle, end - 2);
+
+ // Begin partitioning
+ int i = start + 1, j = end - 3;
+ // This is classic quicksort stuff
+ while(true) {
+ while(data[i] <= pivot && i <= j) {
+ i++;
+ }
+ while(data[j] >= pivot && j >= i) {
+ j--;
+ }
+ if(i >= j) {
+ break;
+ }
+ swap(data, i, j);
+ }
- final double pivot = data[middle];
- // Move middle element out of the way, just before end
- // (Since we already know that "end" is bigger)
- swap(data, middle, end - 2);
+ // Move pivot (former middle element) back into the appropriate place
+ swap(data, i, end - 2);
- // Begin partitioning
- int i = start + 1, j = end - 3;
- // This is classic quicksort stuff
- while(true) {
- while(data[i] <= pivot && i <= j) {
- i++;
+ // In contrast to quicksort, we only need to recurse into the half we are
+ // interested in. Instead of recursion we now use iteration.
+ if(rank < i) {
+ end = i;
}
- while(data[j] >= pivot && j >= i) {
- j--;
+ else if(rank > i) {
+ start = i + 1;
}
- if(i >= j) {
+ else {
break;
}
- swap(data, i, j);
- }
-
- // Move pivot (former middle element) back into the appropriate place
- swap(data, i, end - 2);
-
- // In contrast to quicksort, we only need to recurse into the half we are
- // interested in.
- if(rank < i) {
- quickSelect(data, start, i, rank);
- }
- else if(rank > i) {
- quickSelect(data, i + 1, end, rank);
- }
+ } // Loop until rank==i
}
/**
@@ -283,7 +290,7 @@ public class QuickSelect {
final int length = end - begin;
assert (length > 0);
// Integer division is "floor" since we are non-negative.
- final int left = begin + (length - 1) / 2;
+ final int left = begin + ((length - 1) >> 1);
quickSelect(data, begin, end, left);
return data[left];
}
@@ -338,59 +345,64 @@ public class QuickSelect {
* @param rank rank position we are interested in (starting at 0)
*/
public static <T extends Comparable<? super T>> void quickSelect(T[] data, int start, int end, int rank) {
- // Optimization for small arrays
- // This also ensures a minimum size below
- if(start + SMALL > end) {
- insertionSort(data, start, end);
- return;
- }
+ while(true) {
+ // Optimization for small arrays
+ // This also ensures a minimum size below
+ if(start + SMALL > end) {
+ insertionSort(data, start, end);
+ return;
+ }
- // Pick pivot from three candidates: start, middle, end
- // Since we compare them, we can also just "bubble sort" them.
- final int middle = (start + end) / 2;
- if(data[start].compareTo(data[middle]) > 0) {
- swap(data, start, middle);
- }
- if(data[start].compareTo(data[end - 1]) > 0) {
- swap(data, start, end - 1);
- }
- if(data[middle].compareTo(data[end - 1]) > 0) {
- swap(data, middle, end - 1);
- }
- // TODO: use more candidates for larger arrays?
+ // Pick pivot from three candidates: start, middle, end
+ // Since we compare them, we can also just "bubble sort" them.
+ final int middle = (start + end) >> 1;
+ if(data[start].compareTo(data[middle]) > 0) {
+ swap(data, start, middle);
+ }
+ if(data[start].compareTo(data[end - 1]) > 0) {
+ swap(data, start, end - 1);
+ }
+ if(data[middle].compareTo(data[end - 1]) > 0) {
+ swap(data, middle, end - 1);
+ }
+ // TODO: use more candidates for larger arrays?
+
+ final T pivot = data[middle];
+ // Move middle element out of the way, just before end
+ // (Since we already know that "end" is bigger)
+ swap(data, middle, end - 2);
+
+ // Begin partitioning
+ int i = start + 1, j = end - 3;
+ // This is classic quicksort stuff
+ while(true) {
+ while(data[i].compareTo(pivot) <= 0 && i <= j) {
+ i++;
+ }
+ while(data[j].compareTo(pivot) >= 0 && j >= i) {
+ j--;
+ }
+ if(i >= j) {
+ break;
+ }
+ swap(data, i, j);
+ }
- final T pivot = data[middle];
- // Move middle element out of the way, just before end
- // (Since we already know that "end" is bigger)
- swap(data, middle, end - 2);
+ // Move pivot (former middle element) back into the appropriate place
+ swap(data, i, end - 2);
- // Begin partitioning
- int i = start + 1, j = end - 3;
- // This is classic quicksort stuff
- while(true) {
- while(data[i].compareTo(pivot) <= 0 && i <= j) {
- i++;
+ // In contrast to quicksort, we only need to recurse into the half we are
+ // interested in. Instead of recursion we now use iteration.
+ if(rank < i) {
+ end = i;
}
- while(data[j].compareTo(pivot) >= 0 && j >= i) {
- j--;
+ else if(rank > i) {
+ start = i + 1;
}
- if(i >= j) {
+ else {
break;
}
- swap(data, i, j);
- }
-
- // Move pivot (former middle element) back into the appropriate place
- swap(data, i, end - 2);
-
- // In contrast to quicksort, we only need to recurse into the half we are
- // interested in.
- if(rank < i) {
- quickSelect(data, start, i, rank);
- }
- else if(rank > i) {
- quickSelect(data, i + 1, end, rank);
- }
+ } // Loop until rank==i
}
/**
@@ -469,7 +481,7 @@ public class QuickSelect {
final int length = end - begin;
assert (length > 0);
// Integer division is "floor" since we are non-negative.
- final int left = begin + (length - 1) / 2;
+ final int left = begin + ((length - 1) >> 1);
quickSelect(data, begin, end, left);
return data.get(left);
}
@@ -524,59 +536,64 @@ public class QuickSelect {
* @param rank rank position we are interested in (starting at 0)
*/
public static <T extends Comparable<? super T>> void quickSelect(List<? extends T> data, int start, int end, int rank) {
- // Optimization for small arrays
- // This also ensures a minimum size below
- if(start + SMALL > end) {
- insertionSort(data, start, end);
- return;
- }
+ while(true) {
+ // Optimization for small arrays
+ // This also ensures a minimum size below
+ if(start + SMALL > end) {
+ insertionSort(data, start, end);
+ return;
+ }
- // Pick pivot from three candidates: start, middle, end
- // Since we compare them, we can also just "bubble sort" them.
- final int middle = (start + end) / 2;
- if(data.get(start).compareTo(data.get(middle)) > 0) {
- swap(data, start, middle);
- }
- if(data.get(start).compareTo(data.get(end - 1)) > 0) {
- swap(data, start, end - 1);
- }
- if(data.get(middle).compareTo(data.get(end - 1)) > 0) {
- swap(data, middle, end - 1);
- }
- // TODO: use more candidates for larger arrays?
+ // Pick pivot from three candidates: start, middle, end
+ // Since we compare them, we can also just "bubble sort" them.
+ final int middle = (start + end) >> 1;
+ if(data.get(start).compareTo(data.get(middle)) > 0) {
+ swap(data, start, middle);
+ }
+ if(data.get(start).compareTo(data.get(end - 1)) > 0) {
+ swap(data, start, end - 1);
+ }
+ if(data.get(middle).compareTo(data.get(end - 1)) > 0) {
+ swap(data, middle, end - 1);
+ }
+ // TODO: use more candidates for larger arrays?
+
+ final T pivot = data.get(middle);
+ // Move middle element out of the way, just before end
+ // (Since we already know that "end" is bigger)
+ swap(data, middle, end - 2);
+
+ // Begin partitioning
+ int i = start + 1, j = end - 3;
+ // This is classic quicksort stuff
+ while(true) {
+ while(data.get(i).compareTo(pivot) <= 0 && i <= j) {
+ i++;
+ }
+ while(data.get(j).compareTo(pivot) >= 0 && j >= i) {
+ j--;
+ }
+ if(i >= j) {
+ break;
+ }
+ swap(data, i, j);
+ }
- final T pivot = data.get(middle);
- // Move middle element out of the way, just before end
- // (Since we already know that "end" is bigger)
- swap(data, middle, end - 2);
+ // Move pivot (former middle element) back into the appropriate place
+ swap(data, i, end - 2);
- // Begin partitioning
- int i = start + 1, j = end - 3;
- // This is classic quicksort stuff
- while(true) {
- while(data.get(i).compareTo(pivot) <= 0 && i <= j) {
- i++;
+ // In contrast to quicksort, we only need to recurse into the half we are
+ // interested in. Instead of recursion we now use iteration.
+ if(rank < i) {
+ end = i;
}
- while(data.get(j).compareTo(pivot) >= 0 && j >= i) {
- j--;
+ else if(rank > i) {
+ start = i + 1;
}
- if(i >= j) {
+ else {
break;
}
- swap(data, i, j);
- }
-
- // Move pivot (former middle element) back into the appropriate place
- swap(data, i, end - 2);
-
- // In contrast to quicksort, we only need to recurse into the half we are
- // interested in.
- if(rank < i) {
- quickSelect(data, start, i, rank);
- }
- else if(rank > i) {
- quickSelect(data, i + 1, end, rank);
- }
+ } // Loop until rank==i
}
/**
@@ -656,7 +673,7 @@ public class QuickSelect {
final int length = end - begin;
assert (length > 0);
// Integer division is "floor" since we are non-negative.
- final int left = begin + (length - 1) / 2;
+ final int left = begin + ((length - 1) >> 1);
quickSelect(data, comparator, begin, end, left);
return data.get(left);
}
@@ -714,59 +731,64 @@ public class QuickSelect {
* @param rank rank position we are interested in (starting at 0)
*/
public static <T> void quickSelect(List<? extends T> data, Comparator<? super T> comparator, int start, int end, int rank) {
- // Optimization for small arrays
- // This also ensures a minimum size below
- if(start + SMALL > end) {
- insertionSort(data, comparator, start, end);
- return;
- }
+ while(true) {
+ // Optimization for small arrays
+ // This also ensures a minimum size below
+ if(start + SMALL > end) {
+ insertionSort(data, comparator, start, end);
+ return;
+ }
- // Pick pivot from three candidates: start, middle, end
- // Since we compare them, we can also just "bubble sort" them.
- final int middle = (start + end) / 2;
- if(comparator.compare(data.get(start), data.get(middle)) > 0) {
- swap(data, start, middle);
- }
- if(comparator.compare(data.get(start), data.get(end - 1)) > 0) {
- swap(data, start, end - 1);
- }
- if(comparator.compare(data.get(middle), data.get(end - 1)) > 0) {
- swap(data, middle, end - 1);
- }
- // TODO: use more candidates for larger arrays?
+ // Pick pivot from three candidates: start, middle, end
+ // Since we compare them, we can also just "bubble sort" them.
+ final int middle = (start + end) >> 1;
+ if(comparator.compare(data.get(start), data.get(middle)) > 0) {
+ swap(data, start, middle);
+ }
+ if(comparator.compare(data.get(start), data.get(end - 1)) > 0) {
+ swap(data, start, end - 1);
+ }
+ if(comparator.compare(data.get(middle), data.get(end - 1)) > 0) {
+ swap(data, middle, end - 1);
+ }
+ // TODO: use more candidates for larger arrays?
+
+ final T pivot = data.get(middle);
+ // Move middle element out of the way, just before end
+ // (Since we already know that "end" is bigger)
+ swap(data, middle, end - 2);
+
+ // Begin partitioning
+ int i = start + 1, j = end - 3;
+ // This is classic quicksort stuff
+ while(true) {
+ while(comparator.compare(data.get(i), pivot) <= 0 && i <= j) {
+ i++;
+ }
+ while(comparator.compare(data.get(j), pivot) >= 0 && j >= i) {
+ j--;
+ }
+ if(i >= j) {
+ break;
+ }
+ swap(data, i, j);
+ }
- final T pivot = data.get(middle);
- // Move middle element out of the way, just before end
- // (Since we already know that "end" is bigger)
- swap(data, middle, end - 2);
+ // Move pivot (former middle element) back into the appropriate place
+ swap(data, i, end - 2);
- // Begin partitioning
- int i = start + 1, j = end - 3;
- // This is classic quicksort stuff
- while(true) {
- while(comparator.compare(data.get(i), pivot) <= 0 && i <= j) {
- i++;
+ // In contrast to quicksort, we only need to recurse into the half we are
+ // interested in. Instead of recursion we now use iteration.
+ if(rank < i) {
+ end = i;
}
- while(comparator.compare(data.get(j), pivot) >= 0 && j >= i) {
- j--;
+ else if(rank > i) {
+ start = i + 1;
}
- if(i >= j) {
+ else {
break;
}
- swap(data, i, j);
- }
-
- // Move pivot (former middle element) back into the appropriate place
- swap(data, i, end - 2);
-
- // In contrast to quicksort, we only need to recurse into the half we are
- // interested in.
- if(rank < i) {
- quickSelect(data, comparator, start, i, rank);
- }
- else if(rank > i) {
- quickSelect(data, comparator, i + 1, end, rank);
- }
+ } // Loop until rank==i
}
/**
@@ -796,7 +818,7 @@ public class QuickSelect {
* @param rank Rank position that we are interested in (integer!)
* @return Value at the given rank
*/
- public static DBID quickSelect(ArrayModifiableDBIDs data, Comparator<? super DBID> comparator, int rank) {
+ public static DBID quickSelect(ArrayModifiableDBIDs data, Comparator<? super DBIDRef> comparator, int rank) {
quickSelect(data, comparator, 0, data.size(), rank);
return data.get(rank);
}
@@ -810,7 +832,7 @@ public class QuickSelect {
* @param comparator Comparator to use
* @return Median value
*/
- public static DBID median(ArrayModifiableDBIDs data, Comparator<? super DBID> comparator) {
+ public static DBID median(ArrayModifiableDBIDs data, Comparator<? super DBIDRef> comparator) {
return median(data, comparator, 0, data.size());
}
@@ -827,11 +849,11 @@ public class QuickSelect {
* @param end End of valid values (exclusive!)
* @return Median value
*/
- public static DBID median(ArrayModifiableDBIDs data, Comparator<? super DBID> comparator, int begin, int end) {
+ public static DBID median(ArrayModifiableDBIDs data, Comparator<? super DBIDRef> comparator, int begin, int end) {
final int length = end - begin;
assert (length > 0);
// Integer division is "floor" since we are non-negative.
- final int left = begin + (length - 1) / 2;
+ final int left = begin + ((length - 1) >> 1);
quickSelect(data, comparator, begin, end, left);
return data.get(left);
}
@@ -846,7 +868,7 @@ public class QuickSelect {
* @param quant Quantile to compute
* @return Value at quantile
*/
- public static DBID quantile(ArrayModifiableDBIDs data, Comparator<? super DBID> comparator, double quant) {
+ public static DBID quantile(ArrayModifiableDBIDs data, Comparator<? super DBIDRef> comparator, double quant) {
return quantile(data, comparator, 0, data.size(), quant);
}
@@ -864,7 +886,7 @@ public class QuickSelect {
* @param quant Quantile to compute
* @return Value at quantile
*/
- public static DBID quantile(ArrayModifiableDBIDs data, Comparator<? super DBID> comparator, int begin, int end, double quant) {
+ public static DBID quantile(ArrayModifiableDBIDs data, Comparator<? super DBIDRef> comparator, int begin, int end, double quant) {
final int length = end - begin;
assert (length > 0) : "Quantile on empty set?";
// Integer division is "floor" since we are non-negative.
@@ -885,60 +907,70 @@ public class QuickSelect {
* @param end Interval end (inclusive)
* @param rank rank position we are interested in (starting at 0)
*/
- public static void quickSelect(ArrayModifiableDBIDs data, Comparator<? super DBID> comparator, int start, int end, int rank) {
- // Optimization for small arrays
- // This also ensures a minimum size below
- if(start + SMALL > end) {
- insertionSort(data, comparator, start, end);
- return;
- }
+ public static void quickSelect(ArrayModifiableDBIDs data, Comparator<? super DBIDRef> comparator, int start, int end, int rank) {
+ while(true) {
+ // Optimization for small arrays
+ // This also ensures a minimum size below
+ if(start + SMALL > end) {
+ insertionSort(data, comparator, start, end);
+ return;
+ }
- // Pick pivot from three candidates: start, middle, end
- // Since we compare them, we can also just "bubble sort" them.
- final int middle = (start + end) / 2;
- if(comparator.compare(data.get(start), data.get(middle)) > 0) {
- data.swap(start, middle);
- }
- if(comparator.compare(data.get(start), data.get(end - 1)) > 0) {
- data.swap(start, end - 1);
- }
- if(comparator.compare(data.get(middle), data.get(end - 1)) > 0) {
- data.swap(middle, end - 1);
- }
- // TODO: use more candidates for larger arrays?
+ // Pick pivot from three candidates: start, middle, end
+ // Since we compare them, we can also just "bubble sort" them.
+ final int middle = (start + end) >> 1;
+ if(comparator.compare(data.get(start), data.get(middle)) > 0) {
+ data.swap(start, middle);
+ }
+ if(comparator.compare(data.get(start), data.get(end - 1)) > 0) {
+ data.swap(start, end - 1);
+ }
+ if(comparator.compare(data.get(middle), data.get(end - 1)) > 0) {
+ data.swap(middle, end - 1);
+ }
+ // TODO: use more candidates for larger arrays?
+
+ final DBID pivot = data.get(middle);
+ // Move middle element out of the way, just before end
+ // (Since we already know that "end" is bigger)
+ data.swap(middle, end - 2);
+
+ // Begin partitioning
+ int i = start + 1, j = end - 3;
+ DBIDArrayIter refi = data.iter(), refj = data.iter();
+ refi.seek(i);
+ refj.seek(j);
+ // This is classic quicksort stuff
+ while(true) {
+ while(comparator.compare(refi, pivot) <= 0 && i <= j) {
+ i++;
+ refi.advance();
+ }
+ while(comparator.compare(refj, pivot) >= 0 && j >= i) {
+ j--;
+ refj.retract();
+ }
+ if(i >= j) {
+ break;
+ }
+ data.swap(i, j);
+ }
- final DBID pivot = data.get(middle);
- // Move middle element out of the way, just before end
- // (Since we already know that "end" is bigger)
- data.swap(middle, end - 2);
+ // Move pivot (former middle element) back into the appropriate place
+ data.swap(i, end - 2);
- // Begin partitioning
- int i = start + 1, j = end - 3;
- // This is classic quicksort stuff
- while(true) {
- while(comparator.compare(data.get(i), pivot) <= 0 && i <= j) {
- i++;
+ // In contrast to quicksort, we only need to recurse into the half we are
+ // interested in. Instead of recursion we now use iteration.
+ if(rank < i) {
+ end = i;
}
- while(comparator.compare(data.get(j), pivot) >= 0 && j >= i) {
- j--;
+ else if(rank > i) {
+ start = i + 1;
}
- if(i >= j) {
+ else {
break;
}
- data.swap(i, j);
- }
-
- // Move pivot (former middle element) back into the appropriate place
- data.swap(i, end - 2);
-
- // In contrast to quicksort, we only need to recurse into the half we are
- // interested in.
- if(rank < i) {
- quickSelect(data, comparator, start, i, rank);
- }
- else if(rank > i) {
- quickSelect(data, comparator, i + 1, end, rank);
- }
+ } // Loop until rank==i
}
/**
@@ -948,9 +980,15 @@ public class QuickSelect {
* @param start Interval start
* @param end Interval end
*/
- private static void insertionSort(ArrayModifiableDBIDs data, Comparator<? super DBID> comparator, int start, int end) {
+ private static void insertionSort(ArrayModifiableDBIDs data, Comparator<? super DBIDRef> comparator, int start, int end) {
+ DBIDArrayIter iter1 = data.iter(), iter2 = data.iter();
for(int i = start + 1; i < end; i++) {
- for(int j = i; j > start && comparator.compare(data.get(j - 1), data.get(j)) > 0; j--) {
+ iter1.seek(i - 1);
+ iter2.seek(i);
+ for(int j = i; j > start; j--, iter1.retract(), iter2.retract()) {
+ if(comparator.compare(iter1, iter2) > 0) {
+ break;
+ }
data.swap(j, j - 1);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayLikeUtil.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayLikeUtil.java
index f485e214..a08feb1a 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayLikeUtil.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayLikeUtil.java
@@ -40,29 +40,29 @@ import de.lmu.ifi.dbs.elki.data.NumberVector;
*/
public final class ArrayLikeUtil {
/**
- * Static instance for lists
+ * Static instance for lists.
*/
private static final ListArrayAdapter<Object> LISTADAPTER = new ListArrayAdapter<Object>();
/**
- * Static instance for lists of numbers
+ * Static instance for lists of numbers.
*/
private static final NumberListArrayAdapter<Number> NUMBERLISTADAPTER = new NumberListArrayAdapter<Number>();
/**
- * Static instance
+ * Static instance.
*/
- private final static IdentityArrayAdapter<?> IDENTITYADAPTER = new IdentityArrayAdapter<Object>();
+ private static final IdentityArrayAdapter<?> IDENTITYADAPTER = new IdentityArrayAdapter<Object>();
/**
- * Static instance
+ * Static instance.
*/
- private static final FeatureVectorAdapter<?> FEATUREVECTORADAPTER = new FeatureVectorAdapter<Number>();
+ public static final FeatureVectorAdapter<?> FEATUREVECTORADAPTER = new FeatureVectorAdapter<Number>();
/**
* Use a number vector in the array API.
*/
- private static final NumberVectorAdapter<?> NUMBERVECTORADAPTER = new NumberVectorAdapter<Double>();
+ public static final NumberVectorAdapter<?> NUMBERVECTORADAPTER = new NumberVectorAdapter<Double>();
/**
* Use a double array in the array API.
@@ -83,6 +83,13 @@ public final class ArrayLikeUtil {
* Use ArrayDBIDs as array.
*/
public static final ArrayDBIDsAdapter ARRAYDBIDADAPTER = new ArrayDBIDsAdapter();
+
+ /**
+ * Fake constructor. Do not instantiate!
+ */
+ private ArrayLikeUtil() {
+ // Do not instantiate
+ }
/**
* Cast the static instance.
@@ -124,7 +131,7 @@ public final class ArrayLikeUtil {
* @return Instance
*/
@SuppressWarnings("unchecked")
- public static <F> FeatureVectorAdapter<F> featureVectorAdapter(FeatureVector<?, F> prototype) {
+ public static <F> FeatureVectorAdapter<F> featureVectorAdapter(FeatureVector<F> prototype) {
return (FeatureVectorAdapter<F>) FEATUREVECTORADAPTER;
}
@@ -135,7 +142,7 @@ public final class ArrayLikeUtil {
* @return Instance
*/
@SuppressWarnings("unchecked")
- public static <N extends Number> NumberVectorAdapter<N> numberVectorAdapter(NumberVector<?, N> prototype) {
+ public static <N extends Number> NumberVectorAdapter<N> numberVectorAdapter(NumberVector<N> prototype) {
return (NumberVectorAdapter<N>) NUMBERVECTORADAPTER;
}
@@ -185,7 +192,7 @@ public final class ArrayLikeUtil {
}
/**
- * Convert a numeric array-like to a <code>double[]</code>
+ * Convert a numeric array-like to a <code>double[]</code>.
*
* @param array Array-like
* @param adapter Adapter
@@ -215,12 +222,12 @@ public final class ArrayLikeUtil {
* @param obj Object to convert
* @return primitive double array
*/
- public static <N extends Number> double[] toPrimitiveDoubleArray(NumberVector<?, N> obj) {
+ public static <N extends Number> double[] toPrimitiveDoubleArray(NumberVector<N> obj) {
return toPrimitiveDoubleArray(obj, numberVectorAdapter(obj));
}
/**
- * Convert a numeric array-like to a <code>float[]</code>
+ * Convert a numeric array-like to a <code>float[]</code>.
*
* @param array Array-like
* @param adapter Adapter
@@ -250,7 +257,7 @@ public final class ArrayLikeUtil {
* @param obj Object to convert
* @return primitive float array
*/
- public static <N extends Number> float[] toPrimitiveFloatArray(NumberVector<?, N> obj) {
+ public static <N extends Number> float[] toPrimitiveFloatArray(NumberVector<N> obj) {
return toPrimitiveFloatArray(obj, numberVectorAdapter(obj));
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/DoubleArrayAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/DoubleArrayAdapter.java
index 43419103..117f3845 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/DoubleArrayAdapter.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/DoubleArrayAdapter.java
@@ -45,8 +45,9 @@ class DoubleArrayAdapter implements NumberArrayAdapter<Double, double[]> {
}
@Override
+ @Deprecated
public Double get(double[] array, int off) throws IndexOutOfBoundsException {
- return array[off];
+ return Double.valueOf(array[off]);
}
@Override
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ExtendedArray.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ExtendedArray.java
index a195360b..491c4f95 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ExtendedArray.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ExtendedArray.java
@@ -68,13 +68,13 @@ public class ExtendedArray<T> implements ArrayAdapter<T, ExtendedArray<T>> {
@Override
public int size(ExtendedArray<T> array) {
- assert (array == this);
+ assert (this == array);
return size;
}
@Override
public T get(ExtendedArray<T> array, int off) throws IndexOutOfBoundsException {
- assert (array == this);
+ assert (this == array);
if(off == size - 1) {
return extra;
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FeatureVectorAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FeatureVectorAdapter.java
index 19c2ec19..38b662e8 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FeatureVectorAdapter.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FeatureVectorAdapter.java
@@ -34,7 +34,7 @@ import de.lmu.ifi.dbs.elki.data.FeatureVector;
*
* @param <F> Feature type
*/
-public class FeatureVectorAdapter<F> implements ArrayAdapter<F, FeatureVector<?, F>> {
+public class FeatureVectorAdapter<F> implements ArrayAdapter<F, FeatureVector<F>> {
/**
* Constructor.
*
@@ -45,12 +45,12 @@ public class FeatureVectorAdapter<F> implements ArrayAdapter<F, FeatureVector<?,
}
@Override
- public int size(FeatureVector<?, F> array) {
+ public int size(FeatureVector<F> array) {
return array.getDimensionality();
}
@Override
- public F get(FeatureVector<?, F> array, int off) throws IndexOutOfBoundsException {
+ public F get(FeatureVector<F> array, int off) throws IndexOutOfBoundsException {
return array.getValue(off);
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FloatArrayAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FloatArrayAdapter.java
index 14d42dc5..ae501039 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FloatArrayAdapter.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FloatArrayAdapter.java
@@ -46,8 +46,9 @@ class FloatArrayAdapter implements NumberArrayAdapter<Float, float[]> {
}
@Override
+ @Deprecated
public Float get(float[] array, int off) throws IndexOutOfBoundsException {
- return array[off];
+ return Float.valueOf(array[off]);
}
@Override
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ListArrayAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ListArrayAdapter.java
index 792fad0b..cba1e706 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ListArrayAdapter.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ListArrayAdapter.java
@@ -1,4 +1,26 @@
package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike;
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
import java.util.List;
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberVectorAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberVectorAdapter.java
index c75acd64..fd1e6636 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberVectorAdapter.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberVectorAdapter.java
@@ -1,7 +1,5 @@
package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike;
-import de.lmu.ifi.dbs.elki.data.NumberVector;
-
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
@@ -25,6 +23,8 @@ import de.lmu.ifi.dbs.elki.data.NumberVector;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+
/**
* Adapter to use a feature vector as an array of features.
*
@@ -34,7 +34,7 @@ import de.lmu.ifi.dbs.elki.data.NumberVector;
*
* @param <N> Number type
*/
-public class NumberVectorAdapter<N extends Number> implements NumberArrayAdapter<N, NumberVector<?, N>> {
+public class NumberVectorAdapter<N extends Number> implements NumberArrayAdapter<N, NumberVector<N>> {
/**
* Constructor.
*
@@ -45,42 +45,43 @@ public class NumberVectorAdapter<N extends Number> implements NumberArrayAdapter
}
@Override
- public int size(NumberVector<?, N> array) {
+ public int size(NumberVector<N> array) {
return array.getDimensionality();
}
@Override
- public N get(NumberVector<?, N> array, int off) throws IndexOutOfBoundsException {
+ @Deprecated
+ public N get(NumberVector<N> array, int off) throws IndexOutOfBoundsException {
return array.getValue(off + 1);
}
@Override
- public double getDouble(NumberVector<?, N> array, int off) throws IndexOutOfBoundsException {
- return array.doubleValue(off + 1);
+ public double getDouble(NumberVector<N> array, int off) throws IndexOutOfBoundsException {
+ return array.doubleValue(off);
}
@Override
- public float getFloat(NumberVector<?, N> array, int off) throws IndexOutOfBoundsException {
- return array.floatValue(off + 1);
+ public float getFloat(NumberVector<N> array, int off) throws IndexOutOfBoundsException {
+ return array.floatValue(off);
}
@Override
- public int getInteger(NumberVector<?, N> array, int off) throws IndexOutOfBoundsException {
- return array.intValue(off + 1);
+ public int getInteger(NumberVector<N> array, int off) throws IndexOutOfBoundsException {
+ return array.intValue(off);
}
@Override
- public short getShort(NumberVector<?, N> array, int off) throws IndexOutOfBoundsException {
- return array.shortValue(off + 1);
+ public short getShort(NumberVector<N> array, int off) throws IndexOutOfBoundsException {
+ return array.shortValue(off);
}
@Override
- public long getLong(NumberVector<?, N> array, int off) throws IndexOutOfBoundsException {
- return array.longValue(off + 1);
+ public long getLong(NumberVector<N> array, int off) throws IndexOutOfBoundsException {
+ return array.longValue(off);
}
@Override
- public byte getByte(NumberVector<?, N> array, int off) throws IndexOutOfBoundsException {
- return array.byteValue(off + 1);
+ public byte getByte(NumberVector<N> array, int off) throws IndexOutOfBoundsException {
+ return array.byteValue(off);
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SubsetArrayAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SubsetArrayAdapter.java
index b2958358..c607759f 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SubsetArrayAdapter.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SubsetArrayAdapter.java
@@ -23,7 +23,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike;
*/
/**
- * Subset array adapter (allows reordering and projection)
+ * Subset array adapter (allows reordering and projection).
*
* @author Erich Schubert
*
@@ -32,12 +32,12 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike;
*/
public class SubsetArrayAdapter<T, A> implements ArrayAdapter<T, A> {
/**
- * Wrapped adapter
+ * Wrapped adapter.
*/
ArrayAdapter<T, ? super A> wrapped;
/**
- * Offsets to return
+ * Offsets to return.
*/
int[] offs;
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/TDoubleListAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/TDoubleListAdapter.java
index 889c64e8..cee393ac 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/TDoubleListAdapter.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/TDoubleListAdapter.java
@@ -45,8 +45,9 @@ public class TDoubleListAdapter implements NumberArrayAdapter<Double, TDoubleLis
}
@Override
+ @Deprecated
public Double get(TDoubleList array, int off) throws IndexOutOfBoundsException {
- return array.get(off);
+ return Double.valueOf(array.get(off));
}
@Override
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/IntegerArrayQuickSort.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/IntegerArrayQuickSort.java
new file mode 100644
index 00000000..a0c93997
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/IntegerArrayQuickSort.java
@@ -0,0 +1,225 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.arrays;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+
+/**
+ * Class to sort an int array, using a modified quicksort.
+ *
+ * The implementation is closely based on:
+ * <p>
+ * Dual-Pivot Quicksort<br />
+ * Vladimir Yaroslavskiy
+ * </p>
+ *
+ * and differs mostly in that we sort different kinds of arrays,
+ * and allow the use of comparators - useful in particular when
+ * the array references external objects.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.uses IntegerComparator
+ */
+@Reference(authors = "Vladimir Yaroslavskiy", title = "Dual-Pivot Quicksort", booktitle = "http://iaroslavski.narod.ru/quicksort/", url = "http://iaroslavski.narod.ru/quicksort/")
+public class IntegerArrayQuickSort {
+ /**
+ * Threshold for using insertion sort. Value taken from Javas QuickSort,
+ * assuming that it will be similar for our data sets.
+ */
+ private static final int INSERTION_THRESHOLD = 47;
+
+ /**
+ * Sort the full array using the given comparator.
+ *
+ * @param data Data to sort
+ * @param comp Comparator
+ */
+ public static void sort(int[] data, IntegerComparator comp) {
+ sort(data, 0, data.length, comp);
+ }
+
+ /**
+ * Sort the array using the given comparator.
+ *
+ * @param data Data to sort
+ * @param start First index
+ * @param end Last index (exclusive)
+ * @param comp Comparator
+ */
+ public static void sort(int[] data, int start, int end, IntegerComparator comp) {
+ quickSort(data, start, end - 1, comp);
+ }
+
+ /**
+ * Actual recursive sort function.
+ *
+ * @param data Data to sort
+ * @param start First index
+ * @param end Last index (inclusive!)
+ * @param comp Comparator
+ */
+ private static void quickSort(int[] data, final int start, final int end, IntegerComparator comp) {
+ final int len = end - start;
+ if (len < INSERTION_THRESHOLD) {
+ // Classic insertion sort.
+ for (int i = start + 1; i <= end; i++) {
+ for (int j = i; j > start; j--) {
+ if (comp.compare(data[j], data[j - 1]) < 0) {
+ final int tmp = data[j - 1];
+ data[j - 1] = data[j];
+ data[j] = tmp;
+ } else {
+ break;
+ }
+ }
+ }
+ return;
+ }
+
+ // Choose pivots by looking at five candidates.
+ final int seventh = (len >> 3) + (len >> 6) + 1;
+ final int m3 = (start + end) >> 1; // middle
+ final int m2 = m3 - seventh;
+ final int m1 = m2 - seventh;
+ final int m4 = m3 + seventh;
+ final int m5 = m4 + seventh;
+
+ // Explicit (and optimal) sorting network for 5 elements
+ // See Knuth for details.
+ if (comp.compare(data[m1], data[m2]) > 0) {
+ final int tmp = data[m2];
+ data[m2] = data[m1];
+ data[m1] = tmp;
+ }
+ if (comp.compare(data[m1], data[m3]) > 0) {
+ final int tmp = data[m3];
+ data[m3] = data[m1];
+ data[m1] = tmp;
+ }
+ if (comp.compare(data[m2], data[m3]) > 0) {
+ final int tmp = data[m3];
+ data[m3] = data[m2];
+ data[m2] = tmp;
+ }
+ if (comp.compare(data[m4], data[m5]) > 0) {
+ final int tmp = data[m5];
+ data[m5] = data[m4];
+ data[m4] = tmp;
+ }
+ if (comp.compare(data[m1], data[m4]) > 0) {
+ final int tmp = data[m4];
+ data[m4] = data[m1];
+ data[m1] = tmp;
+ }
+ if (comp.compare(data[m3], data[m4]) > 0) {
+ final int tmp = data[m4];
+ data[m4] = data[m3];
+ data[m3] = tmp;
+ }
+ if (comp.compare(data[m2], data[m5]) > 0) {
+ final int tmp = data[m5];
+ data[m5] = data[m2];
+ data[m2] = tmp;
+ }
+ if (comp.compare(data[m2], data[m3]) > 0) {
+ final int tmp = data[m3];
+ data[m3] = data[m2];
+ data[m2] = tmp;
+ }
+ if (comp.compare(data[m4], data[m5]) > 0) {
+ final int tmp = data[m5];
+ data[m5] = data[m4];
+ data[m4] = tmp;
+ }
+
+ // Choose the 2 and 4th as pivots, as we want to get three parts
+ // Copy to variables v1 and v3, replace them with the start and end
+ // Note: do not modify v1 or v3 until we put them back!
+ final int lpivot = data[m2];
+ final int rpivot = data[m4];
+ data[m2] = data[start];
+ data[m4] = data[end];
+
+ // A tie is when the two chosen pivots are the same
+ final boolean tied = comp.compare(lpivot, rpivot) == 0;
+
+ // Insertion points for pivot areas.
+ int left = start + 1;
+ int right = end - 1;
+
+ // Note: we merged the ties and no ties cases.
+ // This likely is marginally slower, but not at a macro level
+ // And you never know with hotspot.
+ for (int k = left; k <= right; k++) {
+ final int tmp = data[k];
+ final int c = comp.compare(tmp, lpivot);
+ if (c == 0) {
+ continue;
+ } else if (c < 0) {
+ // Traditional quicksort
+ data[k] = data[left];
+ data[left] = tmp;
+ left++;
+ } else if (tied || comp.compare(tmp, rpivot) > 0) {
+ // Now look at the right. First skip correct entries there, too
+ while (true) {
+ final int tmp2 = data[right];
+ if (comp.compare(tmp2, rpivot) > 0 && k < right) {
+ right--;
+ } else {
+ break;
+ }
+ }
+ // Now move tmp from k to the right.
+ data[k] = data[right];
+ data[right] = tmp;
+ right--;
+ // Test the element we just inserted: left or center?
+ if (comp.compare(data[k], lpivot) < 0) {
+ final int tmp2 = data[k];
+ data[k] = data[left];
+ data[left] = tmp2;
+ left++;
+ } // else: center. cannot be on right.
+ }
+ }
+ // Put the pivot elements back in.
+ // Remember: we must not modify v1 and v3 above.
+ data[start] = data[left - 1];
+ data[left - 1] = lpivot;
+ data[end] = data[right + 1];
+ data[right + 1] = rpivot;
+ // v1 and v3 are now safe to modify again. Perform recursion:
+ quickSort(data, start, left - 2, comp);
+ // Handle the middle part - if necessary:
+ if (!tied) {
+ // TODO: the original publication had a special tie handling here.
+ // It shouldn't affect correctness, but probably improves situations
+ // with a lot of tied elements.
+ quickSort(data, left, right, comp);
+ }
+ quickSort(data, right + 2, end, comp);
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/IntegerComparator.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/IntegerComparator.java
new file mode 100644
index 00000000..51164425
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/IntegerComparator.java
@@ -0,0 +1,40 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.arrays;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Interface for comparing two integers.
+ *
+ * @author Erich Schubert
+ */
+public interface IntegerComparator {
+ /**
+ * Compare two integers.
+ *
+ * @param x First int
+ * @param y Second int
+ * @return Comparison result
+ */
+ int compare(int x, int y);
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/package-info.java
new file mode 100644
index 00000000..3db78032
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/package-info.java
@@ -0,0 +1,26 @@
+/**
+ * <p>Utilities for arrays: advanced sorting for primitvie arrays.</p>
+ */
+/*
+This file is part of ELKI:
+Environment for Developing KDD-Applications Supported by Index-Structures
+
+Copyright (C) 2012
+Ludwig-Maximilians-Universität München
+Lehr- und Forschungseinheit für Datenbanksysteme
+ELKI Development Team
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+package de.lmu.ifi.dbs.elki.utilities.datastructures.arrays; \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/AbstractHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/AbstractHeap.java
new file mode 100644
index 00000000..b6f098e6
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/AbstractHeap.java
@@ -0,0 +1,103 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Abstract base class for heaps.
+ *
+ * @author Erich Schubert
+ */
+public class AbstractHeap {
+ /**
+ * Default initial capacity
+ */
+ public static final int DEFAULT_INITIAL_CAPACITY = 11;
+
+ /**
+ * Current number of objects
+ */
+ public int size = 0;
+
+ /**
+ * Indicate up to where the heap is valid
+ */
+ public int validSize = 0;
+
+ /**
+ * (Structural) modification counter. Used to invalidate iterators.
+ */
+ public int modCount = 0;
+
+ /**
+ * Constructor.
+ */
+ public AbstractHeap() {
+ super();
+ }
+
+ /**
+ * Query the size
+ *
+ * @return Size
+ */
+ public int size() {
+ return this.size;
+ }
+
+ /**
+ * Delete all elements from the heap.
+ */
+ public void clear() {
+ this.size = 0;
+ this.validSize = -1;
+ heapModified();
+ }
+
+ /**
+ * Test whether we need to resize to have the requested capacity.
+ *
+ * @param requiredSize required capacity
+ * @param capacity Current capacity
+ * @return new capacity
+ */
+ protected final int desiredSize(int requiredSize, int capacity) {
+ // Double until 64, then increase by 50% each time.
+ int newCapacity = ((capacity < 64) ? ((capacity + 1) * 2) : ((capacity / 2) * 3));
+ // overflow?
+ if (newCapacity < 0) {
+ throw new OutOfMemoryError();
+ }
+ if (requiredSize > newCapacity) {
+ newCapacity = requiredSize;
+ }
+ return newCapacity;
+ }
+
+ /**
+ * Called at the end of each heap modification.
+ */
+ protected void heapModified() {
+ modCount++;
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMaxHeap.java
new file mode 100644
index 00000000..ab8ef1bb
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMaxHeap.java
@@ -0,0 +1,63 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Basic in-memory heap structure.
+ *
+ * This heap is built lazily: if you first add many elements, then poll the
+ * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log
+ * n). This is implemented via a simple validTo counter.
+ *
+ * @author Erich Schubert
+ */
+public class ComparableMaxHeap<K extends Comparable<K>> extends ObjectHeap<K> {
+ /**
+ * Constructor with default capacity.
+ */
+ public ComparableMaxHeap() {
+ super(DEFAULT_INITIAL_CAPACITY);
+ }
+
+ /**
+ * Constructor with initial capacity.
+ *
+ * @param size initial capacity
+ */
+ public ComparableMaxHeap(int size) {
+ super(size);
+ }
+
+ /**
+ * Compare two objects
+ *
+ * @param o1 First object
+ * @param o2 Second object
+ */
+ @Override
+ @SuppressWarnings("unchecked")
+ protected boolean comp(Object o1, Object o2) {
+ return ((K) o1).compareTo((K) o2) < 0;
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMinHeap.java
new file mode 100644
index 00000000..06d2cb32
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMinHeap.java
@@ -0,0 +1,63 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Basic in-memory heap structure.
+ *
+ * This heap is built lazily: if you first add many elements, then poll the
+ * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log
+ * n). This is implemented via a simple validTo counter.
+ *
+ * @author Erich Schubert
+ */
+public class ComparableMinHeap<K extends Comparable<K>> extends ObjectHeap<K> {
+ /**
+ * Constructor with default capacity.
+ */
+ public ComparableMinHeap() {
+ super(DEFAULT_INITIAL_CAPACITY);
+ }
+
+ /**
+ * Constructor with initial capacity.
+ *
+ * @param size initial capacity
+ */
+ public ComparableMinHeap(int size) {
+ super(size);
+ }
+
+ /**
+ * Compare two objects
+ *
+ * @param o1 First object
+ * @param o2 Second object
+ */
+ @Override
+ @SuppressWarnings("unchecked")
+ protected boolean comp(Object o1, Object o2) {
+ return ((K) o1).compareTo((K) o2) > 0;
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleHeap.java
new file mode 100644
index 00000000..f9f928bd
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleHeap.java
@@ -0,0 +1,264 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Arrays;
+
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+
+/**
+ * Basic in-memory heap structure.
+ *
+ * This heap is built lazily: if you first add many elements, then poll the
+ * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log
+ * n). This is implemented via a simple validTo counter.
+ *
+ * @author Erich Schubert
+ */
+public abstract class DoubleHeap extends AbstractHeap {
+ /**
+ * Heap storage: queue
+ */
+ protected transient double[] queue;
+
+ /**
+ * Constructor with initial capacity.
+ *
+ * @param size initial capacity
+ */
+ public DoubleHeap(int size) {
+ super();
+ this.size = 0;
+ this.queue = new double[size];
+ }
+
+ /**
+ * Add a key-value pair to the heap
+ *
+ * @param key Key
+ */
+ public void add(double key) {
+ // resize when needed
+ if (size + 1 > queue.length) {
+ resize(size + 1);
+ }
+ // final int pos = size;
+ this.queue[size] = key;
+ this.size += 1;
+ heapifyUp(size - 1, key);
+ validSize += 1;
+ heapModified();
+ }
+
+ /**
+ * Add a key-value pair to the heap, except if the new element is larger than
+ * the top, and we are at design size (overflow)
+ *
+ * @param key Key
+ * @param max Maximum size of heap
+ */
+ public void add(double key, int max) {
+ if (size < max) {
+ add(key);
+ } else if (comp(key, peek())) {
+ replaceTopElement(key);
+ }
+ }
+
+ /**
+ * Combined operation that removes the top element, and inserts a new element
+ * instead.
+ *
+ * @param e New element to insert
+ * @return Previous top element of the heap
+ */
+ public double replaceTopElement(double e) {
+ ensureValid();
+ double oldroot = queue[0];
+ heapifyDown(0, e);
+ heapModified();
+ return oldroot;
+ }
+
+ /**
+ * Get the current top key
+ *
+ * @return Top key
+ */
+ public double peek() {
+ if (size == 0) {
+ throw new ArrayIndexOutOfBoundsException("Peek() on an empty heap!");
+ }
+ ensureValid();
+ return queue[0];
+ }
+
+ /**
+ * Remove the first element
+ *
+ * @return Top element
+ */
+ public double poll() {
+ return removeAt(0);
+ }
+
+ /**
+ * Repair the heap
+ */
+ protected void ensureValid() {
+ if (validSize != size) {
+ if (size > 1) {
+ // Parent of first invalid
+ int nextmin = validSize > 0 ? ((validSize - 1) >>> 1) : 0;
+ int curmin = MathUtil.nextAllOnesInt(nextmin); // Next line
+ int nextmax = curmin - 1; // End of valid line
+ int pos = (size - 2) >>> 1; // Parent of last element
+ // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin+", "+nextmin);
+ while (pos >= nextmin) {
+ // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin);
+ while (pos >= curmin) {
+ if (!heapifyDown(pos, queue[pos])) {
+ final int parent = (pos - 1) >>> 1;
+ if (parent < curmin) {
+ nextmin = Math.min(nextmin, parent);
+ nextmax = Math.max(nextmax, parent);
+ }
+ }
+ pos--;
+ }
+ curmin = nextmin;
+ pos = Math.min(pos, nextmax);
+ nextmax = -1;
+ }
+ }
+ validSize = size;
+ }
+ }
+
+ /**
+ * Remove the element at the given position.
+ *
+ * @param pos Element position.
+ * @return Removed element
+ */
+ protected double removeAt(int pos) {
+ if (pos < 0 || pos >= size) {
+ return 0.0;
+ }
+ final double top = queue[0];
+ // Replacement object:
+ final double reinkey = queue[size - 1];
+ // Keep heap in sync
+ if (validSize == size) {
+ size -= 1;
+ validSize -= 1;
+ heapifyDown(pos, reinkey);
+ } else {
+ size -= 1;
+ validSize = Math.min(pos >>> 1, validSize);
+ queue[pos] = reinkey;
+ }
+ heapModified();
+ return top;
+ }
+
+ /**
+ * Execute a "Heapify Upwards" aka "SiftUp". Used in insertions.
+ *
+ * @param pos insertion position
+ * @param curkey Current key
+ */
+ protected void heapifyUp(int pos, double curkey) {
+ while (pos > 0) {
+ final int parent = (pos - 1) >>> 1;
+ double parkey = queue[parent];
+
+ if (comp(curkey, parkey)) { // Compare
+ break;
+ }
+ queue[pos] = parkey;
+ pos = parent;
+ }
+ queue[pos] = curkey;
+ }
+
+ /**
+ * Execute a "Heapify Downwards" aka "SiftDown". Used in deletions.
+ *
+ * @param ipos re-insertion position
+ * @param curkey Current key
+ * @return true when the order was changed
+ */
+ protected boolean heapifyDown(final int ipos, double curkey) {
+ int pos = ipos;
+ final int half = size >>> 1;
+ while (pos < half) {
+ // Get left child (must exist!)
+ int cpos = (pos << 1) + 1;
+ double chikey = queue[cpos];
+ // Test right child, if present
+ final int rchild = cpos + 1;
+ if (rchild < size) {
+ double right = queue[rchild];
+ if (comp(chikey, right)) { // Compare
+ cpos = rchild;
+ chikey = right;
+ }
+ }
+
+ if (comp(chikey, curkey)) { // Compare
+ break;
+ }
+ queue[pos] = chikey;
+ pos = cpos;
+ }
+ queue[pos] = curkey;
+ return (pos == ipos);
+ }
+
+ /**
+ * Test whether we need to resize to have the requested capacity.
+ *
+ * @param requiredSize required capacity
+ */
+ protected final void resize(int requiredSize) {
+ queue = Arrays.copyOf(queue, desiredSize(requiredSize, queue.length));
+ }
+
+ /**
+ * Delete all elements from the heap.
+ */
+ @Override
+ public void clear() {
+ super.clear();
+ for (int i = 0; i < size; i++) {
+ queue[i] = 0.0;
+ }
+ }
+
+ /**
+ * Compare two objects
+ */
+ abstract protected boolean comp(double o1, double o2);
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMaxHeap.java
new file mode 100644
index 00000000..1b7d6037
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMaxHeap.java
@@ -0,0 +1,62 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Basic in-memory heap structure.
+ *
+ * This heap is built lazily: if you first add many elements, then poll the
+ * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log
+ * n). This is implemented via a simple validTo counter.
+ *
+ * @author Erich Schubert
+ */
+public class DoubleMaxHeap extends DoubleHeap {
+ /**
+ * Constructor with default capacity.
+ */
+ public DoubleMaxHeap() {
+ super(DEFAULT_INITIAL_CAPACITY);
+ }
+
+ /**
+ * Constructor with initial capacity.
+ *
+ * @param size initial capacity
+ */
+ public DoubleMaxHeap(int size) {
+ super(size);
+ }
+
+ /**
+ * Compare two objects
+ *
+ * @param o1 First object
+ * @param o2 Second object
+ */
+ @Override
+ protected boolean comp(double o1, double o2) {
+ return o1 < o2;
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMinHeap.java
new file mode 100644
index 00000000..2ce05ff9
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMinHeap.java
@@ -0,0 +1,62 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Basic in-memory heap structure.
+ *
+ * This heap is built lazily: if you first add many elements, then poll the
+ * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log
+ * n). This is implemented via a simple validTo counter.
+ *
+ * @author Erich Schubert
+ */
+public class DoubleMinHeap extends DoubleHeap {
+ /**
+ * Constructor with default capacity.
+ */
+ public DoubleMinHeap() {
+ super(DEFAULT_INITIAL_CAPACITY);
+ }
+
+ /**
+ * Constructor with initial capacity.
+ *
+ * @param size initial capacity
+ */
+ public DoubleMinHeap(int size) {
+ super(size);
+ }
+
+ /**
+ * Compare two objects
+ *
+ * @param o1 First object
+ * @param o2 Second object
+ */
+ @Override
+ protected boolean comp(double o1, double o2) {
+ return o1 > o2;
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjMaxHeap.java
new file mode 100644
index 00000000..8417309a
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjMaxHeap.java
@@ -0,0 +1,328 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Arrays;
+import java.util.Comparator;
+
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+
+/**
+ * Basic in-memory heap structure.
+ *
+ * This heap is built lazily: if you first add many elements, then poll the
+ * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log
+ * n). This is implemented via a simple validTo counter.
+ *
+ * @author Erich Schubert
+ *
+ * @param <V> value type
+ */
+public class DoubleObjMaxHeap<V> {
+ /**
+ * Heap storage: keys
+ */
+ protected double[] keys;
+
+ /**
+ * Heap storage: values
+ */
+ protected Object[] values;
+
+ /**
+ * Current number of objects
+ */
+ protected int size = 0;
+
+ /**
+ * Indicate up to where the heap is valid
+ */
+ protected int validSize = 0;
+
+ /**
+ * (Structural) modification counter. Used to invalidate iterators.
+ */
+ public transient int modCount = 0;
+
+ /**
+ * Default initial capacity
+ */
+ private static final int DEFAULT_INITIAL_CAPACITY = 11;
+
+ /**
+ * Default constructor: default capacity, natural ordering.
+ */
+ public DoubleObjMaxHeap() {
+ this(DEFAULT_INITIAL_CAPACITY);
+ }
+
+ /**
+ * Constructor with initial capacity and {@link Comparator}.
+ *
+ * @param size initial capacity
+ */
+ public DoubleObjMaxHeap(int size) {
+ super();
+ this.size = 0;
+ this.keys = new double[size];
+ this.values = new Object[size];
+ }
+
+ /**
+ * Add a key-value pair to the heap
+ *
+ * @param key Key
+ * @param val Value
+ * @return Success code
+ */
+ public boolean add(double key, V val) {
+ // resize when needed
+ if(size + 1 > keys.length) {
+ resize(size + 1);
+ }
+ // final int pos = size;
+ this.keys[size] = key;
+ this.values[size] = val;
+ this.size += 1;
+ heapifyUp(size - 1, key, val);
+ validSize += 1;
+ // We have changed - return true according to {@link Collection#put}
+ modCount++;
+ return true;
+ }
+
+ /**
+ * Get the current top key
+ *
+ * @return Top key
+ */
+ public double peekKey() {
+ if(size == 0) {
+ throw new ArrayIndexOutOfBoundsException("Peek() on an empty heap!");
+ }
+ ensureValid();
+ return keys[0];
+ }
+
+ /**
+ * Get the current top value
+ *
+ * @return Value
+ */
+ @SuppressWarnings("unchecked")
+ public V peekValue() {
+ if(size == 0) {
+ throw new ArrayIndexOutOfBoundsException("Peek() on an empty heap!");
+ }
+ ensureValid();
+ return (V) values[0];
+ }
+
+ /**
+ * Remove the first element
+ */
+ public void poll() {
+ removeAt(0);
+ }
+
+ /**
+ * Repair the heap
+ */
+ protected void ensureValid() {
+ if(validSize != size) {
+ if(size > 1) {
+ // Parent of first invalid
+ int nextmin = validSize > 0 ? ((validSize - 1) >>> 1) : 0;
+ int curmin = MathUtil.nextAllOnesInt(nextmin); // Next line
+ int nextmax = curmin - 1; // End of valid line
+ int pos = (size - 2) >>> 1; // Parent of last element
+ // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin+", "+nextmin);
+ while(pos >= nextmin) {
+ // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin);
+ while(pos >= curmin) {
+ if(!heapifyDown(pos, keys[pos], values[pos])) {
+ final int parent = (pos - 1) >>> 1;
+ if(parent < curmin) {
+ nextmin = Math.min(nextmin, parent);
+ nextmax = Math.max(nextmax, parent);
+ }
+ }
+ pos--;
+ }
+ curmin = nextmin;
+ pos = Math.min(pos, nextmax);
+ nextmax = -1;
+ }
+ }
+ validSize = size;
+ }
+ }
+
+ /**
+ * Remove the element at the given position.
+ *
+ * @param pos Element position.
+ */
+ protected void removeAt(int pos) {
+ if(pos < 0 || pos >= size) {
+ return;
+ }
+ // Replacement object:
+ final double reinkey = keys[size - 1];
+ final Object reinval = values[size - 1];
+ values[size - 1] = null;
+ // Keep heap in sync
+ if(validSize == size) {
+ size -= 1;
+ validSize -= 1;
+ heapifyDown(pos, reinkey, reinval);
+ }
+ else {
+ size -= 1;
+ validSize = Math.min(pos >>> 1, validSize);
+ keys[pos] = reinkey;
+ values[pos] = reinval;
+ }
+ modCount++;
+ }
+
+ /**
+ * Execute a "Heapify Upwards" aka "SiftUp". Used in insertions.
+ *
+ * @param pos insertion position
+ * @param curkey Current key
+ * @param curval Current value
+ */
+ protected void heapifyUp(int pos, double curkey, Object curval) {
+ while(pos > 0) {
+ final int parent = (pos - 1) >>> 1;
+ double parkey = keys[parent];
+
+ if(curkey <= parkey) { // Compare
+ break;
+ }
+ keys[pos] = parkey;
+ values[pos] = values[parent];
+ pos = parent;
+ }
+ keys[pos] = curkey;
+ values[pos] = curval;
+ }
+
+ /**
+ * Execute a "Heapify Downwards" aka "SiftDown". Used in deletions.
+ *
+ * @param ipos re-insertion position
+ * @param curkey Current key
+ * @param curval Current value
+ * @return true when the order was changed
+ */
+ protected boolean heapifyDown(final int ipos, double curkey, Object curval) {
+ int pos = ipos;
+ final int half = size >>> 1;
+ while(pos < half) {
+ // Get left child (must exist!)
+ int cpos = (pos << 1) + 1;
+ double chikey = keys[cpos];
+ Object chival = values[cpos];
+ // Test right child, if present
+ final int rchild = cpos + 1;
+ if(rchild < size) {
+ double right = keys[rchild];
+ if(chikey < right) { // Compare
+ cpos = rchild;
+ chikey = right;
+ chival = values[rchild];
+ }
+ }
+
+ if(curkey >= chikey) { // Compare
+ break;
+ }
+ keys[pos] = chikey;
+ values[pos] = chival;
+ pos = cpos;
+ }
+ keys[pos] = curkey;
+ values[pos] = curval;
+ return (pos == ipos);
+ }
+
+ /**
+ * Query the size
+ *
+ * @return Size
+ */
+ public int size() {
+ return this.size;
+ }
+
+ /**
+ * Test whether we need to resize to have the requested capacity.
+ *
+ * @param requiredSize required capacity
+ */
+ protected final void resize(int requiredSize) {
+ // Double until 64, then increase by 50% each time.
+ int newCapacity = ((keys.length < 64) ? ((keys.length + 1) << 1) : ((keys.length >> 1) * 3));
+ // overflow?
+ if(newCapacity < 0) {
+ throw new OutOfMemoryError();
+ }
+ if(requiredSize > newCapacity) {
+ newCapacity = requiredSize;
+ }
+ keys = Arrays.copyOf(keys, newCapacity);
+ values = Arrays.copyOf(values, newCapacity);
+ }
+
+ /**
+ * Delete all elements from the heap.
+ */
+ public void clear() {
+ // clean up references in the array for memory management
+ Arrays.fill(values, null);
+ this.size = 0;
+ this.validSize = -1;
+ modCount++;
+ }
+
+ /**
+ * Test whether the heap is still valid.
+ *
+ * Debug method.
+ *
+ * @return {@code null} when the heap is correct
+ */
+ protected String checkHeap() {
+ ensureValid();
+ for(int i = 1; i < size; i++) {
+ final int parent = (i - 1) >>> 1;
+ if(keys[parent] < keys[i]) { // Compare
+ return "@" + parent + ": " + keys[parent] + " < @" + i + ": " + keys[i];
+ }
+ }
+ return null;
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjMinHeap.java
new file mode 100644
index 00000000..244277e8
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjMinHeap.java
@@ -0,0 +1,328 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Arrays;
+import java.util.Comparator;
+
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+
+/**
+ * Basic in-memory heap structure.
+ *
+ * This heap is built lazily: if you first add many elements, then poll the
+ * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log
+ * n). This is implemented via a simple validTo counter.
+ *
+ * @author Erich Schubert
+ *
+ * @param <V> value type
+ */
+public class DoubleObjMinHeap<V> {
+ /**
+ * Heap storage: keys
+ */
+ protected double[] keys;
+
+ /**
+ * Heap storage: values
+ */
+ protected Object[] values;
+
+ /**
+ * Current number of objects
+ */
+ protected int size = 0;
+
+ /**
+ * Indicate up to where the heap is valid
+ */
+ protected int validSize = 0;
+
+ /**
+ * (Structural) modification counter. Used to invalidate iterators.
+ */
+ public transient int modCount = 0;
+
+ /**
+ * Default initial capacity
+ */
+ private static final int DEFAULT_INITIAL_CAPACITY = 11;
+
+ /**
+ * Default constructor: default capacity, natural ordering.
+ */
+ public DoubleObjMinHeap() {
+ this(DEFAULT_INITIAL_CAPACITY);
+ }
+
+ /**
+ * Constructor with initial capacity and {@link Comparator}.
+ *
+ * @param size initial capacity
+ */
+ public DoubleObjMinHeap(int size) {
+ super();
+ this.size = 0;
+ this.keys = new double[size];
+ this.values = new Object[size];
+ }
+
+ /**
+ * Add a key-value pair to the heap
+ *
+ * @param key Key
+ * @param val Value
+ * @return Success code
+ */
+ public boolean add(double key, V val) {
+ // resize when needed
+ if(size + 1 > keys.length) {
+ resize(size + 1);
+ }
+ // final int pos = size;
+ this.keys[size] = key;
+ this.values[size] = val;
+ this.size += 1;
+ heapifyUp(size - 1, key, val);
+ validSize += 1;
+ // We have changed - return true according to {@link Collection#put}
+ modCount++;
+ return true;
+ }
+
+ /**
+ * Get the current top key
+ *
+ * @return Top key
+ */
+ public double peekKey() {
+ if(size == 0) {
+ throw new ArrayIndexOutOfBoundsException("Peek() on an empty heap!");
+ }
+ ensureValid();
+ return keys[0];
+ }
+
+ /**
+ * Get the current top value
+ *
+ * @return Value
+ */
+ @SuppressWarnings("unchecked")
+ public V peekValue() {
+ if(size == 0) {
+ throw new ArrayIndexOutOfBoundsException("Peek() on an empty heap!");
+ }
+ ensureValid();
+ return (V) values[0];
+ }
+
+ /**
+ * Remove the first element
+ */
+ public void poll() {
+ removeAt(0);
+ }
+
+ /**
+ * Repair the heap
+ */
+ protected void ensureValid() {
+ if(validSize != size) {
+ if(size > 1) {
+ // Parent of first invalid
+ int nextmin = validSize > 0 ? ((validSize - 1) >>> 1) : 0;
+ int curmin = MathUtil.nextAllOnesInt(nextmin); // Next line
+ int nextmax = curmin - 1; // End of valid line
+ int pos = (size - 2) >>> 1; // Parent of last element
+ // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin+", "+nextmin);
+ while(pos >= nextmin) {
+ // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin);
+ while(pos >= curmin) {
+ if(!heapifyDown(pos, keys[pos], values[pos])) {
+ final int parent = (pos - 1) >>> 1;
+ if(parent < curmin) {
+ nextmin = Math.min(nextmin, parent);
+ nextmax = Math.max(nextmax, parent);
+ }
+ }
+ pos--;
+ }
+ curmin = nextmin;
+ pos = Math.min(pos, nextmax);
+ nextmax = -1;
+ }
+ }
+ validSize = size;
+ }
+ }
+
+ /**
+ * Remove the element at the given position.
+ *
+ * @param pos Element position.
+ */
+ protected void removeAt(int pos) {
+ if(pos < 0 || pos >= size) {
+ return;
+ }
+ // Replacement object:
+ final double reinkey = keys[size - 1];
+ final Object reinval = values[size - 1];
+ values[size - 1] = null;
+ // Keep heap in sync
+ if(validSize == size) {
+ size -= 1;
+ validSize -= 1;
+ heapifyDown(pos, reinkey, reinval);
+ }
+ else {
+ size -= 1;
+ validSize = Math.min(pos >>> 1, validSize);
+ keys[pos] = reinkey;
+ values[pos] = reinval;
+ }
+ modCount++;
+ }
+
+ /**
+ * Execute a "Heapify Upwards" aka "SiftUp". Used in insertions.
+ *
+ * @param pos insertion position
+ * @param curkey Current key
+ * @param curval Current value
+ */
+ protected void heapifyUp(int pos, double curkey, Object curval) {
+ while(pos > 0) {
+ final int parent = (pos - 1) >>> 1;
+ double parkey = keys[parent];
+
+ if(curkey >= parkey) { // Compare
+ break;
+ }
+ keys[pos] = parkey;
+ values[pos] = values[parent];
+ pos = parent;
+ }
+ keys[pos] = curkey;
+ values[pos] = curval;
+ }
+
+ /**
+ * Execute a "Heapify Downwards" aka "SiftDown". Used in deletions.
+ *
+ * @param ipos re-insertion position
+ * @param curkey Current key
+ * @param curval Current value
+ * @return true when the order was changed
+ */
+ protected boolean heapifyDown(final int ipos, double curkey, Object curval) {
+ int pos = ipos;
+ final int half = size >>> 1;
+ while(pos < half) {
+ // Get left child (must exist!)
+ int cpos = (pos << 1) + 1;
+ double chikey = keys[cpos];
+ Object chival = values[cpos];
+ // Test right child, if present
+ final int rchild = cpos + 1;
+ if(rchild < size) {
+ double right = keys[rchild];
+ if(chikey > right) { // Compare
+ cpos = rchild;
+ chikey = right;
+ chival = values[rchild];
+ }
+ }
+
+ if(curkey <= chikey) { // Compare
+ break;
+ }
+ keys[pos] = chikey;
+ values[pos] = chival;
+ pos = cpos;
+ }
+ keys[pos] = curkey;
+ values[pos] = curval;
+ return (pos == ipos);
+ }
+
+ /**
+ * Query the size
+ *
+ * @return Size
+ */
+ public int size() {
+ return this.size;
+ }
+
+ /**
+ * Test whether we need to resize to have the requested capacity.
+ *
+ * @param requiredSize required capacity
+ */
+ protected final void resize(int requiredSize) {
+ // Double until 64, then increase by 50% each time.
+ int newCapacity = ((keys.length < 64) ? ((keys.length + 1) << 1) : ((keys.length >> 1) * 3));
+ // overflow?
+ if(newCapacity < 0) {
+ throw new OutOfMemoryError();
+ }
+ if(requiredSize > newCapacity) {
+ newCapacity = requiredSize;
+ }
+ keys = Arrays.copyOf(keys, newCapacity);
+ values = Arrays.copyOf(values, newCapacity);
+ }
+
+ /**
+ * Delete all elements from the heap.
+ */
+ public void clear() {
+ // clean up references in the array for memory management
+ Arrays.fill(values, null);
+ this.size = 0;
+ this.validSize = -1;
+ modCount++;
+ }
+
+ /**
+ * Test whether the heap is still valid.
+ *
+ * Debug method.
+ *
+ * @return {@code null} when the heap is correct
+ */
+ protected String checkHeap() {
+ ensureValid();
+ for(int i = 1; i < size; i++) {
+ final int parent = (i - 1) >>> 1;
+ if(keys[parent] > keys[i]) { // Compare
+ return "@" + parent + ": " + keys[parent] + " < @" + i + ": " + keys[i];
+ }
+ }
+ return null;
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoublePriorityObject.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoublePriorityObject.java
index 976a4d0c..92d548cb 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoublePriorityObject.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoublePriorityObject.java
@@ -79,8 +79,9 @@ public class DoublePriorityObject<O> implements PairInterface<Double, O>, Compar
}
@Override
+ @Deprecated
public Double getFirst() {
- return priority;
+ return Double.valueOf(priority);
}
@Override
@@ -120,8 +121,8 @@ public class DoublePriorityObject<O> implements PairInterface<Double, O>, Compar
@Override
public String toString() {
- StringBuffer buf = new StringBuffer();
- buf.append(priority).append(":").append(object.toString());
+ StringBuilder buf = new StringBuilder();
+ buf.append(priority).append(':').append(object.toString());
return buf.toString();
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/Heap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/Heap.java
index 307a0807..86d3ae08 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/Heap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/Heap.java
@@ -23,11 +23,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.io.Serializable;
-import java.util.AbstractQueue;
-import java.util.ArrayList;
import java.util.Arrays;
-import java.util.Collection;
import java.util.Comparator;
import java.util.ConcurrentModificationException;
import java.util.Iterator;
@@ -49,39 +45,34 @@ import de.lmu.ifi.dbs.elki.math.MathUtil;
* @param <E> Element type. Should be {@link java.lang.Comparable} or a
* {@link java.util.Comparator} needs to be given.
*/
-public class Heap<E> extends AbstractQueue<E> implements Serializable {
+public class Heap<E> implements Iterable<E> {
/**
- * Serial version
- */
- private static final long serialVersionUID = 1L;
-
- /**
- * Heap storage
+ * Heap storage.
*/
protected transient Object[] queue;
/**
- * Current number of objects
+ * Current number of objects.
*/
protected int size = 0;
/**
- * Indicate up to where the heap is valid
+ * Indicate up to where the heap is valid.
*/
protected int validSize = 0;
/**
- * The comparator or {@code null}
+ * The comparator or {@code null}.
*/
protected final Comparator<Object> comparator;
/**
* (Structural) modification counter. Used to invalidate iterators.
*/
- public transient int modCount = 0;
+ private transient int modCount = 0;
/**
- * Default initial capacity
+ * Default initial capacity.
*/
private static final int DEFAULT_INITIAL_CAPACITY = 11;
@@ -124,16 +115,14 @@ public class Heap<E> extends AbstractQueue<E> implements Serializable {
this.comparator = (Comparator<Object>) comparator;
}
- @Override
- public boolean add(E e) {
- // Never full - overriding probably slightly faster
- return offer(e);
- }
-
- @Override
- public boolean offer(E e) {
+ /**
+ * Add an element to the heap.
+ *
+ * @param e Element to add
+ */
+ public void add(E e) {
// resize when needed
- if(size + 1 > queue.length) {
+ if (size + 1 > queue.length) {
resize(size + 1);
}
// final int pos = size;
@@ -141,46 +130,69 @@ public class Heap<E> extends AbstractQueue<E> implements Serializable {
this.size += 1;
heapifyUp(size - 1, e);
validSize += 1;
- // We have changed - return true according to {@link Collection#put}
- modCount++;
- return true;
+ heapModified();
}
- @Override
+ /**
+ * Combined operation that removes the top element, and inserts a new element
+ * instead.
+ *
+ * @param e New element to insert
+ * @return Previous top element of the heap
+ */
+ @SuppressWarnings("unchecked")
+ public E replaceTopElement(E e) {
+ ensureValid();
+ E oldroot = (E) queue[0];
+ heapifyDown(0, e);
+ heapModified();
+ return oldroot;
+ }
+
+ /**
+ * Peek at the top element.
+ *
+ * @return Top element.
+ */
+ @SuppressWarnings("unchecked")
public E peek() {
- if(size == 0) {
+ if (size == 0) {
return null;
}
ensureValid();
- return castQueueElement(0);
+ return (E) queue[0];
}
- @Override
+ /**
+ * Remove the top element.
+ *
+ * @return Top element.
+ */
public E poll() {
ensureValid();
return removeAt(0);
}
/**
- * Repair the heap
+ * Perform pending heap repair operations in a single bulk operation.
*/
protected void ensureValid() {
- if(validSize != size) {
- if(size > 1) {
+ if (validSize != size) {
+ if (size > 1) {
// Bottom up heap update.
- if(comparator != null) {
+ if (comparator != null) {
// Parent of first invalid
int nextmin = validSize > 0 ? ((validSize - 1) >>> 1) : 0;
int curmin = MathUtil.nextAllOnesInt(nextmin); // Next line
int nextmax = curmin - 1; // End of valid line
int pos = (size - 2) >>> 1; // Parent of last element
// System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin+", "+nextmin);
- while(pos >= nextmin) {
+ while (pos >= nextmin) {
// System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin);
- while(pos >= curmin) {
- if(!heapifyDownComparator(pos, queue[pos])) {
+ while (pos >= curmin) {
+ if (!heapifyDownComparator(pos, queue[pos])) {
final int parent = (pos - 1) >>> 1;
- if(parent < curmin) {
+ if (parent < curmin) {
nextmin = Math.min(nextmin, parent);
nextmax = Math.max(nextmax, parent);
}
@@ -191,20 +203,19 @@ public class Heap<E> extends AbstractQueue<E> implements Serializable {
pos = Math.min(pos, nextmax);
nextmax = -1;
}
- }
- else {
+ } else {
// Parent of first invalid
int nextmin = validSize > 0 ? ((validSize - 1) >>> 1) : 0;
int curmin = MathUtil.nextAllOnesInt(nextmin); // Next line
int nextmax = curmin - 1; // End of valid line
int pos = (size - 2) >>> 1; // Parent of last element
// System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin+", "+nextmin);
- while(pos >= nextmin) {
+ while (pos >= nextmin) {
// System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin);
- while(pos >= curmin) {
- if(!heapifyDownComparable(pos, queue[pos])) {
+ while (pos >= curmin) {
+ if (!heapifyDownComparable(pos, queue[pos])) {
final int parent = (pos - 1) >>> 1;
- if(parent < curmin) {
+ if (parent < curmin) {
nextmin = Math.min(nextmin, parent);
nextmax = Math.max(nextmax, parent);
}
@@ -225,27 +236,28 @@ public class Heap<E> extends AbstractQueue<E> implements Serializable {
* Remove the element at the given position.
*
* @param pos Element position.
+ * @return Element that was at this position.
*/
+ @SuppressWarnings("unchecked")
protected E removeAt(int pos) {
- if(pos < 0 || pos >= size) {
+ if (pos < 0 || pos >= size) {
return null;
}
- final E ret = castQueueElement(pos);
+ final E ret = (E) queue[pos];
// Replacement object:
final Object reinsert = queue[size - 1];
queue[size - 1] = null;
// Keep heap in sync
- if(validSize == size) {
+ if (validSize == size) {
size -= 1;
validSize -= 1;
heapifyDown(pos, reinsert);
- }
- else {
+ } else {
size -= 1;
validSize = Math.min(pos >>> 1, validSize);
queue[pos] = reinsert;
}
- modCount++;
+ heapModified();
return ret;
}
@@ -257,10 +269,9 @@ public class Heap<E> extends AbstractQueue<E> implements Serializable {
*/
protected void heapifyUp(int pos, E elem) {
assert (pos < size && pos >= 0);
- if(comparator != null) {
+ if (comparator != null) {
heapifyUpComparator(pos, elem);
- }
- else {
+ } else {
heapifyUpComparable(pos, elem);
}
}
@@ -274,11 +285,11 @@ public class Heap<E> extends AbstractQueue<E> implements Serializable {
@SuppressWarnings("unchecked")
protected void heapifyUpComparable(int pos, Object elem) {
final Comparable<Object> cur = (Comparable<Object>) elem; // queue[pos];
- while(pos > 0) {
+ while (pos > 0) {
final int parent = (pos - 1) >>> 1;
Object par = queue[parent];
- if(cur.compareTo(par) >= 0) {
+ if (cur.compareTo(par) >= 0) {
break;
}
queue[pos] = par;
@@ -294,11 +305,11 @@ public class Heap<E> extends AbstractQueue<E> implements Serializable {
* @param cur Element to insert
*/
protected void heapifyUpComparator(int pos, Object cur) {
- while(pos > 0) {
+ while (pos > 0) {
final int parent = (pos - 1) >>> 1;
Object par = queue[parent];
- if(comparator.compare(cur, par) >= 0) {
+ if (comparator.compare(cur, par) >= 0) {
break;
}
queue[pos] = par;
@@ -316,10 +327,9 @@ public class Heap<E> extends AbstractQueue<E> implements Serializable {
*/
protected boolean heapifyDown(int pos, Object reinsert) {
assert (pos >= 0);
- if(comparator != null) {
+ if (comparator != null) {
return heapifyDownComparator(pos, reinsert);
- }
- else {
+ } else {
return heapifyDownComparable(pos, reinsert);
}
}
@@ -328,6 +338,7 @@ public class Heap<E> extends AbstractQueue<E> implements Serializable {
* Execute a "Heapify Downwards" aka "SiftDown". Used in deletions.
*
* @param ipos re-insertion position
+ * @param reinsert Object to reinsert
* @return true when the order was changed
*/
@SuppressWarnings("unchecked")
@@ -335,21 +346,21 @@ public class Heap<E> extends AbstractQueue<E> implements Serializable {
Comparable<Object> cur = (Comparable<Object>) reinsert;
int pos = ipos;
final int half = size >>> 1;
- while(pos < half) {
+ while (pos < half) {
// Get left child (must exist!)
int cpos = (pos << 1) + 1;
Object child = queue[cpos];
// Test right child, if present
final int rchild = cpos + 1;
- if(rchild < size) {
+ if (rchild < size) {
Object right = queue[rchild];
- if(((Comparable<Object>) child).compareTo(right) > 0) {
+ if (((Comparable<Object>) child).compareTo(right) > 0) {
cpos = rchild;
child = right;
}
}
- if(cur.compareTo(child) <= 0) {
+ if (cur.compareTo(child) <= 0) {
break;
}
queue[pos] = child;
@@ -363,30 +374,31 @@ public class Heap<E> extends AbstractQueue<E> implements Serializable {
* Execute a "Heapify Downwards" aka "SiftDown". Used in deletions.
*
* @param ipos re-insertion position
+ * @param cur Object to reinsert
* @return true when the order was changed
*/
protected boolean heapifyDownComparator(final int ipos, Object cur) {
int pos = ipos;
final int half = size >>> 1;
- while(pos < half) {
+ while (pos < half) {
int min = pos;
Object best = cur;
final int lchild = (pos << 1) + 1;
Object left = queue[lchild];
- if(comparator.compare(best, left) > 0) {
+ if (comparator.compare(best, left) > 0) {
min = lchild;
best = left;
}
final int rchild = lchild + 1;
- if(rchild < size) {
+ if (rchild < size) {
Object right = queue[rchild];
- if(comparator.compare(best, right) > 0) {
+ if (comparator.compare(best, right) > 0) {
min = rchild;
best = right;
}
}
- if(min == pos) {
+ if (min == pos) {
break;
}
queue[pos] = best;
@@ -396,56 +408,53 @@ public class Heap<E> extends AbstractQueue<E> implements Serializable {
return (pos == ipos);
}
- @SuppressWarnings("unchecked")
- protected final E castQueueElement(int n) {
- return (E) queue[n];
- }
-
- @Override
+ /**
+ * Get the heap size.
+ *
+ * @return Heap size
+ */
public int size() {
return this.size;
}
/**
+ * Test for emptiness.
+ *
+ * @return true when the heap is empty
+ */
+ public boolean isEmpty() {
+ return size == 0;
+ }
+
+ /**
* Test whether we need to resize to have the requested capacity.
*
* @param requiredSize required capacity
*/
protected final void resize(int requiredSize) {
// Double until 64, then increase by 50% each time.
- int newCapacity = ((queue.length < 64) ? ((queue.length + 1) * 2) : ((queue.length / 2) * 3));
+ int newCapacity = ((queue.length < 64) ? ((queue.length + 1) << 1) : ((queue.length >> 1) * 3));
// overflow?
- if(newCapacity < 0) {
+ if (newCapacity < 0) {
throw new OutOfMemoryError();
}
- if(requiredSize > newCapacity) {
+ if (requiredSize > newCapacity) {
newCapacity = requiredSize;
}
queue = Arrays.copyOf(queue, newCapacity);
}
- @Override
+ /**
+ * Clear the heap.
+ */
public void clear() {
// clean up references in the array for memory management
- for(int i = 0; i < size; i++) {
+ for (int i = 0; i < size; i++) {
queue[i] = null;
}
this.size = 0;
this.validSize = -1;
- modCount++;
- }
-
- @Override
- public boolean contains(Object o) {
- if(o != null) {
- // TODO: use order to reduce search space?
- for(int i = 0; i < size; i++) {
- if(o.equals(queue[i])) {
- return true;
- }
- }
- }
- return false;
+ heapModified();
}
@Override
@@ -453,19 +462,11 @@ public class Heap<E> extends AbstractQueue<E> implements Serializable {
return new Itr();
}
- @Override
- public boolean addAll(Collection<? extends E> c) {
- final int addsize = c.size();
- if(addsize <= 0) {
- return false;
- }
- if(size + addsize > queue.length) {
- resize(size + addsize);
- }
- for(E elem : c) {
- add(elem);
- }
- return true;
+ /**
+ * Called at the end of each heap modification.
+ */
+ protected void heapModified() {
+ modCount++;
}
/**
@@ -477,7 +478,7 @@ public class Heap<E> extends AbstractQueue<E> implements Serializable {
*/
protected final class Itr implements Iterator<E> {
/**
- * Cursor position
+ * Cursor position.
*/
private int cursor = 0;
@@ -491,26 +492,26 @@ public class Heap<E> extends AbstractQueue<E> implements Serializable {
return cursor < size;
}
+ @SuppressWarnings("unchecked")
@Override
public E next() {
- if(expectedModCount != modCount) {
+ if (expectedModCount != modCount) {
throw new ConcurrentModificationException();
}
- if(cursor < size) {
- return castQueueElement(cursor++);
+ if (cursor < size) {
+ return (E) queue[cursor++];
}
throw new NoSuchElementException();
}
@Override
public void remove() {
- if(expectedModCount != modCount) {
+ if (expectedModCount != modCount) {
throw new ConcurrentModificationException();
}
- if(cursor > 0) {
+ if (cursor > 0) {
cursor--;
- }
- else {
+ } else {
throw new IllegalStateException();
}
expectedModCount = modCount;
@@ -518,20 +519,6 @@ public class Heap<E> extends AbstractQueue<E> implements Serializable {
}
/**
- * Return the heap as a sorted array list, by repeated polling. This will
- * empty the heap!
- *
- * @return new array list
- */
- public ArrayList<E> toSortedArrayList() {
- ArrayList<E> ret = new ArrayList<E>(size());
- while(!isEmpty()) {
- ret.add(poll());
- }
- return ret;
- }
-
- /**
* Test whether the heap is still valid.
*
* Debug method.
@@ -540,24 +527,23 @@ public class Heap<E> extends AbstractQueue<E> implements Serializable {
*/
protected String checkHeap() {
ensureValid();
- if(comparator == null) {
- for(int i = 1; i < size; i++) {
+ if (comparator == null) {
+ for (int i = 1; i < size; i++) {
final int parent = (i - 1) >>> 1;
@SuppressWarnings("unchecked")
Comparable<Object> po = (Comparable<Object>) queue[parent];
- if(po.compareTo(queue[i]) > 0) {
+ if (po.compareTo(queue[i]) > 0) {
return "@" + parent + ": " + queue[parent] + " < @" + i + ": " + queue[i];
}
}
- }
- else {
- for(int i = 1; i < size; i++) {
+ } else {
+ for (int i = 1; i < size; i++) {
final int parent = (i - 1) >>> 1;
- if(comparator.compare(queue[parent], queue[i]) > 0) {
+ if (comparator.compare(queue[parent], queue[i]) > 0) {
return "@" + parent + ": " + queue[parent] + " < @" + i + ": " + queue[i];
}
}
}
return null;
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerHeap.java
new file mode 100644
index 00000000..6203ad96
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerHeap.java
@@ -0,0 +1,264 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Arrays;
+
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+
+/**
+ * Basic in-memory heap structure.
+ *
+ * This heap is built lazily: if you first add many elements, then poll the
+ * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log
+ * n). This is implemented via a simple validTo counter.
+ *
+ * @author Erich Schubert
+ */
+public abstract class IntegerHeap extends AbstractHeap {
+ /**
+ * Heap storage: queue
+ */
+ protected transient int[] queue;
+
+ /**
+ * Constructor with initial capacity.
+ *
+ * @param size initial capacity
+ */
+ public IntegerHeap(int size) {
+ super();
+ this.size = 0;
+ this.queue = new int[size];
+ }
+
+ /**
+ * Add a key-value pair to the heap
+ *
+ * @param key Key
+ */
+ public void add(int key) {
+ // resize when needed
+ if (size + 1 > queue.length) {
+ resize(size + 1);
+ }
+ // final int pos = size;
+ this.queue[size] = key;
+ this.size += 1;
+ heapifyUp(size - 1, key);
+ validSize += 1;
+ heapModified();
+ }
+
+ /**
+ * Add a key-value pair to the heap, except if the new element is larger than
+ * the top, and we are at design size (overflow)
+ *
+ * @param key Key
+ * @param max Maximum size of heap
+ */
+ public void add(int key, int max) {
+ if (size < max) {
+ add(key);
+ } else if (comp(key, peek())) {
+ replaceTopElement(key);
+ }
+ }
+
+ /**
+ * Combined operation that removes the top element, and inserts a new element
+ * instead.
+ *
+ * @param e New element to insert
+ * @return Previous top element of the heap
+ */
+ public int replaceTopElement(int e) {
+ ensureValid();
+ int oldroot = queue[0];
+ heapifyDown(0, e);
+ heapModified();
+ return oldroot;
+ }
+
+ /**
+ * Get the current top key
+ *
+ * @return Top key
+ */
+ public int peek() {
+ if (size == 0) {
+ throw new ArrayIndexOutOfBoundsException("Peek() on an empty heap!");
+ }
+ ensureValid();
+ return queue[0];
+ }
+
+ /**
+ * Remove the first element
+ *
+ * @return Top element
+ */
+ public int poll() {
+ return removeAt(0);
+ }
+
+ /**
+ * Repair the heap
+ */
+ protected void ensureValid() {
+ if (validSize != size) {
+ if (size > 1) {
+ // Parent of first invalid
+ int nextmin = validSize > 0 ? ((validSize - 1) >>> 1) : 0;
+ int curmin = MathUtil.nextAllOnesInt(nextmin); // Next line
+ int nextmax = curmin - 1; // End of valid line
+ int pos = (size - 2) >>> 1; // Parent of last element
+ // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin+", "+nextmin);
+ while (pos >= nextmin) {
+ // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin);
+ while (pos >= curmin) {
+ if (!heapifyDown(pos, queue[pos])) {
+ final int parent = (pos - 1) >>> 1;
+ if (parent < curmin) {
+ nextmin = Math.min(nextmin, parent);
+ nextmax = Math.max(nextmax, parent);
+ }
+ }
+ pos--;
+ }
+ curmin = nextmin;
+ pos = Math.min(pos, nextmax);
+ nextmax = -1;
+ }
+ }
+ validSize = size;
+ }
+ }
+
+ /**
+ * Remove the element at the given position.
+ *
+ * @param pos Element position.
+ * @return Removed element
+ */
+ protected int removeAt(int pos) {
+ if (pos < 0 || pos >= size) {
+ return 0;
+ }
+ final int top = queue[0];
+ // Replacement object:
+ final int reinkey = queue[size - 1];
+ // Keep heap in sync
+ if (validSize == size) {
+ size -= 1;
+ validSize -= 1;
+ heapifyDown(pos, reinkey);
+ } else {
+ size -= 1;
+ validSize = Math.min(pos >>> 1, validSize);
+ queue[pos] = reinkey;
+ }
+ heapModified();
+ return top;
+ }
+
+ /**
+ * Execute a "Heapify Upwards" aka "SiftUp". Used in insertions.
+ *
+ * @param pos insertion position
+ * @param curkey Current key
+ */
+ protected void heapifyUp(int pos, int curkey) {
+ while (pos > 0) {
+ final int parent = (pos - 1) >>> 1;
+ int parkey = queue[parent];
+
+ if (comp(curkey, parkey)) { // Compare
+ break;
+ }
+ queue[pos] = parkey;
+ pos = parent;
+ }
+ queue[pos] = curkey;
+ }
+
+ /**
+ * Execute a "Heapify Downwards" aka "SiftDown". Used in deletions.
+ *
+ * @param ipos re-insertion position
+ * @param curkey Current key
+ * @return true when the order was changed
+ */
+ protected boolean heapifyDown(final int ipos, int curkey) {
+ int pos = ipos;
+ final int half = size >>> 1;
+ while (pos < half) {
+ // Get left child (must exist!)
+ int cpos = (pos << 1) + 1;
+ int chikey = queue[cpos];
+ // Test right child, if present
+ final int rchild = cpos + 1;
+ if (rchild < size) {
+ int right = queue[rchild];
+ if (comp(chikey, right)) { // Compare
+ cpos = rchild;
+ chikey = right;
+ }
+ }
+
+ if (comp(chikey, curkey)) { // Compare
+ break;
+ }
+ queue[pos] = chikey;
+ pos = cpos;
+ }
+ queue[pos] = curkey;
+ return (pos == ipos);
+ }
+
+ /**
+ * Test whether we need to resize to have the requested capacity.
+ *
+ * @param requiredSize required capacity
+ */
+ protected final void resize(int requiredSize) {
+ queue = Arrays.copyOf(queue, desiredSize(requiredSize, queue.length));
+ }
+
+ /**
+ * Delete all elements from the heap.
+ */
+ @Override
+ public void clear() {
+ super.clear();
+ for (int i = 0; i < size; i++) {
+ queue[i] = 0;
+ }
+ }
+
+ /**
+ * Compare two objects
+ */
+ abstract protected boolean comp(int o1, int o2);
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMaxHeap.java
new file mode 100644
index 00000000..383eb727
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMaxHeap.java
@@ -0,0 +1,62 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Basic in-memory heap structure.
+ *
+ * This heap is built lazily: if you first add many elements, then poll the
+ * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log
+ * n). This is implemented via a simple validTo counter.
+ *
+ * @author Erich Schubert
+ */
+public class IntegerMaxHeap extends IntegerHeap {
+ /**
+ * Constructor with default capacity.
+ */
+ public IntegerMaxHeap() {
+ super(DEFAULT_INITIAL_CAPACITY);
+ }
+
+ /**
+ * Constructor with initial capacity.
+ *
+ * @param size initial capacity
+ */
+ public IntegerMaxHeap(int size) {
+ super(size);
+ }
+
+ /**
+ * Compare two objects
+ *
+ * @param o1 First object
+ * @param o2 Second object
+ */
+ @Override
+ protected boolean comp(int o1, int o2) {
+ return o1 < o2;
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMinHeap.java
new file mode 100644
index 00000000..f81fe275
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMinHeap.java
@@ -0,0 +1,62 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Basic in-memory heap structure.
+ *
+ * This heap is built lazily: if you first add many elements, then poll the
+ * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log
+ * n). This is implemented via a simple validTo counter.
+ *
+ * @author Erich Schubert
+ */
+public class IntegerMinHeap extends IntegerHeap {
+ /**
+ * Constructor with default capacity.
+ */
+ public IntegerMinHeap() {
+ super(DEFAULT_INITIAL_CAPACITY);
+ }
+
+ /**
+ * Constructor with initial capacity.
+ *
+ * @param size initial capacity
+ */
+ public IntegerMinHeap(int size) {
+ super(size);
+ }
+
+ /**
+ * Compare two objects
+ *
+ * @param o1 First object
+ * @param o2 Second object
+ */
+ @Override
+ protected boolean comp(int o1, int o2) {
+ return o1 > o2;
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerPriorityObject.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerPriorityObject.java
index 9dd941ec..2014de65 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerPriorityObject.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerPriorityObject.java
@@ -79,8 +79,9 @@ public class IntegerPriorityObject<O> implements PairInterface<Integer, O>, Comp
}
@Override
+ @Deprecated
public Integer getFirst() {
- return priority;
+ return Integer.valueOf(priority);
}
@Override
@@ -120,8 +121,8 @@ public class IntegerPriorityObject<O> implements PairInterface<Integer, O>, Comp
@Override
public String toString() {
- StringBuffer buf = new StringBuffer();
- buf.append(priority).append(":").append(object.toString());
+ StringBuilder buf = new StringBuilder();
+ buf.append(priority).append(':').append(object.toString());
return buf.toString();
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/KNNHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/KNNHeap.java
deleted file mode 100644
index 6c59123b..00000000
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/KNNHeap.java
+++ /dev/null
@@ -1,153 +0,0 @@
-package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
-
-/*
- This file is part of ELKI:
- Environment for Developing KDD-Applications Supported by Index-Structures
-
- Copyright (C) 2012
- Ludwig-Maximilians-Universität München
- Lehr- und Forschungseinheit für Datenbanksysteme
- ELKI Development Team
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
-
-import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
-import de.lmu.ifi.dbs.elki.database.query.GenericDistanceResultPair;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
-
-/**
- * Heap used for KNN management.
- *
- * @author Erich Schubert
- *
- * @apiviz.has KNNList oneway - - serializes to
- *
- * @param <D> distance type
- */
-public class KNNHeap<D extends Distance<D>> extends TiedTopBoundedHeap<DistanceResultPair<D>> {
- /**
- * Serial version
- */
- private static final long serialVersionUID = 1L;
-
- /**
- * Maximum distance, usually infiniteDistance
- */
- private final D maxdist;
-
- /**
- * Constructor.
- *
- * @param k k Parameter
- * @param maxdist k-distance to return for less than k neighbors - usually
- * infiniteDistance
- */
- public KNNHeap(int k, D maxdist) {
- super(k, new Comp<D>());
- this.maxdist = maxdist;
- }
-
- /**
- * Simplified constructor. Will return {@code null} as kNN distance with less
- * than k entries.
- *
- * @param k k Parameter
- */
- public KNNHeap(int k) {
- this(k, null);
- }
-
- @Override
- public ArrayList<DistanceResultPair<D>> toSortedArrayList() {
- ArrayList<DistanceResultPair<D>> list = super.toSortedArrayList();
- Collections.reverse(list);
- return list;
- }
-
- /**
- * Serialize to a {@link KNNList}. This empties the heap!
- *
- * @return KNNList with the heaps contents.
- */
- public KNNList<D> toKNNList() {
- return new KNNList<D>(this);
- }
-
- /**
- * Get the K parameter ("maxsize" internally).
- *
- * @return K
- */
- public int getK() {
- return super.getMaxSize();
- }
-
- /**
- * Get the distance to the k nearest neighbor, or maxdist otherwise.
- *
- * @return Maximum distance
- */
- public D getKNNDistance() {
- if(size() < getK()) {
- return maxdist;
- }
- return peek().getDistance();
- }
-
- /**
- * Get maximum distance in heap
- */
- public D getMaximumDistance() {
- if(isEmpty()) {
- return maxdist;
- }
- return peek().getDistance();
- }
-
- /**
- * Add a distance-id pair to the heap unless the distance is too large.
- *
- * Compared to the super.add() method, this often saves the pair construction.
- *
- * @param distance Distance value
- * @param id ID number
- * @return success code
- */
- public boolean add(D distance, DBIDRef id) {
- if(size() < maxsize || peek().getDistance().compareTo(distance) >= 0) {
- return super.add(new GenericDistanceResultPair<D>(distance, id.getDBID()));
- }
- return true; /* "success" */
- }
-
- /**
- * Comparator to use.
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
- */
- public static class Comp<D extends Distance<D>> implements Comparator<DistanceResultPair<D>> {
- @Override
- public int compare(DistanceResultPair<D> o1, DistanceResultPair<D> o2) {
- return -o1.compareByDistance(o2);
- }
- }
-} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/KNNList.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/KNNList.java
deleted file mode 100644
index 3e6ecc9d..00000000
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/KNNList.java
+++ /dev/null
@@ -1,181 +0,0 @@
-package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
-
-/*
- This file is part of ELKI:
- Environment for Developing KDD-Applications Supported by Index-Structures
-
- Copyright (C) 2012
- Ludwig-Maximilians-Universität München
- Lehr- und Forschungseinheit für Datenbanksysteme
- ELKI Development Team
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-import java.util.AbstractList;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Queue;
-
-import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNUtil;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
-
-/**
- * Finalized KNN List.
- *
- * @author Erich Schubert
- *
- * @param <D> Distance type
- */
-public class KNNList<D extends Distance<D>> extends AbstractList<DistanceResultPair<D>> implements KNNResult<D> {
- /**
- * The value of k this was materialized for.
- */
- private final int k;
-
- /**
- * The actual data array.
- */
- private final Object[] data;
-
- /**
- * Constructor, to be called from KNNHeap only. Use {@link KNNHeap#toKNNList}
- * instead!
- *
- * @param heap Calling heap
- */
- protected KNNList(KNNHeap<D> heap) {
- super();
- this.data = new Object[heap.size()];
- this.k = heap.getK();
- assert(heap.size() >= this.k) : "Heap doesn't contain enough objects!";
- // Get sorted data from heap; but in reverse.
- int i = heap.size();
- while(!heap.isEmpty()) {
- i--;
- assert (i >= 0);
- data[i] = heap.poll();
- }
- assert (data.length == 0 || data[0] != null);
- assert (heap.size() == 0);
- }
-
- /**
- * Constructor. With a KNNHeap, use {@link KNNHeap#toKNNList} instead!
- *
- * @param heap Calling heap
- * @param k K value
- */
- public KNNList(Queue<D> heap, int k) {
- super();
- this.data = new Object[heap.size()];
- this.k = k;
- assert(heap.size() >= this.k) : "Heap doesn't contain enough objects!";
- // Get sorted data from heap; but in reverse.
- int i = heap.size();
- while(!heap.isEmpty()) {
- i--;
- assert (i >= 0);
- data[i] = heap.poll();
- }
- assert (data.length == 0 || data[0] != null);
- assert (heap.size() == 0);
- }
-
- @Override
- public int getK() {
- return k;
- }
-
- @Override
- public D getKNNDistance() {
- return get(getK() - 1).getDistance();
- }
-
- @Override
- public ArrayDBIDs asDBIDs() {
- return KNNUtil.asDBIDs(this);
- }
-
- @Override
- public List<D> asDistanceList() {
- return KNNUtil.asDistanceList(this);
- }
-
- @Override
- public String toString() {
- StringBuffer buf = new StringBuffer();
- buf.append("kNNList[");
- Iterator<DistanceResultPair<D>> iter = this.iterator();
- while(iter.hasNext()) {
- DistanceResultPair<D> pair = iter.next();
- buf.append(pair.getDistance()).append(":").append(pair.getDBID());
- if(iter.hasNext()) {
- buf.append(",");
- }
- }
- buf.append("]");
- return buf.toString();
- }
-
- @SuppressWarnings("unchecked")
- @Override
- public DistanceResultPair<D> get(int index) {
- return (DistanceResultPair<D>) data[index];
- }
-
- @Override
- public Iterator<DistanceResultPair<D>> iterator() {
- return new Itr();
- }
-
- @Override
- public int size() {
- return data.length;
- }
-
- /**
- * Iterator
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
- */
- private class Itr implements Iterator<DistanceResultPair<D>> {
- /**
- * Cursor position
- */
- private int pos = -1;
-
- @Override
- public boolean hasNext() {
- return pos + 1 < data.length;
- }
-
- @SuppressWarnings("unchecked")
- @Override
- public DistanceResultPair<D> next() {
- pos++;
- return (DistanceResultPair<D>) data[pos];
- }
-
- @Override
- public void remove() {
- throw new UnsupportedOperationException("kNN results are unmodifiable.");
- }
- }
-} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ObjectHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ObjectHeap.java
new file mode 100644
index 00000000..2e20ed56
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ObjectHeap.java
@@ -0,0 +1,267 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Arrays;
+
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+
+/**
+ * Basic in-memory heap structure.
+ *
+ * This heap is built lazily: if you first add many elements, then poll the
+ * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log
+ * n). This is implemented via a simple validTo counter.
+ *
+ * @author Erich Schubert
+ */
+public abstract class ObjectHeap<K> extends AbstractHeap {
+ /**
+ * Heap storage: queue
+ */
+ protected transient Object[] queue;
+
+ /**
+ * Constructor with initial capacity.
+ *
+ * @param size initial capacity
+ */
+ public ObjectHeap(int size) {
+ super();
+ this.size = 0;
+ this.queue = new Object[size];
+ }
+
+ /**
+ * Add a key-value pair to the heap
+ *
+ * @param key Key
+ */
+ public void add(Object key) {
+ // resize when needed
+ if (size + 1 > queue.length) {
+ resize(size + 1);
+ }
+ // final int pos = size;
+ this.queue[size] = key;
+ this.size += 1;
+ heapifyUp(size - 1, key);
+ validSize += 1;
+ heapModified();
+ }
+
+ /**
+ * Add a key-value pair to the heap, except if the new element is larger than
+ * the top, and we are at design size (overflow)
+ *
+ * @param key Key
+ * @param max Maximum size of heap
+ */
+ public void add(Object key, int max) {
+ if (size < max) {
+ add(key);
+ } else if (comp(key, peek())) {
+ replaceTopElement(key);
+ }
+ }
+
+ /**
+ * Combined operation that removes the top element, and inserts a new element
+ * instead.
+ *
+ * @param e New element to insert
+ * @return Previous top element of the heap
+ */
+ @SuppressWarnings("unchecked")
+ public Object replaceTopElement(Object e) {
+ ensureValid();
+ Object oldroot = (K) queue[0];
+ heapifyDown(0, e);
+ heapModified();
+ return oldroot;
+ }
+
+ /**
+ * Get the current top key
+ *
+ * @return Top key
+ */
+ @SuppressWarnings("unchecked")
+ public Object peek() {
+ if (size == 0) {
+ throw new ArrayIndexOutOfBoundsException("Peek() on an empty heap!");
+ }
+ ensureValid();
+ return (K) queue[0];
+ }
+
+ /**
+ * Remove the first element
+ *
+ * @return Top element
+ */
+ public Object poll() {
+ return removeAt(0);
+ }
+
+ /**
+ * Repair the heap
+ */
+ protected void ensureValid() {
+ if (validSize != size) {
+ if (size > 1) {
+ // Parent of first invalid
+ int nextmin = validSize > 0 ? ((validSize - 1) >>> 1) : 0;
+ int curmin = MathUtil.nextAllOnesInt(nextmin); // Next line
+ int nextmax = curmin - 1; // End of valid line
+ int pos = (size - 2) >>> 1; // Parent of last element
+ // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin+", "+nextmin);
+ while (pos >= nextmin) {
+ // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin);
+ while (pos >= curmin) {
+ if (!heapifyDown(pos, queue[pos])) {
+ final int parent = (pos - 1) >>> 1;
+ if (parent < curmin) {
+ nextmin = Math.min(nextmin, parent);
+ nextmax = Math.max(nextmax, parent);
+ }
+ }
+ pos--;
+ }
+ curmin = nextmin;
+ pos = Math.min(pos, nextmax);
+ nextmax = -1;
+ }
+ }
+ validSize = size;
+ }
+ }
+
+ /**
+ * Remove the element at the given position.
+ *
+ * @param pos Element position.
+ * @return Removed element
+ */
+ @SuppressWarnings("unchecked")
+ protected Object removeAt(int pos) {
+ if (pos < 0 || pos >= size) {
+ return null;
+ }
+ final Object top = (K) queue[0];
+ // Replacement object:
+ final Object reinkey = queue[size - 1];
+ // Keep heap in sync
+ if (validSize == size) {
+ size -= 1;
+ validSize -= 1;
+ heapifyDown(pos, reinkey);
+ } else {
+ size -= 1;
+ validSize = Math.min(pos >>> 1, validSize);
+ queue[pos] = reinkey;
+ }
+ heapModified();
+ return top;
+ }
+
+ /**
+ * Execute a "Heapify Upwards" aka "SiftUp". Used in insertions.
+ *
+ * @param pos insertion position
+ * @param curkey Current key
+ */
+ protected void heapifyUp(int pos, Object curkey) {
+ while (pos > 0) {
+ final int parent = (pos - 1) >>> 1;
+ Object parkey = queue[parent];
+
+ if (comp(curkey, parkey)) { // Compare
+ break;
+ }
+ queue[pos] = parkey;
+ pos = parent;
+ }
+ queue[pos] = curkey;
+ }
+
+ /**
+ * Execute a "Heapify Downwards" aka "SiftDown". Used in deletions.
+ *
+ * @param ipos re-insertion position
+ * @param curkey Current key
+ * @return true when the order was changed
+ */
+ protected boolean heapifyDown(final int ipos, Object curkey) {
+ int pos = ipos;
+ final int half = size >>> 1;
+ while (pos < half) {
+ // Get left child (must exist!)
+ int cpos = (pos << 1) + 1;
+ Object chikey = queue[cpos];
+ // Test right child, if present
+ final int rchild = cpos + 1;
+ if (rchild < size) {
+ Object right = queue[rchild];
+ if (comp(chikey, right)) { // Compare
+ cpos = rchild;
+ chikey = right;
+ }
+ }
+
+ if (comp(chikey, curkey)) { // Compare
+ break;
+ }
+ queue[pos] = chikey;
+ pos = cpos;
+ }
+ queue[pos] = curkey;
+ return (pos == ipos);
+ }
+
+ /**
+ * Test whether we need to resize to have the requested capacity.
+ *
+ * @param requiredSize required capacity
+ */
+ protected final void resize(int requiredSize) {
+ queue = Arrays.copyOf(queue, desiredSize(requiredSize, queue.length));
+ }
+
+ /**
+ * Delete all elements from the heap.
+ */
+ @Override
+ public void clear() {
+ super.clear();
+ for (int i = 0; i < size; i++) {
+ queue[i] = null;
+ }
+ }
+
+ /**
+ * Compare two objects
+ */
+ abstract protected boolean comp(Object o1, Object o2);
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedHeap.java
index c0ce3acf..2daaafa4 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedHeap.java
@@ -41,11 +41,6 @@ import de.lmu.ifi.dbs.elki.utilities.iterator.MergedIterator;
*/
public class TiedTopBoundedHeap<E> extends TopBoundedHeap<E> {
/**
- * Serial version
- */
- private static final long serialVersionUID = 1L;
-
- /**
* List to keep ties in.
*/
private List<E> ties = new ArrayList<E>();
@@ -80,11 +75,6 @@ public class TiedTopBoundedHeap<E> extends TopBoundedHeap<E> {
ties.clear();
}
- @Override
- public boolean contains(Object o) {
- return ties.contains(o) || super.contains(o);
- }
-
@SuppressWarnings("unchecked")
@Override
public Iterator<E> iterator() {
@@ -93,45 +83,52 @@ public class TiedTopBoundedHeap<E> extends TopBoundedHeap<E> {
@Override
public E peek() {
- if(ties.isEmpty()) {
+ if (ties.isEmpty()) {
return super.peek();
- }
- else {
+ } else {
return ties.get(ties.size() - 1);
}
}
@Override
public E poll() {
- if(ties.isEmpty()) {
+ if (ties.isEmpty()) {
return super.poll();
- }
- else {
+ } else {
return ties.remove(ties.size() - 1);
}
}
@Override
+ public E replaceTopElement(E e) {
+ if (ties.isEmpty()) {
+ return super.replaceTopElement(e);
+ }
+ // Fall back to classic emulation via poll and offer:
+ E prev = poll();
+ add(e);
+ return prev;
+ }
+
+ @Override
protected void handleOverflow(E e) {
boolean tied = false;
- if(comparator == null) {
+ if (comparator == null) {
@SuppressWarnings("unchecked")
Comparable<Object> c = (Comparable<Object>) e;
- if(c.compareTo(queue[0]) == 0) {
+ if (c.compareTo(queue[0]) == 0) {
tied = true;
}
- }
- else {
- if(comparator.compare(e, queue[0]) == 0) {
+ } else {
+ if (comparator.compare(e, queue[0]) == 0) {
tied = true;
}
}
- if(tied) {
+ if (tied) {
ties.add(e);
- }
- else {
+ } else {
// Also remove old ties.
ties.clear();
}
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedUpdatableHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedUpdatableHeap.java
index 4fbc852e..8e39af1d 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedUpdatableHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedUpdatableHeap.java
@@ -42,11 +42,6 @@ import de.lmu.ifi.dbs.elki.utilities.iterator.MergedIterator;
*/
public class TiedTopBoundedUpdatableHeap<E> extends TopBoundedUpdatableHeap<E> {
/**
- * Serial version
- */
- private static final long serialVersionUID = 1L;
-
- /**
* List to keep ties in.
*/
private List<E> ties = new ArrayList<E>();
@@ -81,11 +76,6 @@ public class TiedTopBoundedUpdatableHeap<E> extends TopBoundedUpdatableHeap<E> {
ties.clear();
}
- @Override
- public boolean contains(Object o) {
- return ties.contains(o) || super.contains(o);
- }
-
@SuppressWarnings("unchecked")
@Override
public Iterator<E> iterator() {
@@ -93,7 +83,7 @@ public class TiedTopBoundedUpdatableHeap<E> extends TopBoundedUpdatableHeap<E> {
}
@Override
- public boolean offerAt(int pos, E e) {
+ public void offerAt(int pos, E e) {
if(pos == IN_TIES) {
for(Iterator<E> i = ties.iterator(); i.hasNext();) {
E e2 = i.next();
@@ -102,8 +92,7 @@ public class TiedTopBoundedUpdatableHeap<E> extends TopBoundedUpdatableHeap<E> {
i.remove();
index.remove(e2);
}
- // while we did not change, this still was "successful".
- return true;
+ return;
}
}
throw new AbortException("Heap corrupt - should not be reached");
@@ -117,9 +106,9 @@ public class TiedTopBoundedUpdatableHeap<E> extends TopBoundedUpdatableHeap<E> {
final E e2 = ties.remove(ties.size() - 1);
// index.remove(e2);
super.offerAt(NO_VALUE, e2);
- return true;
+ return;
}
- return super.offerAt(pos, e);
+ super.offerAt(pos, e);
}
@Override
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedHeap.java
index 5b61e6f3..07b595f6 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedHeap.java
@@ -36,12 +36,7 @@ import java.util.Comparator;
*/
public class TopBoundedHeap<E> extends Heap<E> {
/**
- * Serial version
- */
- private static final long serialVersionUID = 1L;
-
- /**
- * Maximum size
+ * Maximum size.
*/
protected int maxsize;
@@ -67,31 +62,32 @@ public class TopBoundedHeap<E> extends Heap<E> {
}
@Override
- public boolean offer(E e) {
- // don't add if we hit maxsize and are worse
- if(super.size() >= maxsize) {
- ensureValid();
- if(comparator == null) {
- @SuppressWarnings("unchecked")
- Comparable<Object> c = (Comparable<Object>) e;
- if(c.compareTo(queue[0]) < 0) {
- // while we did not change, this still was "successful".
- return true;
- }
- }
- else {
- if(comparator.compare(e, queue[0]) < 0) {
- // while we did not change, this still was "successful".
- return true;
- }
- }
+ public void add(E e) {
+ if (super.size() < maxsize) {
+ // Just offer.
+ super.add(e);
+ return;
}
- boolean result = super.offer(e);
- // purge unneeded entry(s)
- while(super.size() > maxsize) {
- handleOverflow(super.poll());
+ // Peek at the top element, return if we are worse.
+ ensureValid();
+ final int comp;
+ if (comparator == null) {
+ @SuppressWarnings("unchecked")
+ Comparable<Object> c = (Comparable<Object>) e;
+ comp = c.compareTo(queue[0]);
+ } else {
+ comp = comparator.compare(e, queue[0]);
+ }
+ if (comp < 0) {
+ return;
+ }
+ if (comp == 0) {
+ handleOverflow(e);
+ } else {
+ // Otherwise, replace and repair:
+ E prev = super.replaceTopElement(e);
+ handleOverflow(prev);
}
- return result;
}
/**
@@ -105,9 +101,11 @@ public class TopBoundedHeap<E> extends Heap<E> {
}
/**
+ * Get the maximum size.
+ *
* @return the maximum size
*/
public int getMaxSize() {
return maxsize;
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedUpdatableHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedUpdatableHeap.java
index 2b22eba2..75f2abcf 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedUpdatableHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedUpdatableHeap.java
@@ -36,12 +36,7 @@ import java.util.Comparator;
*/
public class TopBoundedUpdatableHeap<E> extends UpdatableHeap<E> {
/**
- * Serial version
- */
- private static final long serialVersionUID = 1L;
-
- /**
- * Maximum size
+ * Maximum size.
*/
protected int maxsize;
@@ -67,22 +62,19 @@ public class TopBoundedUpdatableHeap<E> extends UpdatableHeap<E> {
}
@Override
- public boolean offerAt(int pos, E e) {
+ public void offerAt(int pos, E e) {
// don't add if we hit maxsize and are worse
- if(pos == NO_VALUE && super.size() >= maxsize) {
- ensureValid();
- if(compare(e, queue[0]) < 0) {
- // while we did not change, this still was "successful".
- return true;
- }
- // pos = index.get(e); // Should not be needed.
+ if (pos != NO_VALUE || super.size() < maxsize) {
+ super.offerAt(pos, e);
+ return;
}
- boolean result = super.offerAt(pos, e);
- // purge unneeded entry(s)
- while(super.size() > maxsize) {
- handleOverflow(super.poll());
+ ensureValid();
+ if (compare(e, queue[0]) < 0) {
+ // while we did not change, this still was "successful".
+ return;
}
- return result;
+ E prev = super.replaceTopElement(e);
+ handleOverflow(prev);
}
/**
@@ -93,12 +85,11 @@ public class TopBoundedUpdatableHeap<E> extends UpdatableHeap<E> {
* @return True when an update is needed
*/
protected int compare(Object e, Object object) {
- if(comparator == null) {
+ if (comparator == null) {
@SuppressWarnings("unchecked")
Comparable<Object> c = (Comparable<Object>) e;
return c.compareTo(queue[0]);
- }
- else {
+ } else {
return comparator.compare(e, queue[0]);
}
}
@@ -114,9 +105,11 @@ public class TopBoundedUpdatableHeap<E> extends UpdatableHeap<E> {
}
/**
+ * Get the maximum size.
+ *
* @return the maximum size
*/
public int getMaxSize() {
return maxsize;
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/UpdatableHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/UpdatableHeap.java
index 05858981..1ab5f4df 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/UpdatableHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/UpdatableHeap.java
@@ -37,7 +37,7 @@ import java.util.Comparator;
*/
public class UpdatableHeap<O> extends Heap<O> {
/**
- * Constant for "not in heap"
+ * Constant for "not in heap".
*/
protected static final int NO_VALUE = Integer.MIN_VALUE;
@@ -47,11 +47,6 @@ public class UpdatableHeap<O> extends Heap<O> {
protected static final int IN_TIES = -1;
/**
- * Serial version
- */
- private static final long serialVersionUID = 1L;
-
- /**
* Holds the indices in the heap of each element.
*/
protected final TObjectIntMap<Object> index = new TObjectIntHashMap<Object>(100, 0.5f, NO_VALUE);
@@ -73,7 +68,7 @@ public class UpdatableHeap<O> extends Heap<O> {
}
/**
- * Constructor with comparator
+ * Constructor with comparator.
*
* @param comparator Comparator
*/
@@ -82,7 +77,7 @@ public class UpdatableHeap<O> extends Heap<O> {
}
/**
- * Constructor with predefined size and comparator
+ * Constructor with predefined size and comparator.
*
* @param size Size
* @param comparator Comparator
@@ -98,12 +93,18 @@ public class UpdatableHeap<O> extends Heap<O> {
}
@Override
- public boolean offer(O e) {
+ public void add(O e) {
final int pos = index.get(e);
- return offerAt(pos, e);
+ offerAt(pos, e);
}
- protected boolean offerAt(final int pos, O e) {
+ /**
+ * Offer element at the given position.
+ *
+ * @param pos Position
+ * @param e Element
+ */
+ protected void offerAt(final int pos, O e) {
if(pos == NO_VALUE) {
// resize when needed
if(size + 1 > queue.length) {
@@ -114,9 +115,8 @@ public class UpdatableHeap<O> extends Heap<O> {
index.put(e, size);
size += 1;
// We do NOT YET update the heap. This is done lazily.
- // We have changed - return true according to {@link Collection#put}
- modCount++;
- return true;
+ heapModified();
+ return;
}
else {
assert (pos >= 0) : "Unexpected negative position.";
@@ -126,14 +126,12 @@ public class UpdatableHeap<O> extends Heap<O> {
@SuppressWarnings("unchecked")
Comparable<Object> c = (Comparable<Object>) e;
if(c.compareTo(queue[pos]) >= 0) {
- // Ignore, but return true according to {@link Collection#put}
- return true;
+ return;
}
}
else {
if(comparator.compare(e, queue[pos]) >= 0) {
- // Ignore, but return true according to {@link Collection#put}
- return true;
+ return;
}
}
if(pos >= validSize) {
@@ -144,9 +142,8 @@ public class UpdatableHeap<O> extends Heap<O> {
// ensureValid();
heapifyUp(pos, e);
}
- modCount++;
- // We have changed - return true according to {@link Collection#put}
- return true;
+ heapModified();
+ return;
}
}
@@ -155,7 +152,8 @@ public class UpdatableHeap<O> extends Heap<O> {
if(pos < 0 || pos >= size) {
return null;
}
- final O ret = castQueueElement(pos);
+ @SuppressWarnings("unchecked")
+ final O ret = (O) queue[pos];
// Replacement object:
final Object reinsert = queue[size - 1];
queue[size - 1] = null;
@@ -188,7 +186,7 @@ public class UpdatableHeap<O> extends Heap<O> {
queue[pos] = reinsert;
index.put(reinsert, pos);
}
- modCount++;
+ heapModified();
// Keep index up to date
index.remove(ret);
return ret;
@@ -216,6 +214,13 @@ public class UpdatableHeap<O> extends Heap<O> {
index.remove(node);
return node;
}
+
+ @Override
+ public O replaceTopElement(O e) {
+ O node = super.replaceTopElement(e);
+ index.remove(node);
+ return node;
+ }
/**
* Execute a "Heapify Upwards" aka "SiftUp". Used in insertions.
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/HierarchyHashmapList.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/HierarchyHashmapList.java
index 76bee0f9..bd6d67bf 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/HierarchyHashmapList.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/HierarchyHashmapList.java
@@ -71,7 +71,7 @@ public class HierarchyHashmapList<O> implements ModifiableHierarchy<O> {
if(!pchi.contains(child)) {
pchi.add(child);
} else {
- LoggingUtil.warning("Result added twice: "+parent+" -> "+child);
+ LoggingUtil.warning("Result added twice: "+parent+" -> "+child, new Throwable());
}
}
// Add child to parent
@@ -84,7 +84,7 @@ public class HierarchyHashmapList<O> implements ModifiableHierarchy<O> {
if(!cpar.contains(parent)) {
cpar.add(parent);
} else {
- LoggingUtil.warning("Result added twice: "+parent+" <- "+child);
+ LoggingUtil.warning("Result added twice: "+parent+" <- "+child, new Throwable());
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractObjDynamicHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractObjDynamicHistogram.java
new file mode 100644
index 00000000..9d0dba0d
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractObjDynamicHistogram.java
@@ -0,0 +1,271 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.math.scales.LinearScale;
+import de.lmu.ifi.dbs.elki.utilities.BitsUtil;
+
+/**
+ * A dynamic histogram can dynamically adapt the number of bins to the data fed
+ * into the histogram.
+ *
+ * @author Erich Schubert
+ *
+ * @param <T> Data type
+ */
+public abstract class AbstractObjDynamicHistogram<T> extends AbstractObjStaticHistogram<T> {
+ /**
+ * Cache for positions to be inserted.
+ */
+ private double[] cacheposs;
+
+ /**
+ * Cache for data to be inserted.
+ */
+ private Object[] cachevals;
+
+ /**
+ * Cache fill size
+ */
+ private int cachefill;
+
+ /**
+ * Destination (minimum) size of the structure. At most 2*destsize bins are
+ * allowed.
+ */
+ private int destsize;
+
+ /**
+ * Constructor.
+ *
+ * @param bins Design number of bins - may become twice as large!
+ */
+ public AbstractObjDynamicHistogram(int bins) {
+ super(-1, 0.0, 1.0);
+ this.destsize = bins;
+ cacheposs = new double[this.destsize << 1];
+ cachevals = new Object[this.destsize << 1];
+ cachefill = 0;
+ }
+
+ /**
+ * Materialize the histogram from the cache.
+ */
+ @SuppressWarnings("unchecked")
+ void materialize() {
+ // already materialized?
+ if (cachefill <= 0) {
+ return;
+ }
+ // Compute minimum and maximum
+ double min = Double.MAX_VALUE, max = Double.MIN_VALUE;
+ for (int i = 0; i < cachefill; i++) {
+ min = Math.min(min, cacheposs[i]);
+ max = Math.max(max, cacheposs[i]);
+ }
+ // use the LinearScale magic to round to "likely suiteable" step sizes.
+ // TODO: extract into a reusable function?
+ LinearScale scale = new LinearScale(min, max);
+ min = scale.getMin();
+ max = scale.getMax();
+ this.base = min;
+ this.max = max;
+ this.binsize = (max - min) / this.destsize;
+ // initialize array
+ this.data = new Object[this.destsize << 1];
+ for (int i = 0; i < this.destsize; i++) {
+ this.data[i] = makeObject();
+ }
+ size = destsize;
+ // re-insert data we have
+ final int end = cachefill;
+ cachefill = -1; // So reinsert works!
+ for (int i = 0; i < end; i++) {
+ putData(cacheposs[i], (T) cachevals[i]);
+ }
+ // delete cache, signal that we're initialized
+ cacheposs = null;
+ cachevals = null;
+ }
+
+ @Override
+ public T get(double coord) {
+ materialize();
+ testResample(coord);
+ T ret = super.get(coord);
+ return ret;
+ }
+
+ /**
+ * Put fresh data into the histogram (or into the cache)
+ *
+ * @param coord Coordinate
+ * @param value Value
+ */
+ @Override
+ public void putData(double coord, T value) {
+ // Store in cache
+ if (cachefill >= 0) {
+ if (cachefill < cacheposs.length) {
+
+ cacheposs[cachefill] = coord;
+ cachevals[cachefill] = cloneForCache(value);
+ cachefill++;
+ return;
+ } else {
+ materialize();
+ // But continue below!
+ }
+ }
+ // Check if we need to resample to accomodate this bin.
+ testResample(coord);
+ // super class will handle histogram resizing / shifting
+ T exist = get(coord);
+ data[getBinNr(coord)] = aggregate(exist, value);
+ }
+
+ /**
+ * Test (and perform) downsampling when neede.
+ *
+ * @param coord coordinate to accomodate.
+ */
+ private void testResample(double coord) {
+ final int bin = getBinNr(coord);
+ final int sizereq, off;
+ if (bin < 0) {
+ sizereq = size - bin;
+ off = -bin;
+ } else if (bin >= data.length) {
+ sizereq = bin + 1;
+ off = 0;
+ } else {
+ // Within the designated size - nothing to do.
+ return;
+ }
+ if (sizereq < data.length) {
+ // Accomodate by shifting. Let super do the job in {@link #get}
+ return;
+ }
+ // Resampling, eventually by multiple levels.
+ final int levels = BitsUtil.magnitude(sizereq / this.destsize) - 1;
+ assert (levels > 0) : "No resampling required?!?";
+ final int step = 1 << levels;
+
+ final int fixpoint = off / (step - 1);
+ {
+ // Start positions for in-place bottom-up downsampling.
+ int oup = fixpoint;
+ int inp = (oup << levels) - off;
+ assert (-step < inp && inp <= oup && oup < inp + step) : (inp + " -> " + oup + " s=" + step + " o=" + off + " l=" + levels);
+ for (; inp < size; inp += step, oup++) {
+ assert (oup < inp + step && oup < data.length);
+ data[oup] = downsample(data, Math.max(0, inp), Math.min(size, inp + step), step);
+ }
+ // Clean upwards
+ for (; oup < data.length; oup++) {
+ data[oup] = null;
+ }
+ }
+ if (off >= step) {
+ // Start positions for in-place downsampling top-down:
+ int oup = fixpoint - 1;
+ int inp = (oup << levels) - off;
+ assert (oup > inp) : (inp + " -> " + oup + " s=" + step + " o=" + off + " l=" + levels);
+ for (; inp > -step; inp -= step, oup--) {
+ assert (oup >= inp && oup >= 0);
+ data[oup] = downsample(data, Math.max(0, inp), Math.min(size, inp + step), step);
+ }
+ for (; oup >= 0; oup--) {
+ data[oup] = makeObject();
+ }
+ }
+ // recalculate histogram base.
+ base = base - (offset + off) * binsize;
+ offset = 0;
+ size = (size + 1) >> levels;
+ binsize = binsize * (1 << levels);
+ max = base + binsize * size;
+ }
+
+ @Override
+ public Iter iter() {
+ materialize();
+ return super.iter();
+ }
+
+ @Override
+ public int getNumBins() {
+ materialize();
+ return super.getNumBins();
+ }
+
+ @Override
+ public double getBinsize() {
+ materialize();
+ return super.getBinsize();
+ }
+
+ @Override
+ public double getCoverMinimum() {
+ materialize();
+ return super.getCoverMinimum();
+ }
+
+ @Override
+ public double getCoverMaximum() {
+ materialize();
+ return super.getCoverMaximum();
+ }
+
+ /**
+ * Perform downsampling on a number of bins.
+ *
+ * @param data Data array (needs cast!)
+ * @param start Interval start
+ * @param end Interval end (exclusive)
+ * @param size Intended size - extra bins are assumed to be empty, should be a
+ * power of two
+ * @return New bin value
+ */
+ protected abstract T downsample(Object[] data, int start, int end, int size);
+
+ /**
+ * Rule to combine two bins or entries into one.
+ *
+ * Note: first and second MAY be modified and returned, they will not be used
+ * afterwards.
+ *
+ * @param first First bin value
+ * @param second Second bin value
+ * @return combined bin value
+ */
+ protected abstract T aggregate(T first, T second);
+
+ /**
+ * Clone a data passed to the algorithm for computing the initial size.
+ *
+ * @param data Data to be cloned
+ * @return cloned data
+ */
+ protected abstract T cloneForCache(T data);
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractObjStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractObjStaticHistogram.java
new file mode 100644
index 00000000..c1882302
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractObjStaticHistogram.java
@@ -0,0 +1,129 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Histogram class storing double values.
+ *
+ * The histogram will start with "bin" bins, but it can grow dynamicall to the
+ * left and right.
+ *
+ * @author Erich Schubert
+ *
+ * @param <T> Data type
+ */
+public abstract class AbstractObjStaticHistogram<T> extends AbstractStaticHistogram implements ObjHistogram<T> {
+ /**
+ * Constructor.
+ *
+ * @param bins Number of bins
+ * @param min Cover minimum
+ * @param max Cover maximum
+ */
+ public AbstractObjStaticHistogram(int bins, double min, double max) {
+ super(bins, min, max);
+ if (bins >= 0) {
+ // -1 will be used by FlexiHistogram to delay initialization.
+ data = new Object[bins];
+ }
+ }
+
+ /**
+ * Data store
+ */
+ Object[] data;
+
+ /**
+ * Access the value of a bin with new data.
+ *
+ * @param coord Coordinate
+ * @return bin contents
+ */
+ @SuppressWarnings("unchecked")
+ public T get(double coord) {
+ int bin = getBinNr(coord);
+ if (bin < 0) {
+ if (size - bin > data.length) {
+ // Reallocate. TODO: use an arraylist-like grow strategy!
+ Object[] tmpdata = new Object[growSize(data.length, size - bin)];
+ System.arraycopy(data, 0, tmpdata, -bin, size);
+ data = tmpdata;
+ } else {
+ // Shift in place
+ System.arraycopy(data, 0, data, -bin, size);
+ }
+ for (int i = 0; i < -bin; i++) {
+ data[i] = makeObject();
+ }
+ // Note that bin is negative, -bin is the shift offset!
+ offset -= bin;
+ size -= bin;
+ // TODO: modCounter++; and have iterators fast-fail
+ // Unset max value when resizing
+ max = Double.MAX_VALUE;
+ return (T) data[0];
+ } else if (bin >= size) {
+ if (bin >= data.length) {
+ Object[] tmpdata = new Object[growSize(data.length, bin + 1)];
+ System.arraycopy(data, 0, tmpdata, 0, size);
+ data = tmpdata;
+ }
+ for (int i = size; i <= bin; i++) {
+ data[i] = makeObject();
+ }
+ size = bin + 1;
+ // TODO: modCounter++; and have iterators fast-fail
+ // Unset max value when resizing
+ max = Double.MAX_VALUE;
+ return (T) data[bin];
+ } else {
+ return (T) data[bin];
+ }
+ }
+
+ /**
+ * Class to make a new object for the data store.
+ *
+ * @return New instance.
+ */
+ protected abstract T makeObject();
+
+ @Override
+ public Iter iter() {
+ return new Iter();
+ }
+
+ /**
+ * Iterator class.
+ *
+ * @author Erich Schubert
+ */
+ public class Iter extends AbstractStaticHistogram.Iter implements ObjHistogram.Iter<T> {
+ @SuppressWarnings("unchecked")
+ @Override
+ public T getValue() {
+ return (T) data[bin];
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractStaticHistogram.java
new file mode 100644
index 00000000..799ac009
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractStaticHistogram.java
@@ -0,0 +1,220 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+/**
+ * Abstract base class for histograms.
+ *
+ * Note that this is abstracted from the actual data storage, so it can be
+ * adapted for multiple use cases.
+ *
+ * @author Erich Schubert
+ */
+public abstract class AbstractStaticHistogram implements Histogram {
+ /**
+ * Array shift to account for negative indices.
+ */
+ protected int offset = 0;
+
+ /**
+ * Size of array storage.
+ */
+ protected int size;
+
+ /**
+ * Array 'base', i.e. the point of 0.0. Usually the minimum.
+ */
+ protected double base;
+
+ /**
+ * To avoid introducing an extra bucket for the maximum value.
+ */
+ protected double max;
+
+ /**
+ * Width of a bin.
+ */
+ protected double binsize;
+
+ /**
+ * Histogram constructor
+ *
+ * @param bins Number of bins to use.
+ * @param min Minimum Value
+ * @param max Maximum Value
+ */
+ public AbstractStaticHistogram(int bins, double min, double max) {
+ this.base = min;
+ this.max = max;
+ this.binsize = (max - min) / bins;
+ this.size = bins;
+ }
+
+ /**
+ * Compute the bin number. Has a special case for rounding max down to the
+ * last bin.
+ *
+ * @param coord Coordinate
+ * @return bin number
+ */
+ protected int getBinNr(double coord) {
+ if (Double.isInfinite(coord) || Double.isNaN(coord)) {
+ throw new UnsupportedOperationException("Encountered non-finite value in Histogram: " + coord);
+ }
+ if (coord == max) {
+ // System.err.println("Triggered special case: "+ (Math.floor((coord -
+ // base) / binsize) + offset) + " vs. " + (size - 1));
+ return size - 1;
+ }
+ return (int) Math.floor((coord - base) / binsize) + offset;
+ }
+
+ /**
+ * Compute the size to grow to.
+ *
+ * @param current Current size
+ * @param requiredSize Required size
+ * @return Size to allocate
+ */
+ protected static int growSize(int current, int requiredSize) {
+ // Double until 64, then increase by 50% each time.
+ int newCapacity = ((current < 64) ? ((current + 1) << 1) : ((current >> 1) * 3));
+ // overflow?
+ if (newCapacity < 0) {
+ throw new OutOfMemoryError();
+ }
+ if (requiredSize > newCapacity) {
+ newCapacity = requiredSize;
+ }
+ return requiredSize;
+ }
+
+ /**
+ * Get the number of bins actually in use.
+ *
+ * @return number of bins
+ */
+ @Override
+ public int getNumBins() {
+ return size;
+ }
+
+ /**
+ * Get the size (width) of a bin.
+ *
+ * @return bin size
+ */
+ @Override
+ public double getBinsize() {
+ return binsize;
+ }
+
+ /**
+ * Get minimum (covered by bins, not data!)
+ *
+ * @return minimum
+ */
+ @Override
+ public double getCoverMinimum() {
+ return base - offset * binsize;
+ }
+
+ /**
+ * Get maximum (covered by bins, not data!)
+ *
+ * @return maximum
+ */
+ @Override
+ public double getCoverMaximum() {
+ return base + (size - offset) * binsize;
+ }
+
+ /**
+ * Get an iterator over all histogram bins.
+ *
+ * @return Iterator
+ */
+ @Override
+ public abstract Iter iter();
+
+ /**
+ * Iterator class to iterate over all bins.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public abstract class Iter implements Histogram.Iter {
+ /**
+ * Current bin number
+ */
+ int bin = 0;
+
+ @Override
+ public double getCenter() {
+ return base + (bin + 0.5 - offset) * binsize;
+ }
+
+ @Override
+ public double getLeft() {
+ return base + (bin - offset) * binsize;
+ }
+
+ @Override
+ public double getRight() {
+ return base + (bin + 1 - offset) * binsize;
+ }
+
+ @Override
+ public boolean valid() {
+ return bin >= 0 && bin < size;
+ }
+
+ @Override
+ public void advance() {
+ bin++;
+ }
+
+ @Override
+ public int getOffset() {
+ return bin;
+ }
+
+ @Override
+ public void advance(int count) {
+ bin += count;
+ }
+
+ @Override
+ public void retract() {
+ bin--;
+ }
+
+ @Override
+ public void seek(int off) {
+ bin = off;
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleArrayStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleArrayStaticHistogram.java
new file mode 100644
index 00000000..aeba3c4b
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleArrayStaticHistogram.java
@@ -0,0 +1,85 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
+
+/*
+This file is part of ELKI:
+Environment for Developing KDD-Applications Supported by Index-Structures
+
+Copyright (C) 2012
+Ludwig-Maximilians-Universität München
+Lehr- und Forschungseinheit für Datenbanksysteme
+ELKI Development Team
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * Histogram containing an array of doubles (i.e. {@code double[]}). This is
+ * actually one of the simpler specializations, as arrays are objects not
+ * primitive in Java.
+ *
+ * @author Erich Schubert
+ */
+public final class DoubleArrayStaticHistogram extends AbstractObjStaticHistogram<double[]> {
+ /**
+ * Desired number of columns in each bin.
+ */
+ private final int cols;
+
+ /**
+ * Constructor.
+ *
+ * @param bins Number of bins
+ * @param min Minimum value for the coordinates
+ * @param max Maximum value for the coordinates
+ * @param cols Number of columns in each bin.
+ */
+ public DoubleArrayStaticHistogram(int bins, double min, double max, int cols) {
+ super(bins, min, max);
+ this.cols = cols;
+ for (int i = 0; i < bins; i++) {
+ data[i] = new double[cols];
+ }
+ }
+
+ /**
+ * Increment histogram by a double[].
+ *
+ * @param coord Coordinate
+ * @param data Data to increment by.
+ */
+ public void increment(double coord, double[] data) {
+ double[] existing = get(coord);
+ for (int i = 0; i < existing.length; i++) {
+ existing[i] += data[i];
+ }
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * Data is combined by incrementing.
+ *
+ * @deprecated use the explicit {@link #increment} instead.
+ */
+ @Deprecated
+ @Override
+ public void putData(double coord, double[] data) {
+ increment(coord, data);
+ }
+
+ @Override
+ protected double[] makeObject() {
+ return new double[cols];
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleDynamicHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleDynamicHistogram.java
new file mode 100644
index 00000000..77a1f9e4
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleDynamicHistogram.java
@@ -0,0 +1,248 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.math.scales.LinearScale;
+import de.lmu.ifi.dbs.elki.utilities.BitsUtil;
+
+/**
+ * A flexible histogram storing double, that can dynamically adapt the number of
+ * bins to the data fed into the histogram.
+ *
+ * @author Erich Schubert
+ */
+public class DoubleDynamicHistogram extends DoubleStaticHistogram {
+ /**
+ * Cache for data to be inserted.
+ */
+ private double[] cachec;
+
+ /**
+ * Cache for data to be inserted.
+ */
+ private double[] cachev;
+
+ /**
+ * Cache fill size
+ */
+ private int cachefill;
+
+ /**
+ * Destination (minimum) size of the structure.
+ * At most destsize * 2 bins are allowed.
+ */
+ private int destsize;
+
+ /**
+ * Constructor.
+ *
+ * @param bins Design number of bins - may become twice as large!
+ */
+ public DoubleDynamicHistogram(int bins) {
+ super(-1, 0.0, 1.0);
+ this.destsize = bins;
+ cachec = new double[this.destsize << CACHE_SHIFT];
+ cachev = new double[this.destsize << CACHE_SHIFT];
+ cachefill = 0;
+ }
+
+ /**
+ * Materialize the histogram from the cache.
+ */
+ void materialize() {
+ // already materialized?
+ if (cachefill < 0) {
+ return;
+ }
+ // Compute minimum and maximum
+ double min = Double.MAX_VALUE, max = Double.MIN_VALUE;
+ for (int i = 0; i < cachefill; i++) {
+ min = Math.min(min, cachec[i]);
+ max = Math.max(max, cachec[i]);
+ }
+ // use the LinearScale magic to round to "likely suiteable" step sizes.
+ // TODO: extract into a reusable function?
+ LinearScale scale = new LinearScale(min, max);
+ min = scale.getMin();
+ max = scale.getMax();
+ this.base = min;
+ this.max = max;
+ this.binsize = (max - min) / this.destsize;
+ // initialize array
+ this.data = new double[this.destsize << 1];
+ size = destsize;
+ // re-insert data we have
+ final int end = cachefill;
+ cachefill = -1; // So reinsert works!
+ for (int i = 0; i < end; i++) {
+ increment(cachec[i], cachev[i]);
+ }
+ // delete cache, signal that we're initialized
+ cachec = null;
+ cachev = null;
+ }
+
+ @Override
+ public double get(double coord) {
+ materialize();
+ testResample(coord);
+ return super.get(coord);
+ }
+
+ /**
+ * Put fresh data into the histogram (or into the cache)
+ *
+ * @param coord Coordinate
+ * @param value Value
+ */
+ @Override
+ public void increment(double coord, double value) {
+ // Store in cache
+ if (cachefill >= 0) {
+ if (cachefill < cachec.length) {
+ cachec[cachefill] = coord;
+ cachev[cachefill] = value;
+ cachefill ++;
+ return;
+ } else {
+ materialize();
+ // But continue below!
+ }
+ }
+ // Check if we need to resample to accomodate this bin.
+ testResample(coord);
+ // super class will handle histogram resizing / shifting
+ super.increment(coord, value);
+ }
+
+ /**
+ * Test (and perform) downsampling when neede.
+ *
+ * @param coord coordinate to accomodate.
+ */
+ private void testResample(double coord) {
+ final int bin = getBinNr(coord);
+ final int sizereq, off;
+ if (bin < 0) {
+ sizereq = size - bin;
+ off = -bin;
+ } else if (bin >= data.length) {
+ sizereq = bin + 1;
+ off = 0;
+ } else {
+ // Within the designated size - nothing to do.
+ return;
+ }
+ if (sizereq < data.length) {
+ // Accomodate by shifting. Let super do the job in {@link #get}
+ return;
+ }
+ // Resampling, eventually by multiple levels.
+ final int levels = BitsUtil.magnitude(sizereq / this.destsize) - 1;
+ assert (levels > 0) : "No resampling required?!? sizereq=" + sizereq + " destsize=" + destsize + " array=" + data.length;
+ final int step = 1 << levels;
+
+ final int fixpoint = off / (step - 1);
+ {
+ // Start positions for in-place bottom-up downsampling.
+ int oup = fixpoint;
+ int inp = (oup << levels) - off;
+ assert (-step < inp && inp <= oup && oup < inp + step) : (inp + " -> " + oup + " s=" + step + " o=" + off + " l=" + levels);
+ for (; inp < size; inp += step, oup++) {
+ assert (oup < inp + step && oup < data.length);
+ data[oup] = downsample(data, Math.max(0, inp), Math.min(size, inp + step), step);
+ }
+ // Clean upwards
+ for (; oup < data.length; oup++) {
+ data[oup] = 0;
+ }
+ }
+ if (off > 0) {
+ // Start positions for in-place downsampling top-down:
+ int oup = fixpoint - 1;
+ int inp = (oup << levels) - off;
+ assert (oup > inp) : (inp + " -> " + oup + " s=" + step + " o=" + off + " l=" + levels);
+ for (; inp > -step; inp -= step, oup--) {
+ assert (oup >= inp && oup >= 0);
+ data[oup] = downsample(data, Math.max(0, inp), Math.min(size, inp + step), step);
+ }
+ for (; oup >= 0; oup--) {
+ data[oup] = 0;
+ }
+ }
+ // recalculate histogram base.
+ base = base - (offset + off) * binsize;
+ offset = 0;
+ size = (size + 1) >> levels;
+ binsize = binsize * (1 << levels);
+ max = base + binsize * size;
+ }
+
+ @Override
+ public Iter iter() {
+ materialize();
+ return super.iter();
+ }
+
+ @Override
+ public int getNumBins() {
+ materialize();
+ return super.getNumBins();
+ }
+
+ @Override
+ public double getBinsize() {
+ materialize();
+ return super.getBinsize();
+ }
+
+ @Override
+ public double getCoverMinimum() {
+ materialize();
+ return super.getCoverMinimum();
+ }
+
+ @Override
+ public double getCoverMaximum() {
+ materialize();
+ return super.getCoverMaximum();
+ }
+
+ /**
+ * Perform downsampling on a number of bins.
+ *
+ * @param data Data array (needs cast!)
+ * @param start Interval start
+ * @param end Interval end (exclusive)
+ * @param size Intended size - extra bins are assumed to be empty, should be a
+ * power of two
+ * @return New bin value
+ */
+ protected double downsample(double[] data, int start, int end, int size) {
+ double sum = 0;
+ for (int i = start; i < end; i++) {
+ sum += data[i];
+ }
+ return sum;
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleHistogram.java
new file mode 100644
index 00000000..d5cee785
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleHistogram.java
@@ -0,0 +1,58 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Histogram storing double values.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has Iter
+ */
+public interface DoubleHistogram extends Histogram {
+ /**
+ * Increment the histogram at a given position.
+ *
+ * @param coord Coordinate
+ * @param value Value to increment by
+ */
+ public void increment(double coord, double value);
+
+ @Override
+ public Iter iter();
+
+ /**
+ * Iterator interface.
+ *
+ * @author Erich Schubert
+ */
+ public static interface Iter extends Histogram.Iter {
+ /**
+ * Get the value of the bin.
+ *
+ * @return Bin value
+ */
+ public double getValue();
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleStaticHistogram.java
new file mode 100644
index 00000000..db839d10
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleStaticHistogram.java
@@ -0,0 +1,132 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Arrays;
+
+/**
+ * Histogram class storing double values.
+ *
+ * The histogram will start with "bin" bins, but it can grow dynamically to the
+ * left and right.
+ *
+ * @author Erich Schubert
+ */
+public class DoubleStaticHistogram extends AbstractStaticHistogram implements DoubleHistogram {
+ /**
+ * Constructor.
+ *
+ * @param bins Number of bins
+ * @param min Cover minimum
+ * @param max Cover maximum
+ */
+ public DoubleStaticHistogram(int bins, double min, double max) {
+ super(bins, min, max);
+ if (bins >= 0) {
+ data = new double[bins];
+ } else {
+ data = null;
+ }
+ }
+
+ /**
+ * Data store
+ */
+ double[] data;
+
+ /**
+ * Increment the value of a bin.
+ *
+ * @param coord Coordinate
+ * @param val Value
+ */
+ @Override
+ public void increment(double coord, double val) {
+ int bin = getBinNr(coord);
+ if (bin < 0) {
+ if (size - bin > data.length) {
+ // Reallocate. TODO: use an arraylist-like grow strategy!
+ double[] tmpdata = new double[growSize(data.length, size - bin)];
+ System.arraycopy(data, 0, tmpdata, -bin, size);
+ data = tmpdata;
+ } else {
+ // Shift in place and clear head
+ System.arraycopy(data, 0, data, -bin, size);
+ Arrays.fill(data, 0, -bin, (double) 0);
+ }
+ data[0] = val;
+ // Note that bin is negative, -bin is the shift offset!
+ assert (data.length >= size - bin);
+ offset -= bin;
+ size -= bin;
+ // TODO: modCounter++; and have iterators fast-fail
+ } else if (bin >= data.length) {
+ double[] tmpdata = new double[growSize(data.length, bin + 1)];
+ System.arraycopy(data, 0, tmpdata, 0, size);
+ tmpdata[bin] = val;
+ data = tmpdata;
+ size = bin + 1;
+ // TODO: modCounter++; and have iterators fast-fail
+ // Unset max value when resizing
+ max = Double.MAX_VALUE;
+ } else {
+ if (bin >= size) {
+ // TODO: reset bins to 0 first?
+ size = bin + 1;
+ }
+ data[bin] += val;
+ }
+ }
+
+ /**
+ * Get the value at a particular position.
+ *
+ * @param coord Coordinate
+ * @return Value
+ */
+ public double get(double coord) {
+ int bin = getBinNr(coord);
+ if (bin < 0 || bin >= size) {
+ return 0;
+ }
+ return data[bin];
+ }
+
+ @Override
+ public Iter iter() {
+ return new Iter();
+ }
+
+ /**
+ * Iterator class.
+ *
+ * @author Erich Schubert
+ */
+ public class Iter extends AbstractStaticHistogram.Iter implements DoubleHistogram.Iter {
+ @Override
+ public double getValue() {
+ return data[bin];
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatDynamicHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatDynamicHistogram.java
new file mode 100644
index 00000000..a14ed00a
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatDynamicHistogram.java
@@ -0,0 +1,248 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.math.scales.LinearScale;
+import de.lmu.ifi.dbs.elki.utilities.BitsUtil;
+
+/**
+ * A flexible histogram storing float, that can dynamically adapt the number of
+ * bins to the data fed into the histogram.
+ *
+ * @author Erich Schubert
+ */
+public class FloatDynamicHistogram extends FloatStaticHistogram {
+ /**
+ * Cache for data to be inserted.
+ */
+ private double[] cachec;
+
+ /**
+ * Cache for data to be inserted.
+ */
+ private float[] cachev;
+
+ /**
+ * Cache fill size
+ */
+ private int cachefill;
+
+ /**
+ * Destination (minimum) size of the structure.
+ * At most destsize * 2 bins are allowed.
+ */
+ private int destsize;
+
+ /**
+ * Constructor.
+ *
+ * @param bins Design number of bins - may become twice as large!
+ */
+ public FloatDynamicHistogram(int bins) {
+ super(-1, 0.0, 1.0);
+ this.destsize = bins;
+ cachec = new double[this.destsize << CACHE_SHIFT];
+ cachev = new float[this.destsize << CACHE_SHIFT];
+ cachefill = 0;
+ }
+
+ /**
+ * Materialize the histogram from the cache.
+ */
+ void materialize() {
+ // already materialized?
+ if (cachefill < 0) {
+ return;
+ }
+ // Compute minimum and maximum
+ double min = Double.MAX_VALUE, max = Double.MIN_VALUE;
+ for (int i = 0; i < cachefill; i++) {
+ min = Math.min(min, cachec[i]);
+ max = Math.max(max, cachec[i]);
+ }
+ // use the LinearScale magic to round to "likely suiteable" step sizes.
+ // TODO: extract into a reusable function?
+ LinearScale scale = new LinearScale(min, max);
+ min = scale.getMin();
+ max = scale.getMax();
+ this.base = min;
+ this.max = max;
+ this.binsize = (max - min) / this.destsize;
+ // initialize array
+ this.data = new float[this.destsize << 1];
+ size = destsize;
+ // re-insert data we have
+ final int end = cachefill;
+ cachefill = -1; // So reinsert works!
+ for (int i = 0; i < end; i++) {
+ increment(cachec[i], cachev[i]);
+ }
+ // delete cache, signal that we're initialized
+ cachec = null;
+ cachev = null;
+ }
+
+ @Override
+ public float get(double coord) {
+ materialize();
+ testResample(coord);
+ return super.get(coord);
+ }
+
+ /**
+ * Put fresh data into the histogram (or into the cache)
+ *
+ * @param coord Coordinate
+ * @param value Value
+ */
+ @Override
+ public void increment(double coord, float value) {
+ // Store in cache
+ if (cachefill >= 0) {
+ if (cachefill < cachec.length) {
+ cachec[cachefill] = coord;
+ cachev[cachefill] = value;
+ cachefill ++;
+ return;
+ } else {
+ materialize();
+ // But continue below!
+ }
+ }
+ // Check if we need to resample to accomodate this bin.
+ testResample(coord);
+ // super class will handle histogram resizing / shifting
+ super.increment(coord, value);
+ }
+
+ /**
+ * Test (and perform) downsampling when neede.
+ *
+ * @param coord coordinate to accomodate.
+ */
+ private void testResample(double coord) {
+ final int bin = getBinNr(coord);
+ final int sizereq, off;
+ if (bin < 0) {
+ sizereq = size - bin;
+ off = -bin;
+ } else if (bin >= data.length) {
+ sizereq = bin + 1;
+ off = 0;
+ } else {
+ // Within the designated size - nothing to do.
+ return;
+ }
+ if (sizereq < data.length) {
+ // Accomodate by shifting. Let super do the job in {@link #get}
+ return;
+ }
+ // Resampling, eventually by multiple levels.
+ final int levels = BitsUtil.magnitude(sizereq / this.destsize) - 1;
+ assert (levels > 0) : "No resampling required?!? sizereq=" + sizereq + " destsize=" + destsize + " array=" + data.length;
+ final int step = 1 << levels;
+
+ final int fixpoint = off / (step - 1);
+ {
+ // Start positions for in-place bottom-up downsampling.
+ int oup = fixpoint;
+ int inp = (oup << levels) - off;
+ assert (-step < inp && inp <= oup && oup < inp + step) : (inp + " -> " + oup + " s=" + step + " o=" + off + " l=" + levels);
+ for (; inp < size; inp += step, oup++) {
+ assert (oup < inp + step && oup < data.length);
+ data[oup] = downsample(data, Math.max(0, inp), Math.min(size, inp + step), step);
+ }
+ // Clean upwards
+ for (; oup < data.length; oup++) {
+ data[oup] = 0;
+ }
+ }
+ if (off > 0) {
+ // Start positions for in-place downsampling top-down:
+ int oup = fixpoint - 1;
+ int inp = (oup << levels) - off;
+ assert (oup > inp) : (inp + " -> " + oup + " s=" + step + " o=" + off + " l=" + levels);
+ for (; inp > -step; inp -= step, oup--) {
+ assert (oup >= inp && oup >= 0);
+ data[oup] = downsample(data, Math.max(0, inp), Math.min(size, inp + step), step);
+ }
+ for (; oup >= 0; oup--) {
+ data[oup] = 0;
+ }
+ }
+ // recalculate histogram base.
+ base = base - (offset + off) * binsize;
+ offset = 0;
+ size = (size + 1) >> levels;
+ binsize = binsize * (1 << levels);
+ max = base + binsize * size;
+ }
+
+ @Override
+ public Iter iter() {
+ materialize();
+ return super.iter();
+ }
+
+ @Override
+ public int getNumBins() {
+ materialize();
+ return super.getNumBins();
+ }
+
+ @Override
+ public double getBinsize() {
+ materialize();
+ return super.getBinsize();
+ }
+
+ @Override
+ public double getCoverMinimum() {
+ materialize();
+ return super.getCoverMinimum();
+ }
+
+ @Override
+ public double getCoverMaximum() {
+ materialize();
+ return super.getCoverMaximum();
+ }
+
+ /**
+ * Perform downsampling on a number of bins.
+ *
+ * @param data Data array (needs cast!)
+ * @param start Interval start
+ * @param end Interval end (exclusive)
+ * @param size Intended size - extra bins are assumed to be empty, should be a
+ * power of two
+ * @return New bin value
+ */
+ protected float downsample(float[] data, int start, int end, int size) {
+ float sum = 0;
+ for (int i = start; i < end; i++) {
+ sum += data[i];
+ }
+ return sum;
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatHistogram.java
new file mode 100644
index 00000000..f5a65bfa
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatHistogram.java
@@ -0,0 +1,58 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Histogram storing float values.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has Iter
+ */
+public interface FloatHistogram extends Histogram {
+ /**
+ * Increment the histogram at a given position.
+ *
+ * @param coord Coordinate
+ * @param value Value to increment by
+ */
+ public void increment(double coord, float value);
+
+ @Override
+ public Iter iter();
+
+ /**
+ * Iterator interface.
+ *
+ * @author Erich Schubert
+ */
+ public static interface Iter extends Histogram.Iter {
+ /**
+ * Get the value of the bin.
+ *
+ * @return Bin value
+ */
+ public float getValue();
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatStaticHistogram.java
new file mode 100644
index 00000000..b3f41994
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatStaticHistogram.java
@@ -0,0 +1,132 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Arrays;
+
+/**
+ * Histogram class storing float values.
+ *
+ * The histogram will start with "bin" bins, but it can grow dynamically to the
+ * left and right.
+ *
+ * @author Erich Schubert
+ */
+public class FloatStaticHistogram extends AbstractStaticHistogram implements FloatHistogram {
+ /**
+ * Constructor.
+ *
+ * @param bins Number of bins
+ * @param min Cover minimum
+ * @param max Cover maximum
+ */
+ public FloatStaticHistogram(int bins, double min, double max) {
+ super(bins, min, max);
+ if (bins >= 0) {
+ data = new float[bins];
+ } else {
+ data = null;
+ }
+ }
+
+ /**
+ * Data store
+ */
+ float[] data;
+
+ /**
+ * Increment the value of a bin.
+ *
+ * @param coord Coordinate
+ * @param val Value
+ */
+ @Override
+ public void increment(double coord, float val) {
+ int bin = getBinNr(coord);
+ if (bin < 0) {
+ if (size - bin > data.length) {
+ // Reallocate. TODO: use an arraylist-like grow strategy!
+ float[] tmpdata = new float[growSize(data.length, size - bin)];
+ System.arraycopy(data, 0, tmpdata, -bin, size);
+ data = tmpdata;
+ } else {
+ // Shift in place and clear head
+ System.arraycopy(data, 0, data, -bin, size);
+ Arrays.fill(data, 0, -bin, (float) 0);
+ }
+ data[0] = val;
+ // Note that bin is negative, -bin is the shift offset!
+ assert (data.length >= size - bin);
+ offset -= bin;
+ size -= bin;
+ // TODO: modCounter++; and have iterators fast-fail
+ } else if (bin >= data.length) {
+ float[] tmpdata = new float[growSize(data.length, bin + 1)];
+ System.arraycopy(data, 0, tmpdata, 0, size);
+ tmpdata[bin] = val;
+ data = tmpdata;
+ size = bin + 1;
+ // TODO: modCounter++; and have iterators fast-fail
+ // Unset max value when resizing
+ max = Double.MAX_VALUE;
+ } else {
+ if (bin >= size) {
+ // TODO: reset bins to 0 first?
+ size = bin + 1;
+ }
+ data[bin] += val;
+ }
+ }
+
+ /**
+ * Get the value at a particular position.
+ *
+ * @param coord Coordinate
+ * @return Value
+ */
+ public float get(double coord) {
+ int bin = getBinNr(coord);
+ if (bin < 0 || bin >= size) {
+ return 0;
+ }
+ return data[bin];
+ }
+
+ @Override
+ public Iter iter() {
+ return new Iter();
+ }
+
+ /**
+ * Iterator class.
+ *
+ * @author Erich Schubert
+ */
+ public class Iter extends AbstractStaticHistogram.Iter implements FloatHistogram.Iter {
+ @Override
+ public float getValue() {
+ return data[bin];
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/Histogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/Histogram.java
new file mode 100644
index 00000000..75be6830
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/Histogram.java
@@ -0,0 +1,105 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.utilities.iterator.ArrayIter;
+
+/**
+ * Abstract API for histograms. Without specific type information, to allow this
+ * to be shared for primitive types, too!
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has Iter
+ */
+public interface Histogram {
+ /**
+ * This parameter controls the cache size used for dynamic histograms before
+ * setting the initial thresholds.
+ */
+ public final static int CACHE_SHIFT = 2;
+
+ /**
+ * Get the number of bins actually in use.
+ *
+ * @return number of bins
+ */
+ public abstract int getNumBins();
+
+ /**
+ * Get the size (width) of a bin.
+ *
+ * @return bin size
+ */
+ public abstract double getBinsize();
+
+ /**
+ * Get minimum (covered by bins, not data!)
+ *
+ * @return minimum
+ */
+ public abstract double getCoverMinimum();
+
+ /**
+ * Get maximum (covered by bins, not data!)
+ *
+ * @return maximum
+ */
+ public abstract double getCoverMaximum();
+
+ /**
+ * Get an iterator over all histogram bins.
+ *
+ * @return Iterator
+ */
+ public abstract Iter iter();
+
+ /**
+ * Array iterator.
+ *
+ * @author Erich Schubert
+ */
+ public static interface Iter extends ArrayIter {
+ /**
+ * Get the bin center.
+ *
+ * @return bin center value
+ */
+ public double getCenter();
+
+ /**
+ * Get the bin minimum.
+ *
+ * @return bin left value
+ */
+ public double getLeft();
+
+ /**
+ * Get the bin maximum.
+ *
+ * @return bin right value
+ */
+ public double getRight();
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntArrayStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntArrayStaticHistogram.java
new file mode 100644
index 00000000..8d00604b
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntArrayStaticHistogram.java
@@ -0,0 +1,85 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
+
+/*
+This file is part of ELKI:
+Environment for Developing KDD-Applications Supported by Index-Structures
+
+Copyright (C) 2012
+Ludwig-Maximilians-Universität München
+Lehr- und Forschungseinheit für Datenbanksysteme
+ELKI Development Team
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * Histogram containing an array of ints (i.e. {@code int[]}). This is
+ * actually one of the simpler specializations, as arrays are objects not
+ * primitive in Java.
+ *
+ * @author Erich Schubert
+ */
+public final class IntArrayStaticHistogram extends AbstractObjStaticHistogram<int[]> {
+ /**
+ * Desired number of columns in each bin.
+ */
+ private final int cols;
+
+ /**
+ * Constructor.
+ *
+ * @param bins Number of bins
+ * @param min Minimum value for the coordinates
+ * @param max Maximum value for the coordinates
+ * @param cols Number of columns in each bin.
+ */
+ public IntArrayStaticHistogram(int bins, double min, double max, int cols) {
+ super(bins, min, max);
+ this.cols = cols;
+ for (int i = 0; i < bins; i++) {
+ data[i] = new int[cols];
+ }
+ }
+
+ /**
+ * Increment histogram by a double[].
+ *
+ * @param coord Coordinate
+ * @param data Data to increment by.
+ */
+ public void increment(double coord, int[] data) {
+ int[] existing = get(coord);
+ for (int i = 0; i < existing.length; i++) {
+ existing[i] += data[i];
+ }
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * Data is combined by incrementing.
+ *
+ * @deprecated use the explicit {@link #increment} instead.
+ */
+ @Deprecated
+ @Override
+ public void putData(double coord, int[] data) {
+ increment(coord, data);
+ }
+
+ @Override
+ protected int[] makeObject() {
+ return new int[cols];
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntDynamicHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntDynamicHistogram.java
new file mode 100644
index 00000000..0967ebd5
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntDynamicHistogram.java
@@ -0,0 +1,248 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.math.scales.LinearScale;
+import de.lmu.ifi.dbs.elki.utilities.BitsUtil;
+
+/**
+ * A flexible histogram storing int, that can dynamically adapt the number of
+ * bins to the data fed into the histogram.
+ *
+ * @author Erich Schubert
+ */
+public class IntDynamicHistogram extends IntStaticHistogram {
+ /**
+ * Cache for data to be inserted.
+ */
+ private double[] cachec;
+
+ /**
+ * Cache for data to be inserted.
+ */
+ private int[] cachev;
+
+ /**
+ * Cache fill size
+ */
+ private int cachefill;
+
+ /**
+ * Destination (minimum) size of the structure.
+ * At most destsize * 2 bins are allowed.
+ */
+ private int destsize;
+
+ /**
+ * Constructor.
+ *
+ * @param bins Design number of bins - may become twice as large!
+ */
+ public IntDynamicHistogram(int bins) {
+ super(-1, 0.0, 1.0);
+ this.destsize = bins;
+ cachec = new double[this.destsize << CACHE_SHIFT];
+ cachev = new int[this.destsize << CACHE_SHIFT];
+ cachefill = 0;
+ }
+
+ /**
+ * Materialize the histogram from the cache.
+ */
+ void materialize() {
+ // already materialized?
+ if (cachefill < 0) {
+ return;
+ }
+ // Compute minimum and maximum
+ double min = Double.MAX_VALUE, max = Double.MIN_VALUE;
+ for (int i = 0; i < cachefill; i++) {
+ min = Math.min(min, cachec[i]);
+ max = Math.max(max, cachec[i]);
+ }
+ // use the LinearScale magic to round to "likely suiteable" step sizes.
+ // TODO: extract into a reusable function?
+ LinearScale scale = new LinearScale(min, max);
+ min = scale.getMin();
+ max = scale.getMax();
+ this.base = min;
+ this.max = max;
+ this.binsize = (max - min) / this.destsize;
+ // initialize array
+ this.data = new int[this.destsize << 1];
+ size = destsize;
+ // re-insert data we have
+ final int end = cachefill;
+ cachefill = -1; // So reinsert works!
+ for (int i = 0; i < end; i++) {
+ increment(cachec[i], cachev[i]);
+ }
+ // delete cache, signal that we're initialized
+ cachec = null;
+ cachev = null;
+ }
+
+ @Override
+ public int get(double coord) {
+ materialize();
+ testResample(coord);
+ return super.get(coord);
+ }
+
+ /**
+ * Put fresh data into the histogram (or into the cache)
+ *
+ * @param coord Coordinate
+ * @param value Value
+ */
+ @Override
+ public void increment(double coord, int value) {
+ // Store in cache
+ if (cachefill >= 0) {
+ if (cachefill < cachec.length) {
+ cachec[cachefill] = coord;
+ cachev[cachefill] = value;
+ cachefill ++;
+ return;
+ } else {
+ materialize();
+ // But continue below!
+ }
+ }
+ // Check if we need to resample to accomodate this bin.
+ testResample(coord);
+ // super class will handle histogram resizing / shifting
+ super.increment(coord, value);
+ }
+
+ /**
+ * Test (and perform) downsampling when neede.
+ *
+ * @param coord coordinate to accomodate.
+ */
+ private void testResample(double coord) {
+ final int bin = getBinNr(coord);
+ final int sizereq, off;
+ if (bin < 0) {
+ sizereq = size - bin;
+ off = -bin;
+ } else if (bin >= data.length) {
+ sizereq = bin + 1;
+ off = 0;
+ } else {
+ // Within the designated size - nothing to do.
+ return;
+ }
+ if (sizereq < data.length) {
+ // Accomodate by shifting. Let super do the job in {@link #get}
+ return;
+ }
+ // Resampling, eventually by multiple levels.
+ final int levels = BitsUtil.magnitude(sizereq / this.destsize) - 1;
+ assert (levels > 0) : "No resampling required?!? sizereq=" + sizereq + " destsize=" + destsize + " array=" + data.length;
+ final int step = 1 << levels;
+
+ final int fixpoint = off / (step - 1);
+ {
+ // Start positions for in-place bottom-up downsampling.
+ int oup = fixpoint;
+ int inp = (oup << levels) - off;
+ assert (-step < inp && inp <= oup && oup < inp + step) : (inp + " -> " + oup + " s=" + step + " o=" + off + " l=" + levels);
+ for (; inp < size; inp += step, oup++) {
+ assert (oup < inp + step && oup < data.length);
+ data[oup] = downsample(data, Math.max(0, inp), Math.min(size, inp + step), step);
+ }
+ // Clean upwards
+ for (; oup < data.length; oup++) {
+ data[oup] = 0;
+ }
+ }
+ if (off > 0) {
+ // Start positions for in-place downsampling top-down:
+ int oup = fixpoint - 1;
+ int inp = (oup << levels) - off;
+ assert (oup > inp) : (inp + " -> " + oup + " s=" + step + " o=" + off + " l=" + levels);
+ for (; inp > -step; inp -= step, oup--) {
+ assert (oup >= inp && oup >= 0);
+ data[oup] = downsample(data, Math.max(0, inp), Math.min(size, inp + step), step);
+ }
+ for (; oup >= 0; oup--) {
+ data[oup] = 0;
+ }
+ }
+ // recalculate histogram base.
+ base = base - (offset + off) * binsize;
+ offset = 0;
+ size = (size + 1) >> levels;
+ binsize = binsize * (1 << levels);
+ max = base + binsize * size;
+ }
+
+ @Override
+ public Iter iter() {
+ materialize();
+ return super.iter();
+ }
+
+ @Override
+ public int getNumBins() {
+ materialize();
+ return super.getNumBins();
+ }
+
+ @Override
+ public double getBinsize() {
+ materialize();
+ return super.getBinsize();
+ }
+
+ @Override
+ public double getCoverMinimum() {
+ materialize();
+ return super.getCoverMinimum();
+ }
+
+ @Override
+ public double getCoverMaximum() {
+ materialize();
+ return super.getCoverMaximum();
+ }
+
+ /**
+ * Perform downsampling on a number of bins.
+ *
+ * @param data Data array (needs cast!)
+ * @param start Interval start
+ * @param end Interval end (exclusive)
+ * @param size Intended size - extra bins are assumed to be empty, should be a
+ * power of two
+ * @return New bin value
+ */
+ protected int downsample(int[] data, int start, int end, int size) {
+ int sum = 0;
+ for (int i = start; i < end; i++) {
+ sum += data[i];
+ }
+ return sum;
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntHistogram.java
new file mode 100644
index 00000000..9ec4ec56
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntHistogram.java
@@ -0,0 +1,58 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Histogram storing int values.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has Iter
+ */
+public interface IntHistogram extends Histogram {
+ /**
+ * Increment the histogram at a given position.
+ *
+ * @param coord Coordinate
+ * @param value Value to increment by
+ */
+ public void increment(double coord, int value);
+
+ @Override
+ public Iter iter();
+
+ /**
+ * Iterator interface.
+ *
+ * @author Erich Schubert
+ */
+ public static interface Iter extends Histogram.Iter {
+ /**
+ * Get the value of the bin.
+ *
+ * @return Bin value
+ */
+ public int getValue();
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntStaticHistogram.java
new file mode 100644
index 00000000..d4de36d7
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntStaticHistogram.java
@@ -0,0 +1,132 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Arrays;
+
+/**
+ * Histogram class storing int values.
+ *
+ * The histogram will start with "bin" bins, but it can grow dynamically to the
+ * left and right.
+ *
+ * @author Erich Schubert
+ */
+public class IntStaticHistogram extends AbstractStaticHistogram implements IntHistogram {
+ /**
+ * Constructor.
+ *
+ * @param bins Number of bins
+ * @param min Cover minimum
+ * @param max Cover maximum
+ */
+ public IntStaticHistogram(int bins, double min, double max) {
+ super(bins, min, max);
+ if (bins >= 0) {
+ data = new int[bins];
+ } else {
+ data = null;
+ }
+ }
+
+ /**
+ * Data store
+ */
+ int[] data;
+
+ /**
+ * Increment the value of a bin.
+ *
+ * @param coord Coordinate
+ * @param val Value
+ */
+ @Override
+ public void increment(double coord, int val) {
+ int bin = getBinNr(coord);
+ if (bin < 0) {
+ if (size - bin > data.length) {
+ // Reallocate. TODO: use an arraylist-like grow strategy!
+ int[] tmpdata = new int[growSize(data.length, size - bin)];
+ System.arraycopy(data, 0, tmpdata, -bin, size);
+ data = tmpdata;
+ } else {
+ // Shift in place and clear head
+ System.arraycopy(data, 0, data, -bin, size);
+ Arrays.fill(data, 0, -bin, (int) 0);
+ }
+ data[0] = val;
+ // Note that bin is negative, -bin is the shift offset!
+ assert (data.length >= size - bin);
+ offset -= bin;
+ size -= bin;
+ // TODO: modCounter++; and have iterators fast-fail
+ } else if (bin >= data.length) {
+ int[] tmpdata = new int[growSize(data.length, bin + 1)];
+ System.arraycopy(data, 0, tmpdata, 0, size);
+ tmpdata[bin] = val;
+ data = tmpdata;
+ size = bin + 1;
+ // TODO: modCounter++; and have iterators fast-fail
+ // Unset max value when resizing
+ max = Double.MAX_VALUE;
+ } else {
+ if (bin >= size) {
+ // TODO: reset bins to 0 first?
+ size = bin + 1;
+ }
+ data[bin] += val;
+ }
+ }
+
+ /**
+ * Get the value at a particular position.
+ *
+ * @param coord Coordinate
+ * @return Value
+ */
+ public int get(double coord) {
+ int bin = getBinNr(coord);
+ if (bin < 0 || bin >= size) {
+ return 0;
+ }
+ return data[bin];
+ }
+
+ @Override
+ public Iter iter() {
+ return new Iter();
+ }
+
+ /**
+ * Iterator class.
+ *
+ * @author Erich Schubert
+ */
+ public class Iter extends AbstractStaticHistogram.Iter implements IntHistogram.Iter {
+ @Override
+ public int getValue() {
+ return data[bin];
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongArrayStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongArrayStaticHistogram.java
new file mode 100644
index 00000000..efbf751f
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongArrayStaticHistogram.java
@@ -0,0 +1,85 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Histogram containing an array of longs (i.e. {@code long[]}). This is
+ * actually one of the simpler specializations, as arrays are objects not
+ * primitive in Java.
+ *
+ * @author Erich Schubert
+ */
+public final class LongArrayStaticHistogram extends AbstractObjStaticHistogram<long[]> {
+ /**
+ * Desired number of columns in each bin.
+ */
+ private final int cols;
+
+ /**
+ * Constructor.
+ *
+ * @param bins Number of bins
+ * @param min Minimum value for the coordinates
+ * @param max Maximum value for the coordinates
+ * @param cols Number of columns in each bin.
+ */
+ public LongArrayStaticHistogram(int bins, double min, double max, int cols) {
+ super(bins, min, max);
+ this.cols = cols;
+ for (int i = 0; i < bins; i++) {
+ data[i] = new long[cols];
+ }
+ }
+
+ /**
+ * Increment histogram by a double[].
+ *
+ * @param coord Coordinate
+ * @param data Data to increment by.
+ */
+ public void increment(double coord, long[] data) {
+ long[] existing = get(coord);
+ for (int i = 0; i < existing.length; i++) {
+ existing[i] += data[i];
+ }
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * Data is combined by incrementing.
+ *
+ * @deprecated use the explicit {@link #increment} instead.
+ */
+ @Deprecated
+ @Override
+ public void putData(double coord, long[] data) {
+ increment(coord, data);
+ }
+
+ @Override
+ protected long[] makeObject() {
+ return new long[cols];
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongDynamicHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongDynamicHistogram.java
new file mode 100644
index 00000000..676a5e8f
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongDynamicHistogram.java
@@ -0,0 +1,248 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.math.scales.LinearScale;
+import de.lmu.ifi.dbs.elki.utilities.BitsUtil;
+
+/**
+ * A flexible histogram storing long, that can dynamically adapt the number of
+ * bins to the data fed into the histogram.
+ *
+ * @author Erich Schubert
+ */
+public class LongDynamicHistogram extends LongStaticHistogram {
+ /**
+ * Cache for data to be inserted.
+ */
+ private double[] cachec;
+
+ /**
+ * Cache for data to be inserted.
+ */
+ private long[] cachev;
+
+ /**
+ * Cache fill size
+ */
+ private int cachefill;
+
+ /**
+ * Destination (minimum) size of the structure.
+ * At most destsize * 2 bins are allowed.
+ */
+ private int destsize;
+
+ /**
+ * Constructor.
+ *
+ * @param bins Design number of bins - may become twice as large!
+ */
+ public LongDynamicHistogram(int bins) {
+ super(-1, 0.0, 1.0);
+ this.destsize = bins;
+ cachec = new double[this.destsize << CACHE_SHIFT];
+ cachev = new long[this.destsize << CACHE_SHIFT];
+ cachefill = 0;
+ }
+
+ /**
+ * Materialize the histogram from the cache.
+ */
+ void materialize() {
+ // already materialized?
+ if (cachefill < 0) {
+ return;
+ }
+ // Compute minimum and maximum
+ double min = Double.MAX_VALUE, max = Double.MIN_VALUE;
+ for (int i = 0; i < cachefill; i++) {
+ min = Math.min(min, cachec[i]);
+ max = Math.max(max, cachec[i]);
+ }
+ // use the LinearScale magic to round to "likely suiteable" step sizes.
+ // TODO: extract into a reusable function?
+ LinearScale scale = new LinearScale(min, max);
+ min = scale.getMin();
+ max = scale.getMax();
+ this.base = min;
+ this.max = max;
+ this.binsize = (max - min) / this.destsize;
+ // initialize array
+ this.data = new long[this.destsize << 1];
+ size = destsize;
+ // re-insert data we have
+ final int end = cachefill;
+ cachefill = -1; // So reinsert works!
+ for (int i = 0; i < end; i++) {
+ increment(cachec[i], cachev[i]);
+ }
+ // delete cache, signal that we're initialized
+ cachec = null;
+ cachev = null;
+ }
+
+ @Override
+ public long get(double coord) {
+ materialize();
+ testResample(coord);
+ return super.get(coord);
+ }
+
+ /**
+ * Put fresh data into the histogram (or into the cache)
+ *
+ * @param coord Coordinate
+ * @param value Value
+ */
+ @Override
+ public void increment(double coord, long value) {
+ // Store in cache
+ if (cachefill >= 0) {
+ if (cachefill < cachec.length) {
+ cachec[cachefill] = coord;
+ cachev[cachefill] = value;
+ cachefill ++;
+ return;
+ } else {
+ materialize();
+ // But continue below!
+ }
+ }
+ // Check if we need to resample to accomodate this bin.
+ testResample(coord);
+ // super class will handle histogram resizing / shifting
+ super.increment(coord, value);
+ }
+
+ /**
+ * Test (and perform) downsampling when neede.
+ *
+ * @param coord coordinate to accomodate.
+ */
+ private void testResample(double coord) {
+ final int bin = getBinNr(coord);
+ final int sizereq, off;
+ if (bin < 0) {
+ sizereq = size - bin;
+ off = -bin;
+ } else if (bin >= data.length) {
+ sizereq = bin + 1;
+ off = 0;
+ } else {
+ // Within the designated size - nothing to do.
+ return;
+ }
+ if (sizereq < data.length) {
+ // Accomodate by shifting. Let super do the job in {@link #get}
+ return;
+ }
+ // Resampling, eventually by multiple levels.
+ final int levels = BitsUtil.magnitude(sizereq / this.destsize) - 1;
+ assert (levels > 0) : "No resampling required?!? sizereq=" + sizereq + " destsize=" + destsize + " array=" + data.length;
+ final int step = 1 << levels;
+
+ final int fixpoint = off / (step - 1);
+ {
+ // Start positions for in-place bottom-up downsampling.
+ int oup = fixpoint;
+ int inp = (oup << levels) - off;
+ assert (-step < inp && inp <= oup && oup < inp + step) : (inp + " -> " + oup + " s=" + step + " o=" + off + " l=" + levels);
+ for (; inp < size; inp += step, oup++) {
+ assert (oup < inp + step && oup < data.length);
+ data[oup] = downsample(data, Math.max(0, inp), Math.min(size, inp + step), step);
+ }
+ // Clean upwards
+ for (; oup < data.length; oup++) {
+ data[oup] = 0;
+ }
+ }
+ if (off > 0) {
+ // Start positions for in-place downsampling top-down:
+ int oup = fixpoint - 1;
+ int inp = (oup << levels) - off;
+ assert (oup > inp) : (inp + " -> " + oup + " s=" + step + " o=" + off + " l=" + levels);
+ for (; inp > -step; inp -= step, oup--) {
+ assert (oup >= inp && oup >= 0);
+ data[oup] = downsample(data, Math.max(0, inp), Math.min(size, inp + step), step);
+ }
+ for (; oup >= 0; oup--) {
+ data[oup] = 0;
+ }
+ }
+ // recalculate histogram base.
+ base = base - (offset + off) * binsize;
+ offset = 0;
+ size = (size + 1) >> levels;
+ binsize = binsize * (1 << levels);
+ max = base + binsize * size;
+ }
+
+ @Override
+ public Iter iter() {
+ materialize();
+ return super.iter();
+ }
+
+ @Override
+ public int getNumBins() {
+ materialize();
+ return super.getNumBins();
+ }
+
+ @Override
+ public double getBinsize() {
+ materialize();
+ return super.getBinsize();
+ }
+
+ @Override
+ public double getCoverMinimum() {
+ materialize();
+ return super.getCoverMinimum();
+ }
+
+ @Override
+ public double getCoverMaximum() {
+ materialize();
+ return super.getCoverMaximum();
+ }
+
+ /**
+ * Perform downsampling on a number of bins.
+ *
+ * @param data Data array (needs cast!)
+ * @param start Interval start
+ * @param end Interval end (exclusive)
+ * @param size Intended size - extra bins are assumed to be empty, should be a
+ * power of two
+ * @return New bin value
+ */
+ protected long downsample(long[] data, int start, int end, int size) {
+ long sum = 0;
+ for (int i = start; i < end; i++) {
+ sum += data[i];
+ }
+ return sum;
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongHistogram.java
new file mode 100644
index 00000000..9be15e65
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongHistogram.java
@@ -0,0 +1,58 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Histogram storing long values.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has Iter
+ */
+public interface LongHistogram extends Histogram {
+ /**
+ * Increment the histogram at a given position.
+ *
+ * @param coord Coordinate
+ * @param value Value to increment by
+ */
+ public void increment(double coord, long value);
+
+ @Override
+ public Iter iter();
+
+ /**
+ * Iterator interface.
+ *
+ * @author Erich Schubert
+ */
+ public static interface Iter extends Histogram.Iter {
+ /**
+ * Get the value of the bin.
+ *
+ * @return Bin value
+ */
+ public long getValue();
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongStaticHistogram.java
new file mode 100644
index 00000000..63e15599
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongStaticHistogram.java
@@ -0,0 +1,132 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Arrays;
+
+/**
+ * Histogram class storing long values.
+ *
+ * The histogram will start with "bin" bins, but it can grow dynamically to the
+ * left and right.
+ *
+ * @author Erich Schubert
+ */
+public class LongStaticHistogram extends AbstractStaticHistogram implements LongHistogram {
+ /**
+ * Constructor.
+ *
+ * @param bins Number of bins
+ * @param min Cover minimum
+ * @param max Cover maximum
+ */
+ public LongStaticHistogram(int bins, double min, double max) {
+ super(bins, min, max);
+ if (bins >= 0) {
+ data = new long[bins];
+ } else {
+ data = null;
+ }
+ }
+
+ /**
+ * Data store
+ */
+ long[] data;
+
+ /**
+ * Increment the value of a bin.
+ *
+ * @param coord Coordinate
+ * @param val Value
+ */
+ @Override
+ public void increment(double coord, long val) {
+ int bin = getBinNr(coord);
+ if (bin < 0) {
+ if (size - bin > data.length) {
+ // Reallocate. TODO: use an arraylist-like grow strategy!
+ long[] tmpdata = new long[growSize(data.length, size - bin)];
+ System.arraycopy(data, 0, tmpdata, -bin, size);
+ data = tmpdata;
+ } else {
+ // Shift in place and clear head
+ System.arraycopy(data, 0, data, -bin, size);
+ Arrays.fill(data, 0, -bin, (long) 0);
+ }
+ data[0] = val;
+ // Note that bin is negative, -bin is the shift offset!
+ assert (data.length >= size - bin);
+ offset -= bin;
+ size -= bin;
+ // TODO: modCounter++; and have iterators fast-fail
+ } else if (bin >= data.length) {
+ long[] tmpdata = new long[growSize(data.length, bin + 1)];
+ System.arraycopy(data, 0, tmpdata, 0, size);
+ tmpdata[bin] = val;
+ data = tmpdata;
+ size = bin + 1;
+ // TODO: modCounter++; and have iterators fast-fail
+ // Unset max value when resizing
+ max = Double.MAX_VALUE;
+ } else {
+ if (bin >= size) {
+ // TODO: reset bins to 0 first?
+ size = bin + 1;
+ }
+ data[bin] += val;
+ }
+ }
+
+ /**
+ * Get the value at a particular position.
+ *
+ * @param coord Coordinate
+ * @return Value
+ */
+ public long get(double coord) {
+ int bin = getBinNr(coord);
+ if (bin < 0 || bin >= size) {
+ return 0;
+ }
+ return data[bin];
+ }
+
+ @Override
+ public Iter iter() {
+ return new Iter();
+ }
+
+ /**
+ * Iterator class.
+ *
+ * @author Erich Schubert
+ */
+ public class Iter extends AbstractStaticHistogram.Iter implements LongHistogram.Iter {
+ @Override
+ public long getValue() {
+ return data[bin];
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/MeanVarianceStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/MeanVarianceStaticHistogram.java
new file mode 100644
index 00000000..2a464382
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/MeanVarianceStaticHistogram.java
@@ -0,0 +1,80 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.math.MeanVariance;
+
+/**
+ * Histogram class storing MeanVaraince object.
+ *
+ * The histogram will start with "bin" bins, but it can grow dynamically to the
+ * left and right.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.composedOf MeanVariance
+ */
+public class MeanVarianceStaticHistogram extends AbstractObjStaticHistogram<MeanVariance> {
+ /**
+ * Constructor.
+ *
+ * @param bins Number of bins
+ * @param min Cover minimum
+ * @param max Cover maximum
+ */
+ public MeanVarianceStaticHistogram(int bins, double min, double max) {
+ super(bins, min, max);
+ }
+
+ /**
+ * Data store
+ */
+ MeanVariance[] data;
+
+ /**
+ * Update the value of a bin with new data.
+ *
+ * @param coord Coordinate
+ * @param val Value
+ */
+ public void put(double coord, double val) {
+ get(coord).put(val);
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * Data is combined by using {@link MeanVariance#put(Mean)}.
+ */
+ @Override
+ public void putData(double coord, MeanVariance data) {
+ get(coord).put(data);
+ }
+
+
+ @Override
+ protected MeanVariance makeObject() {
+ return new MeanVariance();
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ObjHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ObjHistogram.java
new file mode 100644
index 00000000..ac4d4e4b
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ObjHistogram.java
@@ -0,0 +1,69 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Basic interface for object based histograms (static and flexible).
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has Iter
+ *
+ * @param <T> data type
+ */
+public interface ObjHistogram<T> extends Histogram {
+ /**
+ * Get a histogram iterator.
+ *
+ * @return Iterator
+ */
+ @Override
+ public Iter<T> iter();
+
+ /**
+ * Aggregate new data into the histogram.
+ *
+ * Note that the actual definition of "aggregate" depends on the application.
+ * While a static histogram will likely perform this immediately, a flexible
+ * histogram will cache a number of observations.
+ *
+ * @param coord Coordinate
+ * @param data Data
+ */
+ public void putData(double coord, T data);
+
+ /**
+ * Histogram iterator.
+ *
+ * @author Erich Schubert
+ */
+ public static interface Iter<T> extends Histogram.Iter {
+ /**
+ * Get the value of the bin.
+ *
+ * @return Bin value
+ */
+ public T getValue();
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortDynamicHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortDynamicHistogram.java
new file mode 100644
index 00000000..ff94928b
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortDynamicHistogram.java
@@ -0,0 +1,248 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.math.scales.LinearScale;
+import de.lmu.ifi.dbs.elki.utilities.BitsUtil;
+
+/**
+ * A flexible histogram storing short, that can dynamically adapt the number of
+ * bins to the data fed into the histogram.
+ *
+ * @author Erich Schubert
+ */
+public class ShortDynamicHistogram extends ShortStaticHistogram {
+ /**
+ * Cache for data to be inserted.
+ */
+ private double[] cachec;
+
+ /**
+ * Cache for data to be inserted.
+ */
+ private short[] cachev;
+
+ /**
+ * Cache fill size
+ */
+ private int cachefill;
+
+ /**
+ * Destination (minimum) size of the structure.
+ * At most destsize * 2 bins are allowed.
+ */
+ private int destsize;
+
+ /**
+ * Constructor.
+ *
+ * @param bins Design number of bins - may become twice as large!
+ */
+ public ShortDynamicHistogram(int bins) {
+ super(-1, 0.0, 1.0);
+ this.destsize = bins;
+ cachec = new double[this.destsize << CACHE_SHIFT];
+ cachev = new short[this.destsize << CACHE_SHIFT];
+ cachefill = 0;
+ }
+
+ /**
+ * Materialize the histogram from the cache.
+ */
+ void materialize() {
+ // already materialized?
+ if (cachefill < 0) {
+ return;
+ }
+ // Compute minimum and maximum
+ double min = Double.MAX_VALUE, max = Double.MIN_VALUE;
+ for (int i = 0; i < cachefill; i++) {
+ min = Math.min(min, cachec[i]);
+ max = Math.max(max, cachec[i]);
+ }
+ // use the LinearScale magic to round to "likely suiteable" step sizes.
+ // TODO: extract into a reusable function?
+ LinearScale scale = new LinearScale(min, max);
+ min = scale.getMin();
+ max = scale.getMax();
+ this.base = min;
+ this.max = max;
+ this.binsize = (max - min) / this.destsize;
+ // initialize array
+ this.data = new short[this.destsize << 1];
+ size = destsize;
+ // re-insert data we have
+ final int end = cachefill;
+ cachefill = -1; // So reinsert works!
+ for (int i = 0; i < end; i++) {
+ increment(cachec[i], cachev[i]);
+ }
+ // delete cache, signal that we're initialized
+ cachec = null;
+ cachev = null;
+ }
+
+ @Override
+ public short get(double coord) {
+ materialize();
+ testResample(coord);
+ return super.get(coord);
+ }
+
+ /**
+ * Put fresh data into the histogram (or into the cache)
+ *
+ * @param coord Coordinate
+ * @param value Value
+ */
+ @Override
+ public void increment(double coord, short value) {
+ // Store in cache
+ if (cachefill >= 0) {
+ if (cachefill < cachec.length) {
+ cachec[cachefill] = coord;
+ cachev[cachefill] = value;
+ cachefill ++;
+ return;
+ } else {
+ materialize();
+ // But continue below!
+ }
+ }
+ // Check if we need to resample to accomodate this bin.
+ testResample(coord);
+ // super class will handle histogram resizing / shifting
+ super.increment(coord, value);
+ }
+
+ /**
+ * Test (and perform) downsampling when neede.
+ *
+ * @param coord coordinate to accomodate.
+ */
+ private void testResample(double coord) {
+ final int bin = getBinNr(coord);
+ final int sizereq, off;
+ if (bin < 0) {
+ sizereq = size - bin;
+ off = -bin;
+ } else if (bin >= data.length) {
+ sizereq = bin + 1;
+ off = 0;
+ } else {
+ // Within the designated size - nothing to do.
+ return;
+ }
+ if (sizereq < data.length) {
+ // Accomodate by shifting. Let super do the job in {@link #get}
+ return;
+ }
+ // Resampling, eventually by multiple levels.
+ final int levels = BitsUtil.magnitude(sizereq / this.destsize) - 1;
+ assert (levels > 0) : "No resampling required?!? sizereq=" + sizereq + " destsize=" + destsize + " array=" + data.length;
+ final int step = 1 << levels;
+
+ final int fixpoint = off / (step - 1);
+ {
+ // Start positions for in-place bottom-up downsampling.
+ int oup = fixpoint;
+ int inp = (oup << levels) - off;
+ assert (-step < inp && inp <= oup && oup < inp + step) : (inp + " -> " + oup + " s=" + step + " o=" + off + " l=" + levels);
+ for (; inp < size; inp += step, oup++) {
+ assert (oup < inp + step && oup < data.length);
+ data[oup] = downsample(data, Math.max(0, inp), Math.min(size, inp + step), step);
+ }
+ // Clean upwards
+ for (; oup < data.length; oup++) {
+ data[oup] = 0;
+ }
+ }
+ if (off > 0) {
+ // Start positions for in-place downsampling top-down:
+ int oup = fixpoint - 1;
+ int inp = (oup << levels) - off;
+ assert (oup > inp) : (inp + " -> " + oup + " s=" + step + " o=" + off + " l=" + levels);
+ for (; inp > -step; inp -= step, oup--) {
+ assert (oup >= inp && oup >= 0);
+ data[oup] = downsample(data, Math.max(0, inp), Math.min(size, inp + step), step);
+ }
+ for (; oup >= 0; oup--) {
+ data[oup] = 0;
+ }
+ }
+ // recalculate histogram base.
+ base = base - (offset + off) * binsize;
+ offset = 0;
+ size = (size + 1) >> levels;
+ binsize = binsize * (1 << levels);
+ max = base + binsize * size;
+ }
+
+ @Override
+ public Iter iter() {
+ materialize();
+ return super.iter();
+ }
+
+ @Override
+ public int getNumBins() {
+ materialize();
+ return super.getNumBins();
+ }
+
+ @Override
+ public double getBinsize() {
+ materialize();
+ return super.getBinsize();
+ }
+
+ @Override
+ public double getCoverMinimum() {
+ materialize();
+ return super.getCoverMinimum();
+ }
+
+ @Override
+ public double getCoverMaximum() {
+ materialize();
+ return super.getCoverMaximum();
+ }
+
+ /**
+ * Perform downsampling on a number of bins.
+ *
+ * @param data Data array (needs cast!)
+ * @param start Interval start
+ * @param end Interval end (exclusive)
+ * @param size Intended size - extra bins are assumed to be empty, should be a
+ * power of two
+ * @return New bin value
+ */
+ protected short downsample(short[] data, int start, int end, int size) {
+ short sum = 0;
+ for (int i = start; i < end; i++) {
+ sum += data[i];
+ }
+ return sum;
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortHistogram.java
new file mode 100644
index 00000000..699df896
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortHistogram.java
@@ -0,0 +1,58 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Histogram storing short values.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has Iter
+ */
+public interface ShortHistogram extends Histogram {
+ /**
+ * Increment the histogram at a given position.
+ *
+ * @param coord Coordinate
+ * @param value Value to increment by
+ */
+ public void increment(double coord, short value);
+
+ @Override
+ public Iter iter();
+
+ /**
+ * Iterator interface.
+ *
+ * @author Erich Schubert
+ */
+ public static interface Iter extends Histogram.Iter {
+ /**
+ * Get the value of the bin.
+ *
+ * @return Bin value
+ */
+ public short getValue();
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortStaticHistogram.java
new file mode 100644
index 00000000..b2809e1e
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortStaticHistogram.java
@@ -0,0 +1,132 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Arrays;
+
+/**
+ * Histogram class storing short values.
+ *
+ * The histogram will start with "bin" bins, but it can grow dynamically to the
+ * left and right.
+ *
+ * @author Erich Schubert
+ */
+public class ShortStaticHistogram extends AbstractStaticHistogram implements ShortHistogram {
+ /**
+ * Constructor.
+ *
+ * @param bins Number of bins
+ * @param min Cover minimum
+ * @param max Cover maximum
+ */
+ public ShortStaticHistogram(int bins, double min, double max) {
+ super(bins, min, max);
+ if (bins >= 0) {
+ data = new short[bins];
+ } else {
+ data = null;
+ }
+ }
+
+ /**
+ * Data store
+ */
+ short[] data;
+
+ /**
+ * Increment the value of a bin.
+ *
+ * @param coord Coordinate
+ * @param val Value
+ */
+ @Override
+ public void increment(double coord, short val) {
+ int bin = getBinNr(coord);
+ if (bin < 0) {
+ if (size - bin > data.length) {
+ // Reallocate. TODO: use an arraylist-like grow strategy!
+ short[] tmpdata = new short[growSize(data.length, size - bin)];
+ System.arraycopy(data, 0, tmpdata, -bin, size);
+ data = tmpdata;
+ } else {
+ // Shift in place and clear head
+ System.arraycopy(data, 0, data, -bin, size);
+ Arrays.fill(data, 0, -bin, (short) 0);
+ }
+ data[0] = val;
+ // Note that bin is negative, -bin is the shift offset!
+ assert (data.length >= size - bin);
+ offset -= bin;
+ size -= bin;
+ // TODO: modCounter++; and have iterators fast-fail
+ } else if (bin >= data.length) {
+ short[] tmpdata = new short[growSize(data.length, bin + 1)];
+ System.arraycopy(data, 0, tmpdata, 0, size);
+ tmpdata[bin] = val;
+ data = tmpdata;
+ size = bin + 1;
+ // TODO: modCounter++; and have iterators fast-fail
+ // Unset max value when resizing
+ max = Double.MAX_VALUE;
+ } else {
+ if (bin >= size) {
+ // TODO: reset bins to 0 first?
+ size = bin + 1;
+ }
+ data[bin] += val;
+ }
+ }
+
+ /**
+ * Get the value at a particular position.
+ *
+ * @param coord Coordinate
+ * @return Value
+ */
+ public short get(double coord) {
+ int bin = getBinNr(coord);
+ if (bin < 0 || bin >= size) {
+ return 0;
+ }
+ return data[bin];
+ }
+
+ @Override
+ public Iter iter() {
+ return new Iter();
+ }
+
+ /**
+ * Iterator class.
+ *
+ * @author Erich Schubert
+ */
+ public class Iter extends AbstractStaticHistogram.Iter implements ShortHistogram.Iter {
+ @Override
+ public short getValue() {
+ return data[bin];
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/package-info.java
new file mode 100644
index 00000000..65dd6446
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/package-info.java
@@ -0,0 +1,34 @@
+/**
+ * <p>Classes for computing histograms.</p>
+ *
+ * This package contains two families of histograms. Static histograms have a fixed initial number of bins.
+ * When encountering values outside of their range, they will grow similar to an ArrayList by adding additional bins.
+ *
+ * "Dynamic" histograms are more useful when you do not know the value range of the data:
+ * they start by collecting a number of sample data, then use this to estimate the initial histogram range.
+ * If they grow to twice their initial size they will downsample, to keep the histogram size in the bounds n to 2n-1,
+ * which effectively limits the memory use and histogram complexity.
+ */
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; \ No newline at end of file