diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/utilities/datastructures')
98 files changed, 9598 insertions, 2932 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/AnyMap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/AnyMap.java deleted file mode 100644 index 763ce105..00000000 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/AnyMap.java +++ /dev/null @@ -1,94 +0,0 @@ -package de.lmu.ifi.dbs.elki.utilities.datastructures; - -/* - This file is part of ELKI: - Environment for Developing KDD-Applications Supported by Index-Structures - - Copyright (C) 2012 - Ludwig-Maximilians-Universität München - Lehr- und Forschungseinheit für Datenbanksysteme - ELKI Development Team - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -import java.util.HashMap; - -/** - * Associative storage based on a {@link HashMap} for multiple object types that - * offers a type checked {@link #get(Object, Class)} method. - * - * @author Erich Schubert - * - * @param <K> Key class type - */ -public class AnyMap<K> extends HashMap<K, Object> { - /** - * Serial version. 
- */ - private static final long serialVersionUID = 1L; - - /** - * Constructor - */ - public AnyMap() { - super(); - } - - /** - * Type checked get method - * - * @param <T> Return type - * @param key Key - * @param restriction restriction class - * @return Object that is guaranteed to be of class restriction or null - */ - public <T> T get(K key, Class<T> restriction) { - Object o = super.get(key); - if(o == null) { - return null; - } - try { - return restriction.cast(o); - } - catch(ClassCastException e) { - return null; - } - } - - /** - * (Largely) type checked get method for use with generic types - * - * @param <T> Return type - * @param key Key - * @param restriction restriction class - * @return Object that is guaranteed to be of class restriction or null - */ - @SuppressWarnings("unchecked") - public <T> T getGenerics(K key, Class<?> restriction) { - return (T) get(key, restriction); - } - - /** - * Depreciate the use of the untyped get method. - * - * @deprecated use {@link #get(Object, Class)} or - * {@link #getGenerics(Object, Class)} instead, for type safety! - */ - @Override - @Deprecated - public Object get(Object key) { - return super.get(key); - } -}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/HashMapList.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/HashMapList.java deleted file mode 100644 index 26fa4d19..00000000 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/HashMapList.java +++ /dev/null @@ -1,124 +0,0 @@ -package de.lmu.ifi.dbs.elki.utilities.datastructures; - -/* - This file is part of ELKI: - Environment for Developing KDD-Applications Supported by Index-Structures - - Copyright (C) 2012 - Ludwig-Maximilians-Universität München - Lehr- und Forschungseinheit für Datenbanksysteme - ELKI Development Team - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; - -/** - * Multi-Associative container, that stores a list of values for a particular key. - * - * @author Erich Schubert - * - * @apiviz.has List oneway - - contains - * - * @param <K> Key type - * @param <V> Value type - */ -// TODO: use MultiValueMap from apache collections instead? -public class HashMapList<K, V> extends HashMap<K, List<V>> { - /** - * Serial version - */ - private static final long serialVersionUID = 3883242025598456055L; - - /** - * Constructor. 
- */ - public HashMapList() { - super(); - } - - /** - * Constructor with initial capacity (of the hash) - * - * @param initialCapacity initial capacity - */ - public HashMapList(int initialCapacity) { - super(initialCapacity); - } - - /** - * Add a single value to the given key. - * - * @param key Key - * @param value Additional Value - */ - public synchronized void add(K key, V value) { - List<V> list = super.get(key); - if (list == null) { - list = new ArrayList<V>(1); - super.put(key, list); - } - list.add(value); - } - - /** - * Check that there is at least one value for the key. - */ - @Override - public boolean containsKey(Object key) { - List<V> list = super.get(key); - if (list == null) { - return false; - } - return list.size() > 0; - } - - /** - * Remove a single value from the map. - * - * @param key Key to remove - * @param value Value to remove. - * @return <tt>true</tt> if this list contained the specified element - */ - public synchronized boolean remove(K key, V value) { - List<V> list = super.get(key); - if (list == null) { - return false; - } - boolean success = list.remove(value); - // remove empty lists. - if (list.size() == 0) { - super.remove(key); - } - return success; - } - - /** - * Test if a given value is already present for the key. 
- * - * @param key Key - * @param value Value - * @return <tt>true</tt> if the keys list contains the specified element - */ - public boolean contains(K key, V value) { - List<V> list = super.get(key); - if (list == null) { - return false; - } - return list.contains(value); - } -} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/MaskedArrayList.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/MaskedArrayList.java deleted file mode 100644 index c24519d1..00000000 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/MaskedArrayList.java +++ /dev/null @@ -1,174 +0,0 @@ -package de.lmu.ifi.dbs.elki.utilities.datastructures; - -/* - This file is part of ELKI: - Environment for Developing KDD-Applications Supported by Index-Structures - - Copyright (C) 2012 - Ludwig-Maximilians-Universität München - Lehr- und Forschungseinheit für Datenbanksysteme - ELKI Development Team - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -import java.util.AbstractCollection; -import java.util.ArrayList; -import java.util.BitSet; -import java.util.Collection; -import java.util.Iterator; - -/** - * This class is a virtual collection based on masking an array list using a bit - * mask. 
- * - * @author Erich Schubert - * - * @apiviz.stereotype decorator - * @apiviz.composedOf java.util.ArrayList - * @apiviz.composedOf java.util.BitSet - * - * @param <T> Object type - */ -public class MaskedArrayList<T> extends AbstractCollection<T> implements Collection<T> { - /** - * Data storage - */ - protected ArrayList<T> data; - - /** - * The bitmask used for masking - */ - protected BitSet bits; - - /** - * Flag whether to iterator over set or unset values. - */ - protected boolean inverse = false; - - /** - * Constructor. - * - * @param data Data - * @param bits Bitset to use as mask - * @param inverse Flag to inverse the masking rule - */ - public MaskedArrayList(ArrayList<T> data, BitSet bits, boolean inverse) { - super(); - this.data = data; - this.bits = bits; - this.inverse = inverse; - } - - @Override - public boolean add(T e) { - throw new UnsupportedOperationException(); - } - - @Override - public Iterator<T> iterator() { - if(inverse) { - return new InvItr(); - } - else { - return new Itr(); - } - } - - @Override - public int size() { - if(inverse) { - return data.size() - bits.cardinality(); - } - else { - return bits.cardinality(); - } - } - - /** - * Iterator over set bits - * - * @author Erich Schubert - * - * @apiviz.exclude - */ - protected class Itr implements Iterator<T> { - /** - * Next position. - */ - private int pos; - - /** - * Constructor - */ - protected Itr() { - this.pos = bits.nextSetBit(0); - } - - @Override - public boolean hasNext() { - return (pos >= 0) && (pos < data.size()); - } - - @Override - public T next() { - T cur = data.get(pos); - pos = bits.nextSetBit(pos + 1); - return cur; - } - - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - } - - /** - * Iterator over unset elements. - * - * @author Erich Schubert - * - * @apiviz.exclude - */ - protected class InvItr implements Iterator<T> { - /** - * Next unset position. 
- */ - private int pos; - - /** - * Constructor - */ - protected InvItr() { - this.pos = bits.nextClearBit(0); - } - - @Override - public boolean hasNext() { - return (pos >= 0) && (pos < data.size()); - } - - @Override - public T next() { - T cur = data.get(pos); - pos = bits.nextClearBit(pos + 1); - return cur; - } - - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - } -}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/QuickSelect.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/QuickSelect.java index bfa7950d..3746ff87 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/QuickSelect.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/QuickSelect.java @@ -1,18 +1,10 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures; -import java.util.Comparator; -import java.util.List; - -import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs; -import de.lmu.ifi.dbs.elki.database.ids.DBID; -import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter; -import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; - /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -31,6 +23,14 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; along with this program. If not, see <http://www.gnu.org/licenses/>. */ +import java.util.Comparator; +import java.util.List; + +import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs; +import de.lmu.ifi.dbs.elki.database.ids.DBID; +import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; + /** * QuickSelect computes ("selects") the element at a given rank and can be used * to compute Medians and arbitrary quantiles by computing the appropriate rank. @@ -43,13 +43,297 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; * * @apiviz.uses ArrayModifiableDBIDs * @apiviz.uses List - * @apiviz.uses Comparator + * @apiviz.uses Adapter */ public class QuickSelect { /** * For small arrays, use a simpler method: */ - private static final int SMALL = 10; + private static final int SMALL = 47; + + /** + * Choose the best pivot for the given rank. 
+ * + * @param rank Rank + * @param m1 Pivot candidate + * @param m2 Pivot candidate + * @param m3 Pivot candidate + * @param m4 Pivot candidate + * @param m5 Pivot candidate + * @return Best pivot candidate + */ + private static final int bestPivot(int rank, int m1, int m2, int m3, int m4, int m5) { + if (rank < m1) { + return m1; + } + if (rank > m5) { + return m5; + } + if (rank < m2) { + return m2; + } + if (rank > m4) { + return m4; + } + return m3; + } + + /** + * QuickSelect is essentially quicksort, except that we only "sort" that half + * of the array that we are interested in. + * + * @param data Data to process + * @param start Interval start + * @param end Interval end (exclusive) + * @param rank rank position we are interested in (starting at 0) + */ + public static <T> void quickSelect(T data, Adapter<T> adapter, int start, int end, int rank) { + while (true) { + // Optimization for small arrays + // This also ensures a minimum size below + if (start + SMALL > end) { + insertionSort(data, adapter, start, end); + return; + } + + // Best of 5 pivot picking: + // Choose pivots by looking at five candidates. + final int len = end - start; + final int seventh = (len >> 3) + (len >> 6) + 1; + final int m3 = (start + end) >> 1; // middle + final int m2 = m3 - seventh; + final int m1 = m2 - seventh; + final int m4 = m3 + seventh; + final int m5 = m4 + seventh; + + // Explicit (and optimal) sorting network for 5 elements + // See Knuth for details. 
+ if (adapter.compareGreater(data, m1, m2)) { + adapter.swap(data, m1, m2); + } + if (adapter.compareGreater(data, m1, m3)) { + adapter.swap(data, m1, m3); + } + if (adapter.compareGreater(data, m2, m3)) { + adapter.swap(data, m2, m3); + } + if (adapter.compareGreater(data, m4, m5)) { + adapter.swap(data, m4, m5); + } + if (adapter.compareGreater(data, m1, m4)) { + adapter.swap(data, m1, m4); + } + if (adapter.compareGreater(data, m3, m4)) { + adapter.swap(data, m3, m4); + } + if (adapter.compareGreater(data, m2, m5)) { + adapter.swap(data, m2, m5); + } + if (adapter.compareGreater(data, m2, m3)) { + adapter.swap(data, m2, m3); + } + if (adapter.compareGreater(data, m4, m5)) { + adapter.swap(data, m4, m5); + } + + int best = bestPivot(rank, m1, m2, m3, m4, m5); + // final double pivot = data[best]; + // Move middle element out of the way. + adapter.swap(data, best, end - 1); + + // Begin partitioning + int i = start, j = end - 2; + // This is classic quicksort stuff + while (true) { + while (i <= j && adapter.compareGreater(data, end - 1, i)) { + i++; + } + while (j >= i && !adapter.compareGreater(data, end - 1, j)) { + j--; + } + if (i >= j) { + break; + } + adapter.swap(data, i, j); + } + + // Move pivot (former middle element) back into the appropriate place + adapter.swap(data, i, end - 1); + + // In contrast to quicksort, we only need to recurse into the half we are + // interested in. Instead of recursion we now use iteration. + if (rank < i) { + end = i; + } else if (rank > i) { + start = i + 1; + } else { + break; + } + } // Loop until rank==i + } + + /** + * Sort a small array using repetitive insertion sort. 
+ * + * @param data Data to sort + * @param start Interval start + * @param end Interval end + */ + private static <T> void insertionSort(T data, Adapter<T> adapter, int start, int end) { + for (int i = start + 1; i < end; i++) { + for (int j = i; j > start && adapter.compareGreater(data, j - 1, j); j--) { + adapter.swap(data, j, j - 1); + } + } + } + + /** + * Adapter class to apply QuickSelect to arbitrary data structures. + * + * @author Erich Schubert + * + * @param <T> Data structure type + */ + public static interface Adapter<T> { + /** + * Swap the two elements at positions i and j. + * + * @param data Data structure + * @param i Position i + * @param j Position j + */ + void swap(T data, int i, int j); + + /** + * Compare two elements. + * + * @param data Data structure + * @param i Position i + * @param j Position j + * @return {@code true} when the element at position i is greater than that + * at position j. + */ + boolean compareGreater(T data, int i, int j); + } + + /** + * Adapter for double arrays. + */ + public static Adapter<double[]> DOUBLE_ADAPTER = new Adapter<double[]>() { + @Override + public void swap(double[] data, int i, int j) { + double tmp = data[i]; + data[i] = data[j]; + data[j] = tmp; + } + + @Override + public boolean compareGreater(double[] data, int i, int j) { + return data[i] > data[j]; + } + }; + + /** + * Adapter for integer arrays. + */ + public static Adapter<int[]> INTEGER_ADAPTER = new Adapter<int[]>() { + @Override + public void swap(int[] data, int i, int j) { + int tmp = data[i]; + data[i] = data[j]; + data[j] = tmp; + } + + @Override + public boolean compareGreater(int[] data, int i, int j) { + return data[i] > data[j]; + } + }; + + /** + * Adapter for float arrays. 
+ */ + public static Adapter<float[]> FLOAT_ADAPTER = new Adapter<float[]>() { + @Override + public void swap(float[] data, int i, int j) { + float tmp = data[i]; + data[i] = data[j]; + data[j] = tmp; + } + + @Override + public boolean compareGreater(float[] data, int i, int j) { + return data[i] > data[j]; + } + }; + + /** + * Adapter for short arrays. + */ + public static Adapter<short[]> SHORT_ADAPTER = new Adapter<short[]>() { + @Override + public void swap(short[] data, int i, int j) { + short tmp = data[i]; + data[i] = data[j]; + data[j] = tmp; + } + + @Override + public boolean compareGreater(short[] data, int i, int j) { + return data[i] > data[j]; + } + }; + + /** + * Adapter for long arrays. + */ + public static Adapter<long[]> LONG_ADAPTER = new Adapter<long[]>() { + @Override + public void swap(long[] data, int i, int j) { + long tmp = data[i]; + data[i] = data[j]; + data[j] = tmp; + } + + @Override + public boolean compareGreater(long[] data, int i, int j) { + return data[i] > data[j]; + } + }; + + /** + * Adapter for byte arrays. + */ + public static Adapter<byte[]> BYTE_ADAPTER = new Adapter<byte[]>() { + @Override + public void swap(byte[] data, int i, int j) { + byte tmp = data[i]; + data[i] = data[j]; + data[j] = tmp; + } + + @Override + public boolean compareGreater(byte[] data, int i, int j) { + return data[i] > data[j]; + } + }; + + /** + * Adapter for char arrays. + */ + public static Adapter<char[]> CHAR_ADAPTER = new Adapter<char[]>() { + @Override + public void swap(char[] data, int i, int j) { + char tmp = data[i]; + data[i] = data[j]; + data[j] = tmp; + } + + @Override + public boolean compareGreater(char[] data, int i, int j) { + return data[i] > data[j]; + } + }; /** * QuickSelect is essentially quicksort, except that we only "sort" that half @@ -94,11 +378,10 @@ public class QuickSelect { // Integer division is "floor" since we are non-negative. 
final int left = begin + ((length - 1) >> 1); quickSelect(data, begin, end, left); - if(length % 2 == 1) { + if (length % 2 == 1) { return data[left]; - } - else { - quickSelect(data, begin, end, left + 1); + } else { + quickSelect(data, left + 1, end, left + 1); return data[left] + .5 * (data[left + 1] - data[left]); } } @@ -136,11 +419,10 @@ public class QuickSelect { final double err = dleft - ileft; quickSelect(data, begin, end, ileft); - if(err <= Double.MIN_NORMAL) { + if (err <= Double.MIN_NORMAL) { return data[ileft]; - } - else { - quickSelect(data, begin, end, ileft + 1); + } else { + quickSelect(data, ileft + 1, end, ileft + 1); // Mix: double mix = data[ileft] + (data[ileft + 1] - data[ileft]) * err; return mix; @@ -155,66 +437,94 @@ public class QuickSelect { * @param start Interval start * @param end Interval end (exclusive) * @param rank rank position we are interested in (starting at 0) + * @return Element at the given rank (starting at 0). */ - public static void quickSelect(double[] data, int start, int end, int rank) { - while(true) { + public static double quickSelect(double[] data, int start, int end, int rank) { + while (true) { // Optimization for small arrays // This also ensures a minimum size below - if(start + SMALL > end) { + if (start + SMALL > end) { insertionSort(data, start, end); - return; + return data[rank]; } - // Pick pivot from three candidates: start, middle, end - // Since we compare them, we can also just "bubble sort" them. - final int middle = (start + end) >> 1; - if(data[start] > data[middle]) { - swap(data, start, middle); + // Best of 5 pivot picking: + // Choose pivots by looking at five candidates. 
+ final int len = end - start; + final int seventh = (len >> 3) + (len >> 6) + 1; + final int m3 = (start + end) >> 1; // middle + final int m2 = m3 - seventh; + final int m1 = m2 - seventh; + final int m4 = m3 + seventh; + final int m5 = m4 + seventh; + + // Explicit (and optimal) sorting network for 5 elements + // See Knuth for details. + if (data[m1] > data[m2]) { + swap(data, m1, m2); + } + if (data[m1] > data[m3]) { + swap(data, m1, m3); + } + if (data[m2] > data[m3]) { + swap(data, m2, m3); + } + if (data[m4] > data[m5]) { + swap(data, m4, m5); + } + if (data[m1] > data[m4]) { + swap(data, m1, m4); } - if(data[start] > data[end - 1]) { - swap(data, start, end - 1); + if (data[m3] > data[m4]) { + swap(data, m3, m4); } - if(data[middle] > data[end - 1]) { - swap(data, middle, end - 1); + if (data[m2] > data[m5]) { + swap(data, m2, m5); + } + if (data[m2] > data[m3]) { + swap(data, m2, m3); + } + if (data[m4] > data[m5]) { + swap(data, m4, m5); } - // TODO: use more candidates for larger arrays? - final double pivot = data[middle]; - // Move middle element out of the way, just before end - // (Since we already know that "end" is bigger) - swap(data, middle, end - 2); + int best = bestPivot(rank, m1, m2, m3, m4, m5); + final double pivot = data[best]; + // Move middle element out of the way. + swap(data, best, end - 1); // Begin partitioning - int i = start + 1, j = end - 3; + int i = start, j = end - 2; // This is classic quicksort stuff - while(true) { - while(data[i] <= pivot && i <= j) { + while (true) { + while (i <= j && data[i] <= pivot) { i++; } - while(data[j] >= pivot && j >= i) { + while (j >= i && data[j] >= pivot) { j--; } - if(i >= j) { + if (i >= j) { break; } swap(data, i, j); + i++; + j--; } // Move pivot (former middle element) back into the appropriate place - swap(data, i, end - 2); + swap(data, i, end - 1); // In contrast to quicksort, we only need to recurse into the half we are // interested in. Instead of recursion we now use iteration. 
- if(rank < i) { + if (rank < i) { end = i; - } - else if(rank > i) { + } else if (rank > i) { start = i + 1; - } - else { + } else { break; } } // Loop until rank==i + return data[rank]; } /** @@ -225,8 +535,8 @@ public class QuickSelect { * @param end Interval end */ private static void insertionSort(double[] data, int start, int end) { - for(int i = start + 1; i < end; i++) { - for(int j = i; j > start && data[j - 1] > data[j]; j--) { + for (int i = start + 1; i < end; i++) { + for (int j = i; j > start && data[j - 1] > data[j]; j--) { swap(data, j, j - 1); } } @@ -345,61 +655,85 @@ public class QuickSelect { * @param rank rank position we are interested in (starting at 0) */ public static <T extends Comparable<? super T>> void quickSelect(T[] data, int start, int end, int rank) { - while(true) { + while (true) { // Optimization for small arrays // This also ensures a minimum size below - if(start + SMALL > end) { + if (start + SMALL > end) { insertionSort(data, start, end); return; } - // Pick pivot from three candidates: start, middle, end - // Since we compare them, we can also just "bubble sort" them. - final int middle = (start + end) >> 1; - if(data[start].compareTo(data[middle]) > 0) { - swap(data, start, middle); + // Best of 5 pivot picking: + // Choose pivots by looking at five candidates. + final int len = end - start; + final int seventh = (len >> 3) + (len >> 6) + 1; + final int m3 = (start + end) >> 1; // middle + final int m2 = m3 - seventh; + final int m1 = m2 - seventh; + final int m4 = m3 + seventh; + final int m5 = m4 + seventh; + + // Explicit (and optimal) sorting network for 5 elements + // See Knuth for details. 
+ if (data[m1].compareTo(data[m2]) > 0) { + swap(data, m1, m2); + } + if (data[m1].compareTo(data[m3]) > 0) { + swap(data, m1, m3); + } + if (data[m2].compareTo(data[m3]) > 0) { + swap(data, m2, m3); + } + if (data[m4].compareTo(data[m5]) > 0) { + swap(data, m4, m5); + } + if (data[m1].compareTo(data[m4]) > 0) { + swap(data, m1, m4); } - if(data[start].compareTo(data[end - 1]) > 0) { - swap(data, start, end - 1); + if (data[m3].compareTo(data[m4]) > 0) { + swap(data, m3, m4); } - if(data[middle].compareTo(data[end - 1]) > 0) { - swap(data, middle, end - 1); + if (data[m2].compareTo(data[m5]) > 0) { + swap(data, m2, m5); + } + if (data[m2].compareTo(data[m3]) > 0) { + swap(data, m2, m3); + } + if (data[m4].compareTo(data[m5]) > 0) { + swap(data, m4, m5); } - // TODO: use more candidates for larger arrays? - final T pivot = data[middle]; - // Move middle element out of the way, just before end - // (Since we already know that "end" is bigger) - swap(data, middle, end - 2); + int best = bestPivot(rank, m1, m2, m3, m4, m5); + final T pivot = data[best]; + // Move middle element out of the way. + swap(data, best, end - 1); // Begin partitioning - int i = start + 1, j = end - 3; + int i = start, j = end - 2; // This is classic quicksort stuff - while(true) { - while(data[i].compareTo(pivot) <= 0 && i <= j) { + while (true) { + while (i <= j && data[i].compareTo(pivot) <= 0) { i++; } - while(data[j].compareTo(pivot) >= 0 && j >= i) { + while (j >= i && data[j].compareTo(pivot) >= 0) { j--; } - if(i >= j) { + if (i >= j) { break; } swap(data, i, j); } // Move pivot (former middle element) back into the appropriate place - swap(data, i, end - 2); + swap(data, i, end - 1); // In contrast to quicksort, we only need to recurse into the half we are // interested in. Instead of recursion we now use iteration. 
- if(rank < i) { + if (rank < i) { end = i; - } - else if(rank > i) { + } else if (rank > i) { start = i + 1; - } - else { + } else { break; } } // Loop until rank==i @@ -414,8 +748,8 @@ public class QuickSelect { * @param end Interval end */ private static <T extends Comparable<? super T>> void insertionSort(T[] data, int start, int end) { - for(int i = start + 1; i < end; i++) { - for(int j = i; j > start && data[j - 1].compareTo(data[j]) > 0; j--) { + for (int i = start + 1; i < end; i++) { + for (int j = i; j > start && data[j - 1].compareTo(data[j]) > 0; j--) { swap(data, j, j - 1); } } @@ -536,61 +870,86 @@ public class QuickSelect { * @param rank rank position we are interested in (starting at 0) */ public static <T extends Comparable<? super T>> void quickSelect(List<? extends T> data, int start, int end, int rank) { - while(true) { + while (true) { // Optimization for small arrays // This also ensures a minimum size below - if(start + SMALL > end) { + if (start + SMALL > end) { insertionSort(data, start, end); return; } - // Pick pivot from three candidates: start, middle, end - // Since we compare them, we can also just "bubble sort" them. - final int middle = (start + end) >> 1; - if(data.get(start).compareTo(data.get(middle)) > 0) { - swap(data, start, middle); + // Best of 5 pivot picking: + // Choose pivots by looking at five candidates. + final int len = end - start; + final int seventh = (len >> 3) + (len >> 6) + 1; + final int m3 = (start + end) >> 1; // middle + final int m2 = m3 - seventh; + final int m1 = m2 - seventh; + final int m4 = m3 + seventh; + final int m5 = m4 + seventh; + + // Explicit (and optimal) sorting network for 5 elements + // See Knuth for details. 
+ if (data.get(m1).compareTo(data.get(m2)) > 0) { + swap(data, m1, m2); } - if(data.get(start).compareTo(data.get(end - 1)) > 0) { - swap(data, start, end - 1); + if (data.get(m1).compareTo(data.get(m3)) > 0) { + swap(data, m1, m3); } - if(data.get(middle).compareTo(data.get(end - 1)) > 0) { - swap(data, middle, end - 1); + if (data.get(m2).compareTo(data.get(m3)) > 0) { + swap(data, m2, m3); + } + if (data.get(m4).compareTo(data.get(m5)) > 0) { + swap(data, m4, m5); + } + if (data.get(m1).compareTo(data.get(m4)) > 0) { + swap(data, m1, m4); + } + if (data.get(m3).compareTo(data.get(m4)) > 0) { + swap(data, m3, m4); + } + if (data.get(m2).compareTo(data.get(m5)) > 0) { + swap(data, m2, m5); + } + if (data.get(m2).compareTo(data.get(m3)) > 0) { + swap(data, m2, m3); + } + if (data.get(m4).compareTo(data.get(m5)) > 0) { + swap(data, m4, m5); } - // TODO: use more candidates for larger arrays? - final T pivot = data.get(middle); + int best = bestPivot(rank, m1, m2, m3, m4, m5); + final T pivot = data.get(best); // Move middle element out of the way, just before end // (Since we already know that "end" is bigger) - swap(data, middle, end - 2); + swap(data, best, end - 1); // Begin partitioning - int i = start + 1, j = end - 3; + int i = start, j = end - 2; // This is classic quicksort stuff - while(true) { - while(data.get(i).compareTo(pivot) <= 0 && i <= j) { + while (true) { + while (i <= j && data.get(i).compareTo(pivot) <= 0) { i++; } - while(data.get(j).compareTo(pivot) >= 0 && j >= i) { + while (j >= i && data.get(j).compareTo(pivot) >= 0) { j--; } - if(i >= j) { + if (i >= j) { break; } swap(data, i, j); } // Move pivot (former middle element) back into the appropriate place - swap(data, i, end - 2); + swap(data, i, end - 1); // In contrast to quicksort, we only need to recurse into the half we are // interested in. Instead of recursion we now use iteration. 
- if(rank < i) { + if (rank < i) { end = i; - } - else if(rank > i) { + } else if (rank > i) { start = i + 1; - } - else { + } else { break; } } // Loop until rank==i @@ -605,8 +964,8 @@ public class QuickSelect { * @param end Interval end */ private static <T extends Comparable<? super T>> void insertionSort(List<T> data, int start, int end) { - for(int i = start + 1; i < end; i++) { - for(int j = i; j > start && data.get(j - 1).compareTo(data.get(j)) > 0; j--) { + for (int i = start + 1; i < end; i++) { + for (int j = i; j > start && data.get(j - 1).compareTo(data.get(j)) > 0; j--) { swap(data, j, j - 1); } } @@ -731,61 +1090,86 @@ public class QuickSelect { * @param rank rank position we are interested in (starting at 0) */ public static <T> void quickSelect(List<? extends T> data, Comparator<? super T> comparator, int start, int end, int rank) { - while(true) { + while (true) { // Optimization for small arrays // This also ensures a minimum size below - if(start + SMALL > end) { + if (start + SMALL > end) { insertionSort(data, comparator, start, end); return; } - // Pick pivot from three candidates: start, middle, end - // Since we compare them, we can also just "bubble sort" them. - final int middle = (start + end) >> 1; - if(comparator.compare(data.get(start), data.get(middle)) > 0) { - swap(data, start, middle); + // Best of 5 pivot picking: + // Choose pivots by looking at five candidates. + final int len = end - start; + final int seventh = (len >> 3) + (len >> 6) + 1; + final int m3 = (start + end) >> 1; // middle + final int m2 = m3 - seventh; + final int m1 = m2 - seventh; + final int m4 = m3 + seventh; + final int m5 = m4 + seventh; + + // Explicit (and optimal) sorting network for 5 elements + // See Knuth for details. 
+ if (comparator.compare(data.get(m1), data.get(m2)) > 0) { + swap(data, m1, m2); + } + if (comparator.compare(data.get(m1), data.get(m3)) > 0) { + swap(data, m1, m3); + } + if (comparator.compare(data.get(m2), data.get(m3)) > 0) { + swap(data, m2, m3); + } + if (comparator.compare(data.get(m4), data.get(m5)) > 0) { + swap(data, m4, m5); } - if(comparator.compare(data.get(start), data.get(end - 1)) > 0) { - swap(data, start, end - 1); + if (comparator.compare(data.get(m1), data.get(m4)) > 0) { + swap(data, m1, m4); } - if(comparator.compare(data.get(middle), data.get(end - 1)) > 0) { - swap(data, middle, end - 1); + if (comparator.compare(data.get(m3), data.get(m4)) > 0) { + swap(data, m3, m4); + } + if (comparator.compare(data.get(m2), data.get(m5)) > 0) { + swap(data, m2, m5); + } + if (comparator.compare(data.get(m2), data.get(m3)) > 0) { + swap(data, m2, m3); + } + if (comparator.compare(data.get(m4), data.get(m5)) > 0) { + swap(data, m4, m5); } - // TODO: use more candidates for larger arrays? 
- final T pivot = data.get(middle); + int best = bestPivot(rank, m1, m2, m3, m4, m5); + final T pivot = data.get(best); // Move middle element out of the way, just before end // (Since we already know that "end" is bigger) - swap(data, middle, end - 2); + swap(data, best, end - 1); // Begin partitioning - int i = start + 1, j = end - 3; + int i = start, j = end - 2; // This is classic quicksort stuff - while(true) { - while(comparator.compare(data.get(i), pivot) <= 0 && i <= j) { + while (true) { + while (i <= j && comparator.compare(data.get(i), pivot) <= 0) { i++; } - while(comparator.compare(data.get(j), pivot) >= 0 && j >= i) { + while (j >= i && comparator.compare(data.get(j), pivot) >= 0) { j--; } - if(i >= j) { + if (i >= j) { break; } swap(data, i, j); } // Move pivot (former middle element) back into the appropriate place - swap(data, i, end - 2); + swap(data, i, end - 1); // In contrast to quicksort, we only need to recurse into the half we are // interested in. Instead of recursion we now use iteration. - if(rank < i) { + if (rank < i) { end = i; - } - else if(rank > i) { + } else if (rank > i) { start = i + 1; - } - else { + } else { break; } } // Loop until rank==i @@ -800,8 +1184,8 @@ public class QuickSelect { * @param end Interval end */ private static <T> void insertionSort(List<T> data, Comparator<? super T> comparator, int start, int end) { - for(int i = start + 1; i < end; i++) { - for(int j = i; j > start && comparator.compare(data.get(j - 1), data.get(j)) > 0; j--) { + for (int i = start + 1; i < end; i++) { + for (int j = i; j > start && comparator.compare(data.get(j - 1), data.get(j)) > 0; j--) { swap(data, j, j - 1); } } @@ -882,7 +1266,7 @@ public class QuickSelect { * @param data Data to process * @param comparator Comparator to use * @param begin Begin of valid values - * @param end End of valid values (inclusive!) 
+ * @param end End of valid values (exclusive) * @param quant Quantile to compute * @return Value at quantile */ @@ -904,93 +1288,132 @@ public class QuickSelect { * @param data Data to process * @param comparator Comparator to use * @param start Interval start - * @param end Interval end (inclusive) + * @param end Interval end (exclusive) * @param rank rank position we are interested in (starting at 0) */ public static void quickSelect(ArrayModifiableDBIDs data, Comparator<? super DBIDRef> comparator, int start, int end, int rank) { - while(true) { + DBIDArrayIter refi = data.iter(), refj = data.iter(), pivot = data.iter(); + while (true) { // Optimization for small arrays // This also ensures a minimum size below - if(start + SMALL > end) { - insertionSort(data, comparator, start, end); + if (start + SMALL > end) { + insertionSort(data, comparator, start, end, refi, refj); return; } - // Pick pivot from three candidates: start, middle, end - // Since we compare them, we can also just "bubble sort" them. - final int middle = (start + end) >> 1; - if(comparator.compare(data.get(start), data.get(middle)) > 0) { - data.swap(start, middle); + // Best of 5 pivot picking: + // Choose pivots by looking at five candidates. + final int len = end - start; + final int seventh = (len >> 3) + (len >> 6) + 1; + final int m3 = (start + end) >> 1; // middle + final int m2 = m3 - seventh; + final int m1 = m2 - seventh; + final int m4 = m3 + seventh; + final int m5 = m4 + seventh; + + // Explicit (and optimal) sorting network for 5 elements + // See Knuth for details. 
+ if (compare(refi, m1, refj, m2, comparator) > 0) { + data.swap(m1, m2); + } + if (compare(refi, m1, refj, m3, comparator) > 0) { + data.swap(m1, m3); + } + if (compare(refi, m2, refj, m3, comparator) > 0) { + data.swap(m2, m3); + } + if (compare(refi, m4, refj, m5, comparator) > 0) { + data.swap(m4, m5); } - if(comparator.compare(data.get(start), data.get(end - 1)) > 0) { - data.swap(start, end - 1); + if (compare(refi, m1, refj, m4, comparator) > 0) { + data.swap(m1, m4); } - if(comparator.compare(data.get(middle), data.get(end - 1)) > 0) { - data.swap(middle, end - 1); + if (compare(refi, m3, refj, m4, comparator) > 0) { + data.swap(m3, m4); + } + if (compare(refi, m2, refj, m5, comparator) > 0) { + data.swap(m2, m5); + } + if (compare(refi, m2, refj, m3, comparator) > 0) { + data.swap(m2, m3); + } + if (compare(refi, m4, refj, m5, comparator) > 0) { + data.swap(m4, m5); } - // TODO: use more candidates for larger arrays? - final DBID pivot = data.get(middle); - // Move middle element out of the way, just before end - // (Since we already know that "end" is bigger) - data.swap(middle, end - 2); + int best = bestPivot(rank, m1, m2, m3, m4, m5); + // Move middle element out of the way. 
+ data.swap(best, end - 1); + pivot.seek(end - 1); // Begin partitioning - int i = start + 1, j = end - 3; - DBIDArrayIter refi = data.iter(), refj = data.iter(); + int i = start, j = end - 3; refi.seek(i); refj.seek(j); // This is classic quicksort stuff - while(true) { - while(comparator.compare(refi, pivot) <= 0 && i <= j) { + while (true) { + while (i <= j && comparator.compare(refi, pivot) <= 0) { i++; refi.advance(); } - while(comparator.compare(refj, pivot) >= 0 && j >= i) { + while (j >= i && comparator.compare(refj, pivot) >= 0) { j--; refj.retract(); } - if(i >= j) { + if (i >= j) { break; } data.swap(i, j); } // Move pivot (former middle element) back into the appropriate place - data.swap(i, end - 2); + data.swap(i, end - 1); // In contrast to quicksort, we only need to recurse into the half we are // interested in. Instead of recursion we now use iteration. - if(rank < i) { + if (rank < i) { end = i; - } - else if(rank > i) { + } else if (rank > i) { start = i + 1; - } - else { + } else { break; } } // Loop until rank==i } /** + * Compare two elements. + * + * @param i1 First scratch variable + * @param p1 Value for first + * @param i2 Second scratch variable + * @param p2 Value for second + * @param comp Comparator + * @return Comparison result + */ + private static int compare(DBIDArrayIter i1, int p1, DBIDArrayIter i2, int p2, Comparator<? super DBIDRef> comp) { + i1.seek(p1); + i2.seek(p2); + return comp.compare(i1, i2); + } + + /** * Sort a small array using repetitive insertion sort. * * @param data Data to sort * @param start Interval start * @param end Interval end */ - private static void insertionSort(ArrayModifiableDBIDs data, Comparator<? 
super DBIDRef> comparator, int start, int end) { - DBIDArrayIter iter1 = data.iter(), iter2 = data.iter(); - for(int i = start + 1; i < end; i++) { - iter1.seek(i - 1); - iter2.seek(i); - for(int j = i; j > start; j--, iter1.retract(), iter2.retract()) { - if(comparator.compare(iter1, iter2) > 0) { + private static void insertionSort(ArrayModifiableDBIDs data, Comparator<? super DBIDRef> comparator, int start, int end, DBIDArrayIter iter1, DBIDArrayIter iter2) { + for (int i = start + 1; i < end; i++) { + for (int j = i; j > start; j--) { + iter1.seek(j - 1); + iter2.seek(j); + if (comparator.compare(iter1, iter2) <= 0) { break; } data.swap(j, j - 1); } } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayAdapter.java index 969d068d..2c5eeed1 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayAdapter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayAdapter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayDBIDsAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayDBIDsAdapter.java index 5b1b92b5..da831471 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayDBIDsAdapter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayDBIDsAdapter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayLikeUtil.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayLikeUtil.java index a08feb1a..8fab6f2b 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayLikeUtil.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayLikeUtil.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike; This file is part of ELKI: Environment for Developing KDD-Applications Supported by 
Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -27,6 +27,7 @@ import java.util.List; import de.lmu.ifi.dbs.elki.data.FeatureVector; import de.lmu.ifi.dbs.elki.data.NumberVector; +import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; /** * Utility class that allows plug-in use of various "array-like" types such as @@ -42,17 +43,17 @@ public final class ArrayLikeUtil { /** * Static instance for lists. */ - private static final ListArrayAdapter<Object> LISTADAPTER = new ListArrayAdapter<Object>(); + private static final ListArrayAdapter<Object> LISTADAPTER = new ListArrayAdapter<>(); /** * Static instance for lists of numbers. */ - private static final NumberListArrayAdapter<Number> NUMBERLISTADAPTER = new NumberListArrayAdapter<Number>(); + private static final NumberListArrayAdapter<Number> NUMBERLISTADAPTER = new NumberListArrayAdapter<>(); /** * Static instance. */ - private static final IdentityArrayAdapter<?> IDENTITYADAPTER = new IdentityArrayAdapter<Object>(); + private static final IdentityArrayAdapter<?> IDENTITYADAPTER = new IdentityArrayAdapter<>(); /** * Static instance. @@ -83,7 +84,12 @@ public final class ArrayLikeUtil { * Use ArrayDBIDs as array. */ public static final ArrayDBIDsAdapter ARRAYDBIDADAPTER = new ArrayDBIDsAdapter(); - + + /** + * Adapter for vectors. + */ + public static final NumberArrayAdapter<Double, Vector> VECTORADAPTER = new VectorAdapter(); + /** * Fake constructor. Do not instantiate! 
*/ @@ -169,9 +175,9 @@ public final class ArrayLikeUtil { final int size = adapter.size(array); int index = 0; double max = adapter.getDouble(array, 0); - for(int i = 1; i < size; i++) { + for (int i = 1; i < size; i++) { double val = adapter.getDouble(array, i); - if(val > max) { + if (val > max) { max = val; index = i; } @@ -199,8 +205,11 @@ public final class ArrayLikeUtil { * @return primitive double array */ public static <A> double[] toPrimitiveDoubleArray(A array, NumberArrayAdapter<?, ? super A> adapter) { + if (adapter == DOUBLEARRAYADAPTER) { + return ((double[]) array).clone(); + } double[] ret = new double[adapter.size(array)]; - for(int i = 0; i < ret.length; i++) { + for (int i = 0; i < ret.length; i++) { ret[i] = adapter.getDouble(array, i); } return ret; @@ -234,8 +243,11 @@ public final class ArrayLikeUtil { * @return primitive float array */ public static <A> float[] toPrimitiveFloatArray(A array, NumberArrayAdapter<?, ? super A> adapter) { + if (adapter == FLOATARRAYADAPTER) { + return ((float[]) array).clone(); + } float[] ret = new float[adapter.size(array)]; - for(int i = 0; i < ret.length; i++) { + for (int i = 0; i < ret.length; i++) { ret[i] = adapter.getFloat(array, i); } return ret; @@ -260,4 +272,39 @@ public final class ArrayLikeUtil { public static <N extends Number> float[] toPrimitiveFloatArray(NumberVector<N> obj) { return toPrimitiveFloatArray(obj, numberVectorAdapter(obj)); } -}
\ No newline at end of file + + /** + * Convert a numeric array-like to a <code>int[]</code>. + * + * @param array Array-like + * @param adapter Adapter + * @return primitive double array + */ + public static <A> int[] toPrimitiveIntegerArray(A array, NumberArrayAdapter<?, ? super A> adapter) { + int[] ret = new int[adapter.size(array)]; + for (int i = 0; i < ret.length; i++) { + ret[i] = adapter.getInteger(array, i); + } + return ret; + } + + /** + * Convert a list of numbers to <code>int[]</code>. + * + * @param array List of numbers + * @return double array + */ + public static int[] toPrimitiveIntegerArray(List<? extends Number> array) { + return toPrimitiveIntegerArray(array, NUMBERLISTADAPTER); + } + + /** + * Convert a number vector to <code>int[]</code>. + * + * @param obj Object to convert + * @return primitive double array + */ + public static <N extends Number> int[] toPrimitiveIntegerArray(NumberVector<N> obj) { + return toPrimitiveIntegerArray(obj, numberVectorAdapter(obj)); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/DoubleArrayAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/DoubleArrayAdapter.java index 117f3845..0e31a61a 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/DoubleArrayAdapter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/DoubleArrayAdapter.java @@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ExtendedArray.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ExtendedArray.java index 491c4f95..3af14982 100644 --- 
a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ExtendedArray.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ExtendedArray.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -91,6 +91,6 @@ public class ExtendedArray<T> implements ArrayAdapter<T, ExtendedArray<T>> { */ @SuppressWarnings("unchecked") public static <T, A> ExtendedArray<T> extend(A array, ArrayAdapter<T, A> getter, T extra) { - return new ExtendedArray<T>(array, (ArrayAdapter<T, Object>) getter, extra); + return new ExtendedArray<>(array, (ArrayAdapter<T, Object>) getter, extra); } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FeatureVectorAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FeatureVectorAdapter.java index 38b662e8..deb5aafc 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FeatureVectorAdapter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FeatureVectorAdapter.java @@ -6,7 +6,7 @@ import de.lmu.ifi.dbs.elki.data.FeatureVector; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FloatArrayAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FloatArrayAdapter.java index ae501039..831dc929 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FloatArrayAdapter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FloatArrayAdapter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/IdentityArrayAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/IdentityArrayAdapter.java index 0c6e03dd..dfde46b7 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/IdentityArrayAdapter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/IdentityArrayAdapter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike; This file is part of ELKI: Environment for Developing 
KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ListArrayAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ListArrayAdapter.java index cba1e706..729dfab8 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ListArrayAdapter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ListArrayAdapter.java @@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberArrayAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberArrayAdapter.java index 1dc823b1..5ebbcb0d 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberArrayAdapter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberArrayAdapter.java @@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberListArrayAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberListArrayAdapter.java index 89a4e3d6..a2606347 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberListArrayAdapter.java +++ 
b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberListArrayAdapter.java @@ -6,7 +6,7 @@ import java.util.List; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberVectorAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberVectorAdapter.java index fd1e6636..5e674026 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberVectorAdapter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberVectorAdapter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SingleSubsetArrayAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SingleSubsetArrayAdapter.java index d7483e4d..941c6245 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SingleSubsetArrayAdapter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SingleSubsetArrayAdapter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SubsetArrayAdapter.java 
b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SubsetArrayAdapter.java index c607759f..746647cc 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SubsetArrayAdapter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SubsetArrayAdapter.java @@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SubsetNumberArrayAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SubsetNumberArrayAdapter.java index 6719b60e..c394f9b7 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SubsetNumberArrayAdapter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SubsetNumberArrayAdapter.java @@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/TDoubleListAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/TDoubleListAdapter.java index cee393ac..a52ff15e 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/TDoubleListAdapter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/TDoubleListAdapter.java @@ -6,7 +6,7 @@ import gnu.trove.list.TDoubleList; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München 
Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/VectorAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/VectorAdapter.java new file mode 100644 index 00000000..0bb979e9 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/VectorAdapter.java @@ -0,0 +1,85 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; + +/** + * Adapter to use a feature vector as an array of features. + * + * Use the static instance from {@link ArrayLikeUtil}! + * + * @author Erich Schubert + */ +public class VectorAdapter implements NumberArrayAdapter<Double, Vector> { + /** + * Constructor. + * + * Use the static instance from {@link ArrayLikeUtil}! 
+ */ + protected VectorAdapter() { + super(); + } + + @Override + public int size(Vector array) { + return array.getDimensionality(); + } + + @Override + @Deprecated + public Double get(Vector array, int off) throws IndexOutOfBoundsException { + return array.getValue(off + 1); + } + + @Override + public double getDouble(Vector array, int off) throws IndexOutOfBoundsException { + return array.doubleValue(off); + } + + @Override + public float getFloat(Vector array, int off) throws IndexOutOfBoundsException { + return array.floatValue(off); + } + + @Override + public int getInteger(Vector array, int off) throws IndexOutOfBoundsException { + return array.intValue(off); + } + + @Override + public short getShort(Vector array, int off) throws IndexOutOfBoundsException { + return array.shortValue(off); + } + + @Override + public long getLong(Vector array, int off) throws IndexOutOfBoundsException { + return array.longValue(off); + } + + @Override + public byte getByte(Vector array, int off) throws IndexOutOfBoundsException { + return array.byteValue(off); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/package-info.java index 55627df4..33058cf4 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/IntegerArrayQuickSort.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/IntegerArrayQuickSort.java index a0c93997..eaf47738 100644 --- 
a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/IntegerArrayQuickSort.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/IntegerArrayQuickSort.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arrays; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/IntegerComparator.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/IntegerComparator.java index 51164425..0ccd47db 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/IntegerComparator.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/IntegerComparator.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arrays; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/package-info.java index 3db78032..874a6d44 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/AbstractHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/AbstractHeap.java deleted 
file mode 100644 index b6f098e6..00000000 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/AbstractHeap.java +++ /dev/null @@ -1,103 +0,0 @@ -package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; - -/* - This file is part of ELKI: - Environment for Developing KDD-Applications Supported by Index-Structures - - Copyright (C) 2012 - Ludwig-Maximilians-Universität München - Lehr- und Forschungseinheit für Datenbanksysteme - ELKI Development Team - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -/** - * Abstract base class for heaps. - * - * @author Erich Schubert - */ -public class AbstractHeap { - /** - * Default initial capacity - */ - public static final int DEFAULT_INITIAL_CAPACITY = 11; - - /** - * Current number of objects - */ - public int size = 0; - - /** - * Indicate up to where the heap is valid - */ - public int validSize = 0; - - /** - * (Structural) modification counter. Used to invalidate iterators. - */ - public int modCount = 0; - - /** - * Constructor. - */ - public AbstractHeap() { - super(); - } - - /** - * Query the size - * - * @return Size - */ - public int size() { - return this.size; - } - - /** - * Delete all elements from the heap. - */ - public void clear() { - this.size = 0; - this.validSize = -1; - heapModified(); - } - - /** - * Test whether we need to resize to have the requested capacity. 
- * - * @param requiredSize required capacity - * @param capacity Current capacity - * @return new capacity - */ - protected final int desiredSize(int requiredSize, int capacity) { - // Double until 64, then increase by 50% each time. - int newCapacity = ((capacity < 64) ? ((capacity + 1) * 2) : ((capacity / 2) * 3)); - // overflow? - if (newCapacity < 0) { - throw new OutOfMemoryError(); - } - if (requiredSize > newCapacity) { - newCapacity = requiredSize; - } - return newCapacity; - } - - /** - * Called at the end of each heap modification. - */ - protected void heapModified() { - modCount++; - } -} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMaxHeap.java index ab8ef1bb..222fe83a 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMaxHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMaxHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,41 +23,409 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; along with this program. If not, see <http://www.gnu.org/licenses/>. */ +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + /** - * Basic in-memory heap structure. + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). + * + * This code was automatically instantiated for the type: Comparable + * + * This combination was found to work quite well in benchmarks, but YMMV. 
* - * This heap is built lazily: if you first add many elements, then poll the - * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log - * n). This is implemented via a simple validTo counter. + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> * * @author Erich Schubert + * + * @apiviz.has UnsortedIter + * @param <K> Key type */ -public class ComparableMaxHeap<K extends Comparable<K>> extends ObjectHeap<K> { +public class ComparableMaxHeap<K extends Comparable<? super K>> implements ObjectHeap<K> { + /** + * Base heap. + */ + protected Comparable<Object>[] twoheap; + + /** + * Extension heap. + */ + protected Comparable<Object>[] fourheap; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + + /** + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. 
+ * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. + */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + /** - * Constructor with default capacity. + * Constructor, with default size. */ + @SuppressWarnings("unchecked") public ComparableMaxHeap() { - super(DEFAULT_INITIAL_CAPACITY); + super(); + Comparable<Object>[] twoheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, TWO_HEAP_INITIAL_SIZE); + + this.twoheap = twoheap; + this.fourheap = null; + this.size = 0; + this.modCount = 0; } /** - * Constructor with initial capacity. + * Constructor, with given minimum size. * - * @param size initial capacity + * @param minsize Minimum size */ - public ComparableMaxHeap(int size) { - super(size); + @SuppressWarnings("unchecked") + public ComparableMaxHeap(int minsize) { + super(); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + Comparable<Object>[] twoheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, size); + + this.twoheap = twoheap; + this.fourheap = null; + } else { + Comparable<Object>[] twoheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, TWO_HEAP_INITIAL_SIZE); + Comparable<Object>[] fourheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, minsize - TWO_HEAP_MAX_SIZE); + this.twoheap = twoheap; + this.fourheap = fourheap; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + Arrays.fill(twoheap, null); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + @SuppressWarnings("unchecked") + public void add(K o) { + final Comparable<Object> co = (Comparable<Object>)o; 
+ // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + ++size; + heapifyUp2(twopos, co); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, FOUR_HEAP_INITIAL_SIZE); + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. + fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + } + fourheap[fourpos] = co; + ++size; + heapifyUp4(fourpos, co); + ++modCount; + } + } + + @Override + public void add(K key, int max) { + if (size < max) { + add(key); + } else if (twoheap[0].compareTo(key) >= 0) { + replaceTopElement(key); + } + } + + @Override + @SuppressWarnings("unchecked") + public K replaceTopElement(K reinsert) { + final Comparable<Object> ret = twoheap[0]; + heapifyDown((Comparable<Object>) reinsert); + ++modCount; + return (K)ret; } /** - * Compare two objects + * Heapify-Up method for 2-ary heap. * - * @param o1 First object - * @param o2 Second object + * @param twopos Position in 2-ary heap. + * @param cur Current object */ + private void heapifyUp2(int twopos, Comparable<Object> cur) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + Comparable<Object> par = twoheap[parent]; + if (cur.compareTo(par) <= 0) { + break; + } + twoheap[twopos] = par; + twopos = parent; + } + twoheap[twopos] = cur; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. 
+ * @param cur Current object + */ + private void heapifyUp4(int fourpos, Comparable<Object> cur) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + Comparable<Object> par = fourheap[parent]; + if (cur.compareTo(par) <= 0) { + break; + } + fourheap[fourpos] = par; + fourpos = parent; + } + if (fourpos == 0 && twoheap[0].compareTo(cur) < 0) { + fourheap[0] = twoheap[0]; + twoheap[0] = cur; + } else { + fourheap[fourpos] = cur; + } + } + @Override @SuppressWarnings("unchecked") - protected boolean comp(Object o1, Object o2) { - return ((K) o1).compareTo((K) o2) < 0; + public K poll() { + final Comparable<Object> ret = twoheap[0]; + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final Comparable<Object> reinsert = fourheap[last]; + fourheap[last] = null; + heapifyDown(reinsert); + } else if (size > 0) { + final Comparable<Object> reinsert = twoheap[size]; + twoheap[size] = null; + heapifyDown(reinsert); + } else { + twoheap[0] = null; + } + ++modCount; + return (K)ret; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + */ + private void heapifyDown(Comparable<Object> reinsert) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (twoheap[1].compareTo(twoheap[2]) >= 0) ? 1 : 2; + if (fourheap[0].compareTo(twoheap[best]) > 0) { + twoheap[0] = fourheap[0]; + heapifyDown4(0, reinsert); + } else { + twoheap[0] = twoheap[best]; + heapifyDown2(best, reinsert); + } + return; + } + heapifyDown2(0, reinsert); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. 
+ * @param cur Current object + */ + private void heapifyDown2(int twopos, Comparable<Object> cur) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + Comparable<Object> best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && best.compareTo(twoheap[right]) < 0) { + bestchild = right; + best = twoheap[right]; + } + if (cur.compareTo(best) >= 0) { + break; + } + twoheap[twopos] = best; + twopos = bestchild; + } + twoheap[twopos] = cur; + } + + /** + * Heapify-Down for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + */ + private void heapifyDown4(int fourpos, Comparable<Object> cur) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + Comparable<Object> best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + Comparable<Object> nextchild = fourheap[candidate]; + if (best.compareTo(nextchild) < 0) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (best.compareTo(nextchild) < 0) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (best.compareTo(nextchild) < 0) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (cur.compareTo(best) >= 0) { + break; + } + fourheap[fourpos] = best; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + } + + @Override + @SuppressWarnings("unchecked") + public K peek() { + return (K)twoheap[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(ComparableMaxHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) { + buf.append(iter.get()).append(','); + } + 
buf.append(']'); + return buf.toString(); + } + + @Override + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (ObjectHeap.UnsortedIter<K> iter = heap.unsortedIter(); iter.valid(); iter.next()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements ObjectHeap.UnsortedIter<K> { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. + */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @SuppressWarnings("unchecked") + + @Override + public K get() { + return (K)((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMinHeap.java index 06d2cb32..3cc5a02f 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMinHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMinHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,41 +23,409 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + /** - * Basic in-memory heap structure. + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). + * + * This code was automatically instantiated for the type: Comparable + * + * This combination was found to work quite well in benchmarks, but YMMV. * - * This heap is built lazily: if you first add many elements, then poll the - * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log - * n). This is implemented via a simple validTo counter. + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> * * @author Erich Schubert + * + * @apiviz.has UnsortedIter + * @param <K> Key type */ -public class ComparableMinHeap<K extends Comparable<K>> extends ObjectHeap<K> { +public class ComparableMinHeap<K extends Comparable<? super K>> implements ObjectHeap<K> { + /** + * Base heap. + */ + protected Comparable<Object>[] twoheap; + + /** + * Extension heap. + */ + protected Comparable<Object>[] fourheap; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. 
Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + + /** + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. + */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + /** - * Constructor with default capacity. + * Constructor, with default size. */ + @SuppressWarnings("unchecked") public ComparableMinHeap() { - super(DEFAULT_INITIAL_CAPACITY); + super(); + Comparable<Object>[] twoheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, TWO_HEAP_INITIAL_SIZE); + + this.twoheap = twoheap; + this.fourheap = null; + this.size = 0; + this.modCount = 0; } /** - * Constructor with initial capacity. + * Constructor, with given minimum size. 
* - * @param size initial capacity + * @param minsize Minimum size */ - public ComparableMinHeap(int size) { - super(size); + @SuppressWarnings("unchecked") + public ComparableMinHeap(int minsize) { + super(); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + Comparable<Object>[] twoheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, size); + + this.twoheap = twoheap; + this.fourheap = null; + } else { + Comparable<Object>[] twoheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, TWO_HEAP_INITIAL_SIZE); + Comparable<Object>[] fourheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, minsize - TWO_HEAP_MAX_SIZE); + this.twoheap = twoheap; + this.fourheap = fourheap; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + Arrays.fill(twoheap, null); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + @SuppressWarnings("unchecked") + public void add(K o) { + final Comparable<Object> co = (Comparable<Object>)o; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + ++size; + heapifyUp2(twopos, co); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, FOUR_HEAP_INITIAL_SIZE); + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. 
+ fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + } + fourheap[fourpos] = co; + ++size; + heapifyUp4(fourpos, co); + ++modCount; + } + } + + @Override + public void add(K key, int max) { + if (size < max) { + add(key); + } else if (twoheap[0].compareTo(key) <= 0) { + replaceTopElement(key); + } + } + + @Override + @SuppressWarnings("unchecked") + public K replaceTopElement(K reinsert) { + final Comparable<Object> ret = twoheap[0]; + heapifyDown((Comparable<Object>) reinsert); + ++modCount; + return (K)ret; } /** - * Compare two objects + * Heapify-Up method for 2-ary heap. * - * @param o1 First object - * @param o2 Second object + * @param twopos Position in 2-ary heap. + * @param cur Current object */ + private void heapifyUp2(int twopos, Comparable<Object> cur) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + Comparable<Object> par = twoheap[parent]; + if (cur.compareTo(par) >= 0) { + break; + } + twoheap[twopos] = par; + twopos = parent; + } + twoheap[twopos] = cur; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. 
+ * @param cur Current object + */ + private void heapifyUp4(int fourpos, Comparable<Object> cur) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + Comparable<Object> par = fourheap[parent]; + if (cur.compareTo(par) >= 0) { + break; + } + fourheap[fourpos] = par; + fourpos = parent; + } + if (fourpos == 0 && twoheap[0].compareTo(cur) > 0) { + fourheap[0] = twoheap[0]; + twoheap[0] = cur; + } else { + fourheap[fourpos] = cur; + } + } + @Override @SuppressWarnings("unchecked") - protected boolean comp(Object o1, Object o2) { - return ((K) o1).compareTo((K) o2) > 0; + public K poll() { + final Comparable<Object> ret = twoheap[0]; + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final Comparable<Object> reinsert = fourheap[last]; + fourheap[last] = null; + heapifyDown(reinsert); + } else if (size > 0) { + final Comparable<Object> reinsert = twoheap[size]; + twoheap[size] = null; + heapifyDown(reinsert); + } else { + twoheap[0] = null; + } + ++modCount; + return (K)ret; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + */ + private void heapifyDown(Comparable<Object> reinsert) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (twoheap[1].compareTo(twoheap[2]) <= 0) ? 1 : 2; + if (fourheap[0].compareTo(twoheap[best]) < 0) { + twoheap[0] = fourheap[0]; + heapifyDown4(0, reinsert); + } else { + twoheap[0] = twoheap[best]; + heapifyDown2(best, reinsert); + } + return; + } + heapifyDown2(0, reinsert); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. 
+ * @param cur Current object + */ + private void heapifyDown2(int twopos, Comparable<Object> cur) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + Comparable<Object> best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && best.compareTo(twoheap[right]) > 0) { + bestchild = right; + best = twoheap[right]; + } + if (cur.compareTo(best) <= 0) { + break; + } + twoheap[twopos] = best; + twopos = bestchild; + } + twoheap[twopos] = cur; + } + + /** + * Heapify-Down for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + */ + private void heapifyDown4(int fourpos, Comparable<Object> cur) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + Comparable<Object> best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + Comparable<Object> nextchild = fourheap[candidate]; + if (best.compareTo(nextchild) > 0) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (best.compareTo(nextchild) > 0) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (best.compareTo(nextchild) > 0) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (cur.compareTo(best) <= 0) { + break; + } + fourheap[fourpos] = best; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + } + + @Override + @SuppressWarnings("unchecked") + public K peek() { + return (K)twoheap[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(ComparableMinHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) { + buf.append(iter.get()).append(','); + } + 
buf.append(']'); + return buf.toString(); + } + + @Override + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (ObjectHeap.UnsortedIter<K> iter = heap.unsortedIter(); iter.valid(); iter.next()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements ObjectHeap.UnsortedIter<K> { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. + */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @SuppressWarnings("unchecked") + + @Override + public K get() { + return (K)((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMaxHeap.java new file mode 100644 index 00000000..7b660d31 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMaxHeap.java @@ -0,0 +1,440 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). + * + * This code was automatically instantiated for the type: Comparator + * + * This combination was found to work quite well in benchmarks, but YMMV. + * + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> + * + * @author Erich Schubert + * + * @apiviz.has UnsortedIter + * @param <K> Key type + */ +public class ComparatorMaxHeap<K> implements ObjectHeap<K> { + /** + * Base heap. + */ + protected Object[] twoheap; + + /** + * Extension heap. + */ + protected Object[] fourheap; + + /** + * Current size of heap. 
+ */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + + /** + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. + */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + + + /** + * Comparator + */ + protected java.util.Comparator<Object> comparator; + + /** + * Constructor, with default size. + * @param comparator Comparator + */ + @SuppressWarnings("unchecked") + public ComparatorMaxHeap(java.util.Comparator<? super K> comparator) { + super(); + this.comparator = (java.util.Comparator<Object>) java.util.Comparator.class.cast(comparator); + Object[] twoheap = new Object[TWO_HEAP_INITIAL_SIZE]; + + this.twoheap = twoheap; + this.fourheap = null; + this.size = 0; + this.modCount = 0; + } + + /** + * Constructor, with given minimum size. + * + * @param minsize Minimum size + * @param comparator Comparator + */ + @SuppressWarnings("unchecked") + public ComparatorMaxHeap(int minsize, java.util.Comparator<? 
super K> comparator) { + super(); + this.comparator = (java.util.Comparator<Object>) java.util.Comparator.class.cast(comparator); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + Object[] twoheap = new Object[size]; + + this.twoheap = twoheap; + this.fourheap = null; + } else { + Object[] twoheap = new Object[TWO_HEAP_INITIAL_SIZE]; + Object[] fourheap = new Object[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + this.twoheap = twoheap; + this.fourheap = fourheap; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + Arrays.fill(twoheap, null); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + public void add(K o) { + final Object co = o; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + ++size; + heapifyUp2(twopos, co); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = new Object[FOUR_HEAP_INITIAL_SIZE]; + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. + fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + } + fourheap[fourpos] = co; + ++size; + heapifyUp4(fourpos, co); + ++modCount; + } + } + + @Override + public void add(K key, int max) { + if (size < max) { + add(key); + } else if (comparator.compare(twoheap[0], key) >= 0) { + replaceTopElement(key); + } + } + + @Override + @SuppressWarnings("unchecked") + public K replaceTopElement(K reinsert) { + final Object ret = twoheap[0]; + heapifyDown( reinsert); + ++modCount; + return (K)ret; + } + + /** + * Heapify-Up method for 2-ary heap. + * + * @param twopos Position in 2-ary heap. 
+ * @param cur Current object + */ + private void heapifyUp2(int twopos, Object cur) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + Object par = twoheap[parent]; + if (comparator.compare(cur, par) <= 0) { + break; + } + twoheap[twopos] = par; + twopos = parent; + } + twoheap[twopos] = cur; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + */ + private void heapifyUp4(int fourpos, Object cur) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + Object par = fourheap[parent]; + if (comparator.compare(cur, par) <= 0) { + break; + } + fourheap[fourpos] = par; + fourpos = parent; + } + if (fourpos == 0 && comparator.compare(twoheap[0], cur) < 0) { + fourheap[0] = twoheap[0]; + twoheap[0] = cur; + } else { + fourheap[fourpos] = cur; + } + } + + @Override + @SuppressWarnings("unchecked") + public K poll() { + final Object ret = twoheap[0]; + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final Object reinsert = fourheap[last]; + fourheap[last] = null; + heapifyDown(reinsert); + } else if (size > 0) { + final Object reinsert = twoheap[size]; + twoheap[size] = null; + heapifyDown(reinsert); + } else { + twoheap[0] = null; + } + ++modCount; + return (K)ret; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + */ + private void heapifyDown(Object reinsert) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (comparator.compare(twoheap[1], twoheap[2]) >= 0) ? 1 : 2; + if (comparator.compare(fourheap[0], twoheap[best]) > 0) { + twoheap[0] = fourheap[0]; + heapifyDown4(0, reinsert); + } else { + twoheap[0] = twoheap[best]; + heapifyDown2(best, reinsert); + } + return; + } + heapifyDown2(0, reinsert); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. 
+ * @param cur Current object + */ + private void heapifyDown2(int twopos, Object cur) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + Object best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && comparator.compare(best, twoheap[right]) < 0) { + bestchild = right; + best = twoheap[right]; + } + if (comparator.compare(cur, best) >= 0) { + break; + } + twoheap[twopos] = best; + twopos = bestchild; + } + twoheap[twopos] = cur; + } + + /** + * Heapify-Down for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + */ + private void heapifyDown4(int fourpos, Object cur) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + Object best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + Object nextchild = fourheap[candidate]; + if (comparator.compare(best, nextchild) < 0) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (comparator.compare(best, nextchild) < 0) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (comparator.compare(best, nextchild) < 0) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (comparator.compare(cur, best) >= 0) { + break; + } + fourheap[fourpos] = best; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + } + + @Override + @SuppressWarnings("unchecked") + public K peek() { + return (K)twoheap[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(ComparatorMaxHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) { + buf.append(iter.get()).append(','); + } + 
buf.append(']'); + return buf.toString(); + } + + @Override + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (ObjectHeap.UnsortedIter<K> iter = heap.unsortedIter(); iter.valid(); iter.advance()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * The iterator remembers the modification counter of the heap at creation; + * {@code valid()} throws a {@code ConcurrentModificationException} when the + * heap was modified afterwards. + * + * @author Erich Schubert + */ + private class UnsortedIter implements ObjectHeap.UnsortedIter<K> { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. + */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @SuppressWarnings("unchecked") + + @Override + public K get() { + return (K)((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMinHeap.java new file mode 100644 index 00000000..e12c5f64 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMinHeap.java @@ -0,0 +1,440 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). + * + * This code was automatically instantiated for the type: Comparator + * + * This combination was found to work quite well in benchmarks, but YMMV. + * + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> + * + * @author Erich Schubert + * + * @apiviz.has UnsortedIter + * @param <K> Key type + */ +public class ComparatorMinHeap<K> implements ObjectHeap<K> { + /** + * Base heap. + */ + protected Object[] twoheap; + + /** + * Extension heap. + */ + protected Object[] fourheap; + + /** + * Current size of heap. 
+ */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + + /** + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. + */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + + + /** + * Comparator + */ + protected java.util.Comparator<Object> comparator; + + /** + * Constructor, with default size. + * @param comparator Comparator + */ + @SuppressWarnings("unchecked") + public ComparatorMinHeap(java.util.Comparator<? super K> comparator) { + super(); + this.comparator = (java.util.Comparator<Object>) java.util.Comparator.class.cast(comparator); + Object[] twoheap = new Object[TWO_HEAP_INITIAL_SIZE]; + + this.twoheap = twoheap; + this.fourheap = null; + this.size = 0; + this.modCount = 0; + } + + /** + * Constructor, with given minimum size. + * + * @param minsize Minimum size + * @param comparator Comparator + */ + @SuppressWarnings("unchecked") + public ComparatorMinHeap(int minsize, java.util.Comparator<? 
super K> comparator) { + super(); + this.comparator = (java.util.Comparator<Object>) java.util.Comparator.class.cast(comparator); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + Object[] twoheap = new Object[size]; + + this.twoheap = twoheap; + this.fourheap = null; + } else { + Object[] twoheap = new Object[TWO_HEAP_INITIAL_SIZE]; + Object[] fourheap = new Object[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + this.twoheap = twoheap; + this.fourheap = fourheap; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + Arrays.fill(twoheap, null); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + public void add(K o) { + final Object co = o; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + ++size; + heapifyUp2(twopos, co); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = new Object[FOUR_HEAP_INITIAL_SIZE]; + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. + fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + } + fourheap[fourpos] = co; + ++size; + heapifyUp4(fourpos, co); + ++modCount; + } + } + + @Override + public void add(K key, int max) { + if (size < max) { + add(key); + } else if (comparator.compare(twoheap[0], key) <= 0) { + replaceTopElement(key); + } + } + + @Override + @SuppressWarnings("unchecked") + public K replaceTopElement(K reinsert) { + final Object ret = twoheap[0]; + heapifyDown( reinsert); + ++modCount; + return (K)ret; + } + + /** + * Heapify-Up method for 2-ary heap. + * + * @param twopos Position in 2-ary heap. 
+ * @param cur Current object + */ + private void heapifyUp2(int twopos, Object cur) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + Object par = twoheap[parent]; + if (comparator.compare(cur, par) >= 0) { + break; + } + twoheap[twopos] = par; + twopos = parent; + } + twoheap[twopos] = cur; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + */ + private void heapifyUp4(int fourpos, Object cur) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + Object par = fourheap[parent]; + if (comparator.compare(cur, par) >= 0) { + break; + } + fourheap[fourpos] = par; + fourpos = parent; + } + if (fourpos == 0 && comparator.compare(twoheap[0], cur) > 0) { + fourheap[0] = twoheap[0]; + twoheap[0] = cur; + } else { + fourheap[fourpos] = cur; + } + } + + @Override + @SuppressWarnings("unchecked") + public K poll() { + final Object ret = twoheap[0]; + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final Object reinsert = fourheap[last]; + fourheap[last] = null; + heapifyDown(reinsert); + } else if (size > 0) { + final Object reinsert = twoheap[size]; + twoheap[size] = null; + heapifyDown(reinsert); + } else { + twoheap[0] = null; + } + ++modCount; + return (K)ret; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + */ + private void heapifyDown(Object reinsert) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (comparator.compare(twoheap[1], twoheap[2]) <= 0) ? 1 : 2; + if (comparator.compare(fourheap[0], twoheap[best]) < 0) { + twoheap[0] = fourheap[0]; + heapifyDown4(0, reinsert); + } else { + twoheap[0] = twoheap[best]; + heapifyDown2(best, reinsert); + } + return; + } + heapifyDown2(0, reinsert); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. 
+ * @param cur Current object + */ + private void heapifyDown2(int twopos, Object cur) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + Object best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && comparator.compare(best, twoheap[right]) > 0) { + bestchild = right; + best = twoheap[right]; + } + if (comparator.compare(cur, best) <= 0) { + break; + } + twoheap[twopos] = best; + twopos = bestchild; + } + twoheap[twopos] = cur; + } + + /** + * Heapify-Down for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + */ + private void heapifyDown4(int fourpos, Object cur) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + Object best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + Object nextchild = fourheap[candidate]; + if (comparator.compare(best, nextchild) > 0) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (comparator.compare(best, nextchild) > 0) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (comparator.compare(best, nextchild) > 0) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (comparator.compare(cur, best) <= 0) { + break; + } + fourheap[fourpos] = best; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + } + + @Override + @SuppressWarnings("unchecked") + public K peek() { + return (K)twoheap[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(ComparatorMinHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) { + buf.append(iter.get()).append(','); + } + 
buf.append(']'); + return buf.toString(); + } + + @Override + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (ObjectHeap.UnsortedIter<K> iter = heap.unsortedIter(); iter.valid(); iter.advance()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * The iterator remembers the modification counter of the heap at creation; + * {@code valid()} throws a {@code ConcurrentModificationException} when the + * heap was modified afterwards. + * + * @author Erich Schubert + */ + private class UnsortedIter implements ObjectHeap.UnsortedIter<K> { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. + */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @SuppressWarnings("unchecked") + + @Override + public K get() { + return (K)((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleHeap.java index f9f928bd..acf77d86 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,53 +23,22 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ -import java.util.Arrays; - -import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter; /** - * Basic in-memory heap structure. - * - * This heap is built lazily: if you first add many elements, then poll the - * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log - * n). This is implemented via a simple validTo counter. + * Basic in-memory heap for double values. * * @author Erich Schubert + * + * @apiviz.has UnsortedIter */ -public abstract class DoubleHeap extends AbstractHeap { - /** - * Heap storage: queue - */ - protected transient double[] queue; - - /** - * Constructor with initial capacity. - * - * @param size initial capacity - */ - public DoubleHeap(int size) { - super(); - this.size = 0; - this.queue = new double[size]; - } - +public interface DoubleHeap { /** * Add a key-value pair to the heap * * @param key Key */ - public void add(double key) { - // resize when needed - if (size + 1 > queue.length) { - resize(size + 1); - } - // final int pos = size; - this.queue[size] = key; - this.size += 1; - heapifyUp(size - 1, key); - validSize += 1; - heapModified(); - } + void add(double key); /** * Add a key-value pair to the heap, except if the new element is larger than @@ -78,13 +47,7 @@ public abstract class DoubleHeap extends AbstractHeap { * @param key Key * @param max Maximum size of heap */ - public void add(double key, int max) { - if (size < max) { - add(key); - } else if (comp(key, peek())) { - replaceTopElement(key); - } - } + void add(double key, int max); /** * Combined operation that removes the top element, and inserts a new element @@ -93,172 +56,67 @@ public abstract class DoubleHeap extends AbstractHeap { * @param e New element to insert * @return Previous top element of the heap */ - public double replaceTopElement(double e) { - ensureValid(); - double oldroot = queue[0]; - heapifyDown(0, e); - heapModified(); - return oldroot; - } + double 
replaceTopElement(double e); /** * Get the current top key * * @return Top key */ - public double peek() { - if (size == 0) { - throw new ArrayIndexOutOfBoundsException("Peek() on an empty heap!"); - } - ensureValid(); - return queue[0]; - } + double peek(); /** * Remove the first element * * @return Top element */ - public double poll() { - return removeAt(0); - } + double poll(); /** - * Repair the heap + * Delete all elements from the heap. */ - protected void ensureValid() { - if (validSize != size) { - if (size > 1) { - // Parent of first invalid - int nextmin = validSize > 0 ? ((validSize - 1) >>> 1) : 0; - int curmin = MathUtil.nextAllOnesInt(nextmin); // Next line - int nextmax = curmin - 1; // End of valid line - int pos = (size - 2) >>> 1; // Parent of last element - // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin+", "+nextmin); - while (pos >= nextmin) { - // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin); - while (pos >= curmin) { - if (!heapifyDown(pos, queue[pos])) { - final int parent = (pos - 1) >>> 1; - if (parent < curmin) { - nextmin = Math.min(nextmin, parent); - nextmax = Math.max(nextmax, parent); - } - } - pos--; - } - curmin = nextmin; - pos = Math.min(pos, nextmax); - nextmax = -1; - } - } - validSize = size; - } - } + void clear(); /** - * Remove the element at the given position. + * Query the size * - * @param pos Element position. - * @return Removed element + * @return Size */ - protected double removeAt(int pos) { - if (pos < 0 || pos >= size) { - return 0.0; - } - final double top = queue[0]; - // Replacement object: - final double reinkey = queue[size - 1]; - // Keep heap in sync - if (validSize == size) { - size -= 1; - validSize -= 1; - heapifyDown(pos, reinkey); - } else { - size -= 1; - validSize = Math.min(pos >>> 1, validSize); - queue[pos] = reinkey; - } - heapModified(); - return top; - } - + public int size(); + /** - * Execute a "Heapify Upwards" aka "SiftUp". Used in insertions. 
+ * Is the heap empty? * - * @param pos insertion position - * @param curkey Current key + * @return {@code true} when the size is 0. */ - protected void heapifyUp(int pos, double curkey) { - while (pos > 0) { - final int parent = (pos - 1) >>> 1; - double parkey = queue[parent]; - - if (comp(curkey, parkey)) { // Compare - break; - } - queue[pos] = parkey; - pos = parent; - } - queue[pos] = curkey; - } + public boolean isEmpty(); /** - * Execute a "Heapify Downwards" aka "SiftDown". Used in deletions. + * Get an unsorted iterator to inspect the heap. * - * @param ipos re-insertion position - * @param curkey Current key - * @return true when the order was changed + * @return Iterator */ - protected boolean heapifyDown(final int ipos, double curkey) { - int pos = ipos; - final int half = size >>> 1; - while (pos < half) { - // Get left child (must exist!) - int cpos = (pos << 1) + 1; - double chikey = queue[cpos]; - // Test right child, if present - final int rchild = cpos + 1; - if (rchild < size) { - double right = queue[rchild]; - if (comp(chikey, right)) { // Compare - cpos = rchild; - chikey = right; - } - } - - if (comp(chikey, curkey)) { // Compare - break; - } - queue[pos] = chikey; - pos = cpos; - } - queue[pos] = curkey; - return (pos == ipos); - } + UnsortedIter unsortedIter(); /** - * Test whether we need to resize to have the requested capacity. + * Unsorted iterator - in heap order. Does not poll the heap. * - * @param requiredSize required capacity - */ - protected final void resize(int requiredSize) { - queue = Arrays.copyOf(queue, desiredSize(requiredSize, queue.length)); - } - - /** - * Delete all elements from the heap. 
+ * <pre> + * {@code + * for (DoubleHeap.UnsortedIter iter = heap.unsortedIter(); iter.valid(); iter.advance()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert */ - @Override - public void clear() { - super.clear(); - for (int i = 0; i < size; i++) { - queue[i] = 0.0; - } + public static interface UnsortedIter extends Iter { + /** + * Get the iterator's current value. + * + * @return Current value + */ + double get(); } - - /** - * Compare two objects - */ - abstract protected boolean comp(double o1, double o2); } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerHeap.java new file mode 100644 index 00000000..c3bf85f4 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerHeap.java @@ -0,0 +1,127 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter; + +/** + * Basic in-memory heap interface, for double keys and int values. 
+ * + * @author Erich Schubert + * + * @apiviz.has UnsortedIter + */ +public interface DoubleIntegerHeap { + /** + * Add a key-value pair to the heap + * + * @param key Key + * @param val Value + */ + void add(double key, int val); + + /** + * Add a key-value pair to the heap if it improves the top. + * + * @param key Key + * @param val Value + * @param k Desired maximum size + */ + void add(double key, int val, int k); + + /** + * Combined operation that removes the top element, and inserts a new element + * instead. + * + * @param key Key of new element + * @param val Value of new element + */ + void replaceTopElement(double key, int val); + + /** + * Get the current top key + * + * @return Top key + */ + double peekKey(); + + /** + * Get the current top value + * + * @return Value + */ + int peekValue(); + + /** + * Remove the first element + */ + void poll(); + + /** + * Clear the heap contents. + */ + void clear(); + + /** + * Query the size + * + * @return Size + */ + public int size(); + + /** + * Is the heap empty? + * + * @return {@code true} when the size is 0. + */ + public boolean isEmpty(); + + /** + * Get an unsorted iterator to inspect the heap. + * + * @return Iterator + */ + UnsortedIter unsortedIter(); + + /** + * Unsorted iterator - in heap order. Does not poll the heap. 
+ * + * @author Erich Schubert + */ + public static interface UnsortedIter extends Iter { + /** + * Get the current key + * + * @return Current key + */ + double getKey(); + + /** + * Get the current value + * + * @return Current value + */ + int getValue(); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMaxHeap.java new file mode 100644 index 00000000..34f1e889 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMaxHeap.java @@ -0,0 +1,478 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). 
+ * + * This code was automatically instantiated for the types: Double and Integer + * + * This combination was found to work quite well in benchmarks, but YMMV. + * + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> + * + * @author Erich Schubert + * + * @apiviz.has UnsortedIter + */ +public class DoubleIntegerMaxHeap implements DoubleIntegerHeap { + /** + * Base heap. + */ + protected double[] twoheap; + + /** + * Base heap values. + */ + protected int[] twovals; + + /** + * Extension heap. + */ + protected double[] fourheap; + + /** + * Extension heapvalues. + */ + protected int[] fourvals; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + + /** + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. 
+ */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + + /** + * Constructor, with default size. + */ + public DoubleIntegerMaxHeap() { + super(); + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + int[] twovals = new int[TWO_HEAP_INITIAL_SIZE]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + this.size = 0; + this.modCount = 0; + } + + /** + * Constructor, with given minimum size. + * + * @param minsize Minimum size + */ + public DoubleIntegerMaxHeap(int minsize) { + super(); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + double[] twoheap = new double[size]; + int[] twovals = new int[size]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + } else { + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + int[] twovals = new int[TWO_HEAP_INITIAL_SIZE]; + double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + int[] fourvals = new int[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = fourheap; + this.fourvals = fourvals; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + fourvals = null; + Arrays.fill(twoheap, 0.0); + Arrays.fill(twovals, 0); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + public void add(double o, int v) { + final double co = o; + final int cv = v; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. 
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + twovals[twopos] = cv; + ++size; + heapifyUp2(twopos, co, cv); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = new double[FOUR_HEAP_INITIAL_SIZE]; + fourvals = new int[FOUR_HEAP_INITIAL_SIZE]; + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. + fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1)); + } + fourheap[fourpos] = co; + fourvals[fourpos] = cv; + ++size; + heapifyUp4(fourpos, co, cv); + ++modCount; + } + } + + @Override + public void add(double key, int val, int max) { + if (size < max) { + add(key, val); + } else if (twoheap[0] >= key) { + replaceTopElement(key, val); + } + } + + @Override + public void replaceTopElement(double reinsert, int val) { + heapifyDown(reinsert, val); + ++modCount; + } + + /** + * Heapify-Up method for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Current value + */ + private void heapifyUp2(int twopos, double cur, int val) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + double par = twoheap[parent]; + if (cur <= par) { + break; + } + twoheap[twopos] = par; + twovals[twopos] = twovals[parent]; + twopos = parent; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. 
+ * @param cur Current object + * @param val Current value + */ + private void heapifyUp4(int fourpos, double cur, int val) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + double par = fourheap[parent]; + if (cur <= par) { + break; + } + fourheap[fourpos] = par; + fourvals[fourpos] = fourvals[parent]; + fourpos = parent; + } + if (fourpos == 0 && twoheap[0] < cur) { + fourheap[0] = twoheap[0]; + fourvals[0] = twovals[0]; + twoheap[0] = cur; + twovals[0] = val; + } else { + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + } + + @Override + public void poll() { + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final double reinsert = fourheap[last]; + final int reinsertv = fourvals[last]; + fourheap[last] = 0.0; + fourvals[last] = 0; + heapifyDown(reinsert, reinsertv); + } else if (size > 0) { + final double reinsert = twoheap[size]; + final int reinsertv = twovals[size]; + twoheap[size] = 0.0; + twovals[size] = 0; + heapifyDown(reinsert, reinsertv); + } else { + twoheap[0] = 0.0; + twovals[0] = 0; + } + ++modCount; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + * @param val Value to reinsert. + */ + private void heapifyDown(double reinsert, int val) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (twoheap[1] >= twoheap[2]) ? 1 : 2; + if (fourheap[0] > twoheap[best]) { + twoheap[0] = fourheap[0]; + twovals[0] = fourvals[0]; + heapifyDown4(0, reinsert, val); + } else { + twoheap[0] = twoheap[best]; + twovals[0] = twovals[best]; + heapifyDown2(best, reinsert, val); + } + return; + } + heapifyDown2(0, reinsert, val); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Value to reinsert. 
+ */ + private void heapifyDown2(int twopos, double cur, int val) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + double best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && best < twoheap[right]) { + bestchild = right; + best = twoheap[right]; + } + if (cur >= best) { + break; + } + twoheap[twopos] = best; + twovals[twopos] = twovals[bestchild]; + twopos = bestchild; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Down for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + * @param val Value to reinsert. + */ + private void heapifyDown4(int fourpos, double cur, int val) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + double best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + double nextchild = fourheap[candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (cur >= best) { + break; + } + fourheap[fourpos] = best; + fourvals[fourpos] = fourvals[bestchild]; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + + @Override + public double peekKey() { + return twoheap[0]; + } + + @Override + public int peekValue() { + return twovals[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(DoubleIntegerMaxHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new UnsortedIter(); iter.valid(); 
iter.advance()) { + buf.append(iter.getKey()).append(':').append(iter.getValue()).append(','); + } + buf.append(']'); + return buf.toString(); + } + + @Override + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (DoubleIntegerHeap.UnsortedIter iter = heap.unsortedIter(); iter.valid(); iter.advance()) { + * doSomething(iter.getKey(), iter.getValue()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements DoubleIntegerHeap.UnsortedIter { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. + */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @Override + public double getKey() { + return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } + + @Override + public int getValue() { + return ((pos < TWO_HEAP_MAX_SIZE) ? 
twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMinHeap.java new file mode 100644 index 00000000..ca6192ad --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMinHeap.java @@ -0,0 +1,478 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). + * + * This code was automatically instantiated for the types: Double and Integer + * + * This combination was found to work quite well in benchmarks, but YMMV. 
+ * + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> + * + * @author Erich Schubert + * + * @apiviz.has UnsortedIter + */ +public class DoubleIntegerMinHeap implements DoubleIntegerHeap { + /** + * Base (2-ary) heap keys. + */ + protected double[] twoheap; + + /** + * Values associated with the base heap keys. + */ + protected int[] twovals; + + /** + * Extension (4-ary) heap keys; may be {@code null} while unused. + */ + protected double[] fourheap; + + /** + * Values associated with the extension heap keys. + */ + protected int[] fourvals; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + + /** + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since the 2-ary heap last grew by 256 (from 255 to 511), let's use 341. + */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + + /** + * Constructor, with default size. 
+ */ + public DoubleIntegerMinHeap() { + super(); + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + int[] twovals = new int[TWO_HEAP_INITIAL_SIZE]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + this.size = 0; + this.modCount = 0; + } + + /** + * Constructor, with given minimum size. + * + * @param minsize Minimum size + */ + public DoubleIntegerMinHeap(int minsize) { + super(); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + double[] twoheap = new double[size]; + int[] twovals = new int[size]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + } else { + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + int[] twovals = new int[TWO_HEAP_INITIAL_SIZE]; + double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + int[] fourvals = new int[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = fourheap; + this.fourvals = fourvals; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + fourvals = null; + Arrays.fill(twoheap, 0.0); + Arrays.fill(twovals, 0); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + public void add(double o, int v) { + final double co = o; + final int cv = v; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. 
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + twovals[twopos] = cv; + ++size; + heapifyUp2(twopos, co, cv); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = new double[FOUR_HEAP_INITIAL_SIZE]; + fourvals = new int[FOUR_HEAP_INITIAL_SIZE]; + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. + fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1)); + } + fourheap[fourpos] = co; + fourvals[fourpos] = cv; + ++size; + heapifyUp4(fourpos, co, cv); + ++modCount; + } + } + + @Override + public void add(double key, int val, int max) { + if (size < max) { + add(key, val); + } else if (twoheap[0] <= key) { + replaceTopElement(key, val); + } + } + + @Override + public void replaceTopElement(double reinsert, int val) { + heapifyDown(reinsert, val); + ++modCount; + } + + /** + * Heapify-Up method for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Current value + */ + private void heapifyUp2(int twopos, double cur, int val) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + double par = twoheap[parent]; + if (cur >= par) { + break; + } + twoheap[twopos] = par; + twovals[twopos] = twovals[parent]; + twopos = parent; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. 
+ * @param cur Current object + * @param val Current value + */ + private void heapifyUp4(int fourpos, double cur, int val) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + double par = fourheap[parent]; + if (cur >= par) { + break; + } + fourheap[fourpos] = par; + fourvals[fourpos] = fourvals[parent]; + fourpos = parent; + } + if (fourpos == 0 && twoheap[0] > cur) { + fourheap[0] = twoheap[0]; + fourvals[0] = twovals[0]; + twoheap[0] = cur; + twovals[0] = val; + } else { + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + } + + @Override + public void poll() { + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final double reinsert = fourheap[last]; + final int reinsertv = fourvals[last]; + fourheap[last] = 0.0; + fourvals[last] = 0; + heapifyDown(reinsert, reinsertv); + } else if (size > 0) { + final double reinsert = twoheap[size]; + final int reinsertv = twovals[size]; + twoheap[size] = 0.0; + twovals[size] = 0; + heapifyDown(reinsert, reinsertv); + } else { + twoheap[0] = 0.0; + twovals[0] = 0; + } + ++modCount; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + * @param val Value to reinsert. + */ + private void heapifyDown(double reinsert, int val) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (twoheap[1] <= twoheap[2]) ? 1 : 2; + if (fourheap[0] < twoheap[best]) { + twoheap[0] = fourheap[0]; + twovals[0] = fourvals[0]; + heapifyDown4(0, reinsert, val); + } else { + twoheap[0] = twoheap[best]; + twovals[0] = twovals[best]; + heapifyDown2(best, reinsert, val); + } + return; + } + heapifyDown2(0, reinsert, val); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Value to reinsert. 
+ */ + private void heapifyDown2(int twopos, double cur, int val) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + double best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && best > twoheap[right]) { + bestchild = right; + best = twoheap[right]; + } + if (cur <= best) { + break; + } + twoheap[twopos] = best; + twovals[twopos] = twovals[bestchild]; + twopos = bestchild; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Down for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + * @param val Value to reinsert. + */ + private void heapifyDown4(int fourpos, double cur, int val) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + double best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + double nextchild = fourheap[candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (cur <= best) { + break; + } + fourheap[fourpos] = best; + fourvals[fourpos] = fourvals[bestchild]; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + + @Override + public double peekKey() { + return twoheap[0]; + } + + @Override + public int peekValue() { + return twovals[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(DoubleIntegerMinHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new UnsortedIter(); iter.valid(); 
iter.advance()) { + buf.append(iter.getKey()).append(':').append(iter.getValue()).append(','); + } + buf.append(']'); + return buf.toString(); + } + + @Override + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (DoubleIntegerHeap.UnsortedIter iter = heap.unsortedIter(); iter.valid(); iter.advance()) { + * doSomething(iter.getKey(), iter.getValue()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements DoubleIntegerHeap.UnsortedIter { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. + */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @Override + public double getKey() { + return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } + + @Override + public int getValue() { + return ((pos < TWO_HEAP_MAX_SIZE) ? 
twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongHeap.java new file mode 100644 index 00000000..b93adafa --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongHeap.java @@ -0,0 +1,127 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter; + +/** + * Basic in-memory heap interface, for double keys and long values. + * + * @author Erich Schubert + * + * @apiviz.has UnsortedIter + */ +public interface DoubleLongHeap { + /** + * Add a key-value pair to the heap + * + * @param key Key + * @param val Value + */ + void add(double key, long val); + + /** + * Add a key-value pair to the heap if it improves the top. 
+ * + * @param key Key + * @param val Value + * @param k Desired maximum size + */ + void add(double key, long val, int k); + + /** + * Combined operation that removes the top element, and inserts a new element + * instead. + * + * @param key Key of new element + * @param val Value of new element + */ + void replaceTopElement(double key, long val); + + /** + * Get the current top key + * + * @return Top key + */ + double peekKey(); + + /** + * Get the current top value + * + * @return Value + */ + long peekValue(); + + /** + * Remove the first element + */ + void poll(); + + /** + * Clear the heap contents. + */ + void clear(); + + /** + * Query the size + * + * @return Size + */ + public int size(); + + /** + * Is the heap empty? + * + * @return {@code true} when the size is 0. + */ + public boolean isEmpty(); + + /** + * Get an unsorted iterator to inspect the heap. + * + * @return Iterator + */ + UnsortedIter unsortedIter(); + + /** + * Unsorted iterator - in heap order. Does not poll the heap. 
+ * + * @author Erich Schubert + */ + public static interface UnsortedIter extends Iter { + /** + * Get the current key + * + * @return Current key + */ + double getKey(); + + /** + * Get the current value + * + * @return Current value + */ + long getValue(); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMaxHeap.java new file mode 100644 index 00000000..6d15656c --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMaxHeap.java @@ -0,0 +1,478 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). 
+ * + * This code was automatically instantiated for the types: Double and Long + * + * This combination was found to work quite well in benchmarks, but YMMV. + * + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> + * + * @author Erich Schubert + * + * @apiviz.has UnsortedIter + */ +public class DoubleLongMaxHeap implements DoubleLongHeap { + /** + * Base (2-ary) heap keys. + */ + protected double[] twoheap; + + /** + * Values associated with the base heap keys. + */ + protected long[] twovals; + + /** + * Extension (4-ary) heap keys; may be {@code null} while unused. + */ + protected double[] fourheap; + + /** + * Values associated with the extension heap keys. + */ + protected long[] fourvals; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + + /** + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since the 2-ary heap last grew by 256 (from 255 to 511), let's use 341. 
+ */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + + /** + * Constructor, with default size. + */ + public DoubleLongMaxHeap() { + super(); + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + long[] twovals = new long[TWO_HEAP_INITIAL_SIZE]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + this.size = 0; + this.modCount = 0; + } + + /** + * Constructor, with given minimum size. + * + * @param minsize Minimum size + */ + public DoubleLongMaxHeap(int minsize) { + super(); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + double[] twoheap = new double[size]; + long[] twovals = new long[size]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + } else { + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + long[] twovals = new long[TWO_HEAP_INITIAL_SIZE]; + double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + long[] fourvals = new long[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = fourheap; + this.fourvals = fourvals; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + fourvals = null; + Arrays.fill(twoheap, 0.0); + Arrays.fill(twovals, 0); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + public void add(double o, long v) { + final double co = o; + final long cv = v; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. 
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + twovals[twopos] = cv; + ++size; + heapifyUp2(twopos, co, cv); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = new double[FOUR_HEAP_INITIAL_SIZE]; + fourvals = new long[FOUR_HEAP_INITIAL_SIZE]; + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. + fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1)); + } + fourheap[fourpos] = co; + fourvals[fourpos] = cv; + ++size; + heapifyUp4(fourpos, co, cv); + ++modCount; + } + } + + @Override + public void add(double key, long val, int max) { + if (size < max) { + add(key, val); + } else if (twoheap[0] >= key) { + replaceTopElement(key, val); + } + } + + @Override + public void replaceTopElement(double reinsert, long val) { + heapifyDown(reinsert, val); + ++modCount; + } + + /** + * Heapify-Up method for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Current value + */ + private void heapifyUp2(int twopos, double cur, long val) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + double par = twoheap[parent]; + if (cur <= par) { + break; + } + twoheap[twopos] = par; + twovals[twopos] = twovals[parent]; + twopos = parent; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. 
+ * @param cur Current object + * @param val Current value + */ + private void heapifyUp4(int fourpos, double cur, long val) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + double par = fourheap[parent]; + if (cur <= par) { + break; + } + fourheap[fourpos] = par; + fourvals[fourpos] = fourvals[parent]; + fourpos = parent; + } + if (fourpos == 0 && twoheap[0] < cur) { + fourheap[0] = twoheap[0]; + fourvals[0] = twovals[0]; + twoheap[0] = cur; + twovals[0] = val; + } else { + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + } + + @Override + public void poll() { + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final double reinsert = fourheap[last]; + final long reinsertv = fourvals[last]; + fourheap[last] = 0.0; + fourvals[last] = 0; + heapifyDown(reinsert, reinsertv); + } else if (size > 0) { + final double reinsert = twoheap[size]; + final long reinsertv = twovals[size]; + twoheap[size] = 0.0; + twovals[size] = 0; + heapifyDown(reinsert, reinsertv); + } else { + twoheap[0] = 0.0; + twovals[0] = 0; + } + ++modCount; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + * @param val Value to reinsert. + */ + private void heapifyDown(double reinsert, long val) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (twoheap[1] >= twoheap[2]) ? 1 : 2; + if (fourheap[0] > twoheap[best]) { + twoheap[0] = fourheap[0]; + twovals[0] = fourvals[0]; + heapifyDown4(0, reinsert, val); + } else { + twoheap[0] = twoheap[best]; + twovals[0] = twovals[best]; + heapifyDown2(best, reinsert, val); + } + return; + } + heapifyDown2(0, reinsert, val); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Value to reinsert. 
+ */ + private void heapifyDown2(int twopos, double cur, long val) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + double best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && best < twoheap[right]) { + bestchild = right; + best = twoheap[right]; + } + if (cur >= best) { + break; + } + twoheap[twopos] = best; + twovals[twopos] = twovals[bestchild]; + twopos = bestchild; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Down for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + * @param val Value to reinsert. + */ + private void heapifyDown4(int fourpos, double cur, long val) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + double best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + double nextchild = fourheap[candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (cur >= best) { + break; + } + fourheap[fourpos] = best; + fourvals[fourpos] = fourvals[bestchild]; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + + @Override + public double peekKey() { + return twoheap[0]; + } + + @Override + public long peekValue() { + return twovals[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(DoubleLongMaxHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new UnsortedIter(); iter.valid(); 
iter.advance()) { + buf.append(iter.getKey()).append(':').append(iter.getValue()).append(','); + } + buf.append(']'); + return buf.toString(); + } + + @Override + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (DoubleLongHeap.UnsortedIter iter = heap.unsortedIter(); iter.valid(); iter.advance()) { + * doSomething(iter.getKey(), iter.getValue()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements DoubleLongHeap.UnsortedIter { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. + */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @Override + public double getKey() { + return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } + + @Override + public long getValue() { + return ((pos < TWO_HEAP_MAX_SIZE) ? 
twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMinHeap.java new file mode 100644 index 00000000..d38eb6e3 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMinHeap.java @@ -0,0 +1,478 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). + * + * This code was automatically instantiated for the types: Double and Long + * + * This combination was found to work quite well in benchmarks, but YMMV. 
+ * + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> + * + * @author Erich Schubert + * + * @apiviz.has UnsortedIter + */ +public class DoubleLongMinHeap implements DoubleLongHeap { + /** + * Base (2-ary) heap keys. + */ + protected double[] twoheap; + + /** + * Values associated with the base heap keys. + */ + protected long[] twovals; + + /** + * Extension (4-ary) heap keys; may be {@code null} while unused. + */ + protected double[] fourheap; + + /** + * Values associated with the extension heap keys. + */ + protected long[] fourvals; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + + /** + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since the 2-ary heap last grew by 256 (from 255 to 511), let's use 341. + */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + + /** + * Constructor, with default size. 
+ */ + public DoubleLongMinHeap() { + super(); + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + long[] twovals = new long[TWO_HEAP_INITIAL_SIZE]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + this.size = 0; + this.modCount = 0; + } + + /** + * Constructor, with given minimum size. + * + * @param minsize Minimum size + */ + public DoubleLongMinHeap(int minsize) { + super(); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + double[] twoheap = new double[size]; + long[] twovals = new long[size]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + } else { + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + long[] twovals = new long[TWO_HEAP_INITIAL_SIZE]; + double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + long[] fourvals = new long[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = fourheap; + this.fourvals = fourvals; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + fourvals = null; + Arrays.fill(twoheap, 0.0); + Arrays.fill(twovals, 0); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + public void add(double o, long v) { + final double co = o; + final long cv = v; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. 
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + twovals[twopos] = cv; + ++size; + heapifyUp2(twopos, co, cv); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = new double[FOUR_HEAP_INITIAL_SIZE]; + fourvals = new long[FOUR_HEAP_INITIAL_SIZE]; + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. + fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1)); + } + fourheap[fourpos] = co; + fourvals[fourpos] = cv; + ++size; + heapifyUp4(fourpos, co, cv); + ++modCount; + } + } + + @Override + public void add(double key, long val, int max) { + if (size < max) { + add(key, val); + } else if (twoheap[0] <= key) { + replaceTopElement(key, val); + } + } + + @Override + public void replaceTopElement(double reinsert, long val) { + heapifyDown(reinsert, val); + ++modCount; + } + + /** + * Heapify-Up method for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Current value + */ + private void heapifyUp2(int twopos, double cur, long val) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + double par = twoheap[parent]; + if (cur >= par) { + break; + } + twoheap[twopos] = par; + twovals[twopos] = twovals[parent]; + twopos = parent; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. 
+ * @param cur Current object + * @param val Current value + */ + private void heapifyUp4(int fourpos, double cur, long val) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + double par = fourheap[parent]; + if (cur >= par) { + break; + } + fourheap[fourpos] = par; + fourvals[fourpos] = fourvals[parent]; + fourpos = parent; + } + if (fourpos == 0 && twoheap[0] > cur) { + fourheap[0] = twoheap[0]; + fourvals[0] = twovals[0]; + twoheap[0] = cur; + twovals[0] = val; + } else { + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + } + + @Override + public void poll() { + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final double reinsert = fourheap[last]; + final long reinsertv = fourvals[last]; + fourheap[last] = 0.0; + fourvals[last] = 0; + heapifyDown(reinsert, reinsertv); + } else if (size > 0) { + final double reinsert = twoheap[size]; + final long reinsertv = twovals[size]; + twoheap[size] = 0.0; + twovals[size] = 0; + heapifyDown(reinsert, reinsertv); + } else { + twoheap[0] = 0.0; + twovals[0] = 0; + } + ++modCount; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + * @param val Value to reinsert. + */ + private void heapifyDown(double reinsert, long val) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (twoheap[1] <= twoheap[2]) ? 1 : 2; + if (fourheap[0] < twoheap[best]) { + twoheap[0] = fourheap[0]; + twovals[0] = fourvals[0]; + heapifyDown4(0, reinsert, val); + } else { + twoheap[0] = twoheap[best]; + twovals[0] = twovals[best]; + heapifyDown2(best, reinsert, val); + } + return; + } + heapifyDown2(0, reinsert, val); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Value to reinsert. 
+ */ + private void heapifyDown2(int twopos, double cur, long val) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + double best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && best > twoheap[right]) { + bestchild = right; + best = twoheap[right]; + } + if (cur <= best) { + break; + } + twoheap[twopos] = best; + twovals[twopos] = twovals[bestchild]; + twopos = bestchild; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Down for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + * @param val Value to reinsert. + */ + private void heapifyDown4(int fourpos, double cur, long val) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + double best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + double nextchild = fourheap[candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (cur <= best) { + break; + } + fourheap[fourpos] = best; + fourvals[fourpos] = fourvals[bestchild]; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + + @Override + public double peekKey() { + return twoheap[0]; + } + + @Override + public long peekValue() { + return twovals[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(DoubleLongMinHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new UnsortedIter(); iter.valid(); 
iter.advance()) { + buf.append(iter.getKey()).append(':').append(iter.getValue()).append(','); + } + buf.append(']'); + return buf.toString(); + } + + @Override + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (DoubleLongHeap.UnsortedIter iter = heap.unsortedIter(); iter.valid(); iter.next()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements DoubleLongHeap.UnsortedIter { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. + */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @Override + public double getKey() { + return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } + + @Override + public long getValue() { + return ((pos < TWO_HEAP_MAX_SIZE) ? 
twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMaxHeap.java index 1b7d6037..7ea28f14 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMaxHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMaxHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,40 +23,400 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; along with this program. If not, see <http://www.gnu.org/licenses/>. */ +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + /** - * Basic in-memory heap structure. + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). + * + * This code was automatically instantiated for the type: Double * - * This heap is built lazily: if you first add many elements, then poll the - * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log - * n). This is implemented via a simple validTo counter. + * This combination was found to work quite well in benchmarks, but YMMV. 
+ * + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> * * @author Erich Schubert + * + * @apiviz.has UnsortedIter */ -public class DoubleMaxHeap extends DoubleHeap { +public class DoubleMaxHeap implements DoubleHeap { + /** + * Base heap. + */ + protected double[] twoheap; + + /** + * Extension heap. + */ + protected double[] fourheap; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + /** - * Constructor with default capacity. + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. + */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + + /** + * Constructor, with default size. 
*/ public DoubleMaxHeap() { - super(DEFAULT_INITIAL_CAPACITY); + super(); + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + + this.twoheap = twoheap; + this.fourheap = null; + this.size = 0; + this.modCount = 0; } /** - * Constructor with initial capacity. + * Constructor, with given minimum size. * - * @param size initial capacity + * @param minsize Minimum size */ - public DoubleMaxHeap(int size) { - super(size); + public DoubleMaxHeap(int minsize) { + super(); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + double[] twoheap = new double[size]; + + this.twoheap = twoheap; + this.fourheap = null; + } else { + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + this.twoheap = twoheap; + this.fourheap = fourheap; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + Arrays.fill(twoheap, 0.0); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + public void add(double o) { + final double co = o; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + ++size; + heapifyUp2(twopos, co); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = new double[FOUR_HEAP_INITIAL_SIZE]; + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. 
+ fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + } + fourheap[fourpos] = co; + ++size; + heapifyUp4(fourpos, co); + ++modCount; + } + } + + @Override + public void add(double key, int max) { + if (size < max) { + add(key); + } else if (twoheap[0] >= key) { + replaceTopElement(key); + } + } + + @Override + public double replaceTopElement(double reinsert) { + final double ret = twoheap[0]; + heapifyDown( reinsert); + ++modCount; + return ret; + } + + /** + * Heapify-Up method for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + */ + private void heapifyUp2(int twopos, double cur) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + double par = twoheap[parent]; + if (cur <= par) { + break; + } + twoheap[twopos] = par; + twopos = parent; + } + twoheap[twopos] = cur; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + */ + private void heapifyUp4(int fourpos, double cur) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + double par = fourheap[parent]; + if (cur <= par) { + break; + } + fourheap[fourpos] = par; + fourpos = parent; + } + if (fourpos == 0 && twoheap[0] < cur) { + fourheap[0] = twoheap[0]; + twoheap[0] = cur; + } else { + fourheap[fourpos] = cur; + } + } + + @Override + public double poll() { + final double ret = twoheap[0]; + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final double reinsert = fourheap[last]; + fourheap[last] = 0.0; + heapifyDown(reinsert); + } else if (size > 0) { + final double reinsert = twoheap[size]; + twoheap[size] = 0.0; + heapifyDown(reinsert); + } else { + twoheap[0] = 0.0; + } + ++modCount; + return ret; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. 
+ */ + private void heapifyDown(double reinsert) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (twoheap[1] >= twoheap[2]) ? 1 : 2; + if (fourheap[0] > twoheap[best]) { + twoheap[0] = fourheap[0]; + heapifyDown4(0, reinsert); + } else { + twoheap[0] = twoheap[best]; + heapifyDown2(best, reinsert); + } + return; + } + heapifyDown2(0, reinsert); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + */ + private void heapifyDown2(int twopos, double cur) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + double best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && best < twoheap[right]) { + bestchild = right; + best = twoheap[right]; + } + if (cur >= best) { + break; + } + twoheap[twopos] = best; + twopos = bestchild; + } + twoheap[twopos] = cur; } /** - * Compare two objects + * Heapify-Down for 4-ary heap. * - * @param o1 First object - * @param o2 Second object + * @param fourpos Position in 4-ary heap. 
+ * @param cur Current object */ + private void heapifyDown4(int fourpos, double cur) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + double best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + double nextchild = fourheap[candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (cur >= best) { + break; + } + fourheap[fourpos] = best; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + } + + @Override + public double peek() { + return twoheap[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(DoubleMaxHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) { + buf.append(iter.get()).append(','); + } + buf.append(']'); + return buf.toString(); + } + @Override - protected boolean comp(double o1, double o2) { - return o1 < o2; + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (DoubleHeap.UnsortedIter iter = heap.unsortedIter(); iter.valid(); iter.next()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements DoubleHeap.UnsortedIter { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. 
+ */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @Override + public double get() { + return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMinHeap.java index 2ce05ff9..e9334153 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMinHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMinHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,40 +23,400 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; along with this program. If not, see <http://www.gnu.org/licenses/>. */ +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + /** - * Basic in-memory heap structure. + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). + * + * This code was automatically instantiated for the type: Double * - * This heap is built lazily: if you first add many elements, then poll the - * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log - * n). This is implemented via a simple validTo counter. + * This combination was found to work quite well in benchmarks, but YMMV. 
+ * + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> * * @author Erich Schubert + * + * @apiviz.has UnsortedIter */ -public class DoubleMinHeap extends DoubleHeap { +public class DoubleMinHeap implements DoubleHeap { + /** + * Base heap. + */ + protected double[] twoheap; + + /** + * Extension heap. + */ + protected double[] fourheap; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + /** - * Constructor with default capacity. + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. + */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + + /** + * Constructor, with default size. 
*/ public DoubleMinHeap() { - super(DEFAULT_INITIAL_CAPACITY); + super(); + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + + this.twoheap = twoheap; + this.fourheap = null; + this.size = 0; + this.modCount = 0; } /** - * Constructor with initial capacity. + * Constructor, with given minimum size. * - * @param size initial capacity + * @param minsize Minimum size */ - public DoubleMinHeap(int size) { - super(size); + public DoubleMinHeap(int minsize) { + super(); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + double[] twoheap = new double[size]; + + this.twoheap = twoheap; + this.fourheap = null; + } else { + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + this.twoheap = twoheap; + this.fourheap = fourheap; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + Arrays.fill(twoheap, 0.0); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + public void add(double o) { + final double co = o; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + ++size; + heapifyUp2(twopos, co); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = new double[FOUR_HEAP_INITIAL_SIZE]; + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. 
+ fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + } + fourheap[fourpos] = co; + ++size; + heapifyUp4(fourpos, co); + ++modCount; + } + } + + @Override + public void add(double key, int max) { + if (size < max) { + add(key); + } else if (twoheap[0] <= key) { + replaceTopElement(key); + } + } + + @Override + public double replaceTopElement(double reinsert) { + final double ret = twoheap[0]; + heapifyDown( reinsert); + ++modCount; + return ret; + } + + /** + * Heapify-Up method for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + */ + private void heapifyUp2(int twopos, double cur) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + double par = twoheap[parent]; + if (cur >= par) { + break; + } + twoheap[twopos] = par; + twopos = parent; + } + twoheap[twopos] = cur; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + */ + private void heapifyUp4(int fourpos, double cur) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + double par = fourheap[parent]; + if (cur >= par) { + break; + } + fourheap[fourpos] = par; + fourpos = parent; + } + if (fourpos == 0 && twoheap[0] > cur) { + fourheap[0] = twoheap[0]; + twoheap[0] = cur; + } else { + fourheap[fourpos] = cur; + } + } + + @Override + public double poll() { + final double ret = twoheap[0]; + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final double reinsert = fourheap[last]; + fourheap[last] = 0.0; + heapifyDown(reinsert); + } else if (size > 0) { + final double reinsert = twoheap[size]; + twoheap[size] = 0.0; + heapifyDown(reinsert); + } else { + twoheap[0] = 0.0; + } + ++modCount; + return ret; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. 
+ */ + private void heapifyDown(double reinsert) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (twoheap[1] <= twoheap[2]) ? 1 : 2; + if (fourheap[0] < twoheap[best]) { + twoheap[0] = fourheap[0]; + heapifyDown4(0, reinsert); + } else { + twoheap[0] = twoheap[best]; + heapifyDown2(best, reinsert); + } + return; + } + heapifyDown2(0, reinsert); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + */ + private void heapifyDown2(int twopos, double cur) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + double best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && best > twoheap[right]) { + bestchild = right; + best = twoheap[right]; + } + if (cur <= best) { + break; + } + twoheap[twopos] = best; + twopos = bestchild; + } + twoheap[twopos] = cur; } /** - * Compare two objects + * Heapify-Down for 4-ary heap. * - * @param o1 First object - * @param o2 Second object + * @param fourpos Position in 4-ary heap. 
+ * @param cur Current object */ + private void heapifyDown4(int fourpos, double cur) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + double best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + double nextchild = fourheap[candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (cur <= best) { + break; + } + fourheap[fourpos] = best; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + } + + @Override + public double peek() { + return twoheap[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(DoubleMinHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) { + buf.append(iter.get()).append(','); + } + buf.append(']'); + return buf.toString(); + } + @Override - protected boolean comp(double o1, double o2) { - return o1 > o2; + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (DoubleHeap.UnsortedIter iter = heap.unsortedIter(); iter.valid(); iter.next()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements DoubleHeap.UnsortedIter { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. 
+ */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @Override + public double get() { + return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjMaxHeap.java deleted file mode 100644 index 8417309a..00000000 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjMaxHeap.java +++ /dev/null @@ -1,328 +0,0 @@ -package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; - -/* - This file is part of ELKI: - Environment for Developing KDD-Applications Supported by Index-Structures - - Copyright (C) 2012 - Ludwig-Maximilians-Universität München - Lehr- und Forschungseinheit für Datenbanksysteme - ELKI Development Team - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -import java.util.Arrays; -import java.util.Comparator; - -import de.lmu.ifi.dbs.elki.math.MathUtil; - -/** - * Basic in-memory heap structure. 
- * - * This heap is built lazily: if you first add many elements, then poll the - * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log - * n). This is implemented via a simple validTo counter. - * - * @author Erich Schubert - * - * @param <V> value type - */ -public class DoubleObjMaxHeap<V> { - /** - * Heap storage: keys - */ - protected double[] keys; - - /** - * Heap storage: values - */ - protected Object[] values; - - /** - * Current number of objects - */ - protected int size = 0; - - /** - * Indicate up to where the heap is valid - */ - protected int validSize = 0; - - /** - * (Structural) modification counter. Used to invalidate iterators. - */ - public transient int modCount = 0; - - /** - * Default initial capacity - */ - private static final int DEFAULT_INITIAL_CAPACITY = 11; - - /** - * Default constructor: default capacity, natural ordering. - */ - public DoubleObjMaxHeap() { - this(DEFAULT_INITIAL_CAPACITY); - } - - /** - * Constructor with initial capacity and {@link Comparator}. 
- * - * @param size initial capacity - */ - public DoubleObjMaxHeap(int size) { - super(); - this.size = 0; - this.keys = new double[size]; - this.values = new Object[size]; - } - - /** - * Add a key-value pair to the heap - * - * @param key Key - * @param val Value - * @return Success code - */ - public boolean add(double key, V val) { - // resize when needed - if(size + 1 > keys.length) { - resize(size + 1); - } - // final int pos = size; - this.keys[size] = key; - this.values[size] = val; - this.size += 1; - heapifyUp(size - 1, key, val); - validSize += 1; - // We have changed - return true according to {@link Collection#put} - modCount++; - return true; - } - - /** - * Get the current top key - * - * @return Top key - */ - public double peekKey() { - if(size == 0) { - throw new ArrayIndexOutOfBoundsException("Peek() on an empty heap!"); - } - ensureValid(); - return keys[0]; - } - - /** - * Get the current top value - * - * @return Value - */ - @SuppressWarnings("unchecked") - public V peekValue() { - if(size == 0) { - throw new ArrayIndexOutOfBoundsException("Peek() on an empty heap!"); - } - ensureValid(); - return (V) values[0]; - } - - /** - * Remove the first element - */ - public void poll() { - removeAt(0); - } - - /** - * Repair the heap - */ - protected void ensureValid() { - if(validSize != size) { - if(size > 1) { - // Parent of first invalid - int nextmin = validSize > 0 ? 
((validSize - 1) >>> 1) : 0; - int curmin = MathUtil.nextAllOnesInt(nextmin); // Next line - int nextmax = curmin - 1; // End of valid line - int pos = (size - 2) >>> 1; // Parent of last element - // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin+", "+nextmin); - while(pos >= nextmin) { - // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin); - while(pos >= curmin) { - if(!heapifyDown(pos, keys[pos], values[pos])) { - final int parent = (pos - 1) >>> 1; - if(parent < curmin) { - nextmin = Math.min(nextmin, parent); - nextmax = Math.max(nextmax, parent); - } - } - pos--; - } - curmin = nextmin; - pos = Math.min(pos, nextmax); - nextmax = -1; - } - } - validSize = size; - } - } - - /** - * Remove the element at the given position. - * - * @param pos Element position. - */ - protected void removeAt(int pos) { - if(pos < 0 || pos >= size) { - return; - } - // Replacement object: - final double reinkey = keys[size - 1]; - final Object reinval = values[size - 1]; - values[size - 1] = null; - // Keep heap in sync - if(validSize == size) { - size -= 1; - validSize -= 1; - heapifyDown(pos, reinkey, reinval); - } - else { - size -= 1; - validSize = Math.min(pos >>> 1, validSize); - keys[pos] = reinkey; - values[pos] = reinval; - } - modCount++; - } - - /** - * Execute a "Heapify Upwards" aka "SiftUp". Used in insertions. - * - * @param pos insertion position - * @param curkey Current key - * @param curval Current value - */ - protected void heapifyUp(int pos, double curkey, Object curval) { - while(pos > 0) { - final int parent = (pos - 1) >>> 1; - double parkey = keys[parent]; - - if(curkey <= parkey) { // Compare - break; - } - keys[pos] = parkey; - values[pos] = values[parent]; - pos = parent; - } - keys[pos] = curkey; - values[pos] = curval; - } - - /** - * Execute a "Heapify Downwards" aka "SiftDown". Used in deletions. 
- * - * @param ipos re-insertion position - * @param curkey Current key - * @param curval Current value - * @return true when the order was changed - */ - protected boolean heapifyDown(final int ipos, double curkey, Object curval) { - int pos = ipos; - final int half = size >>> 1; - while(pos < half) { - // Get left child (must exist!) - int cpos = (pos << 1) + 1; - double chikey = keys[cpos]; - Object chival = values[cpos]; - // Test right child, if present - final int rchild = cpos + 1; - if(rchild < size) { - double right = keys[rchild]; - if(chikey < right) { // Compare - cpos = rchild; - chikey = right; - chival = values[rchild]; - } - } - - if(curkey >= chikey) { // Compare - break; - } - keys[pos] = chikey; - values[pos] = chival; - pos = cpos; - } - keys[pos] = curkey; - values[pos] = curval; - return (pos == ipos); - } - - /** - * Query the size - * - * @return Size - */ - public int size() { - return this.size; - } - - /** - * Test whether we need to resize to have the requested capacity. - * - * @param requiredSize required capacity - */ - protected final void resize(int requiredSize) { - // Double until 64, then increase by 50% each time. - int newCapacity = ((keys.length < 64) ? ((keys.length + 1) << 1) : ((keys.length >> 1) * 3)); - // overflow? - if(newCapacity < 0) { - throw new OutOfMemoryError(); - } - if(requiredSize > newCapacity) { - newCapacity = requiredSize; - } - keys = Arrays.copyOf(keys, newCapacity); - values = Arrays.copyOf(values, newCapacity); - } - - /** - * Delete all elements from the heap. - */ - public void clear() { - // clean up references in the array for memory management - Arrays.fill(values, null); - this.size = 0; - this.validSize = -1; - modCount++; - } - - /** - * Test whether the heap is still valid. - * - * Debug method. 
- * - * @return {@code null} when the heap is correct - */ - protected String checkHeap() { - ensureValid(); - for(int i = 1; i < size; i++) { - final int parent = (i - 1) >>> 1; - if(keys[parent] < keys[i]) { // Compare - return "@" + parent + ": " + keys[parent] + " < @" + i + ": " + keys[i]; - } - } - return null; - } -} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjMinHeap.java deleted file mode 100644 index 244277e8..00000000 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjMinHeap.java +++ /dev/null @@ -1,328 +0,0 @@ -package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; - -/* - This file is part of ELKI: - Environment for Developing KDD-Applications Supported by Index-Structures - - Copyright (C) 2012 - Ludwig-Maximilians-Universität München - Lehr- und Forschungseinheit für Datenbanksysteme - ELKI Development Team - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -import java.util.Arrays; -import java.util.Comparator; - -import de.lmu.ifi.dbs.elki.math.MathUtil; - -/** - * Basic in-memory heap structure. - * - * This heap is built lazily: if you first add many elements, then poll the - * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log - * n). This is implemented via a simple validTo counter. 
- * - * @author Erich Schubert - * - * @param <V> value type - */ -public class DoubleObjMinHeap<V> { - /** - * Heap storage: keys - */ - protected double[] keys; - - /** - * Heap storage: values - */ - protected Object[] values; - - /** - * Current number of objects - */ - protected int size = 0; - - /** - * Indicate up to where the heap is valid - */ - protected int validSize = 0; - - /** - * (Structural) modification counter. Used to invalidate iterators. - */ - public transient int modCount = 0; - - /** - * Default initial capacity - */ - private static final int DEFAULT_INITIAL_CAPACITY = 11; - - /** - * Default constructor: default capacity, natural ordering. - */ - public DoubleObjMinHeap() { - this(DEFAULT_INITIAL_CAPACITY); - } - - /** - * Constructor with initial capacity and {@link Comparator}. - * - * @param size initial capacity - */ - public DoubleObjMinHeap(int size) { - super(); - this.size = 0; - this.keys = new double[size]; - this.values = new Object[size]; - } - - /** - * Add a key-value pair to the heap - * - * @param key Key - * @param val Value - * @return Success code - */ - public boolean add(double key, V val) { - // resize when needed - if(size + 1 > keys.length) { - resize(size + 1); - } - // final int pos = size; - this.keys[size] = key; - this.values[size] = val; - this.size += 1; - heapifyUp(size - 1, key, val); - validSize += 1; - // We have changed - return true according to {@link Collection#put} - modCount++; - return true; - } - - /** - * Get the current top key - * - * @return Top key - */ - public double peekKey() { - if(size == 0) { - throw new ArrayIndexOutOfBoundsException("Peek() on an empty heap!"); - } - ensureValid(); - return keys[0]; - } - - /** - * Get the current top value - * - * @return Value - */ - @SuppressWarnings("unchecked") - public V peekValue() { - if(size == 0) { - throw new ArrayIndexOutOfBoundsException("Peek() on an empty heap!"); - } - ensureValid(); - return (V) values[0]; - } - - /** - * Remove the 
first element - */ - public void poll() { - removeAt(0); - } - - /** - * Repair the heap - */ - protected void ensureValid() { - if(validSize != size) { - if(size > 1) { - // Parent of first invalid - int nextmin = validSize > 0 ? ((validSize - 1) >>> 1) : 0; - int curmin = MathUtil.nextAllOnesInt(nextmin); // Next line - int nextmax = curmin - 1; // End of valid line - int pos = (size - 2) >>> 1; // Parent of last element - // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin+", "+nextmin); - while(pos >= nextmin) { - // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin); - while(pos >= curmin) { - if(!heapifyDown(pos, keys[pos], values[pos])) { - final int parent = (pos - 1) >>> 1; - if(parent < curmin) { - nextmin = Math.min(nextmin, parent); - nextmax = Math.max(nextmax, parent); - } - } - pos--; - } - curmin = nextmin; - pos = Math.min(pos, nextmax); - nextmax = -1; - } - } - validSize = size; - } - } - - /** - * Remove the element at the given position. - * - * @param pos Element position. - */ - protected void removeAt(int pos) { - if(pos < 0 || pos >= size) { - return; - } - // Replacement object: - final double reinkey = keys[size - 1]; - final Object reinval = values[size - 1]; - values[size - 1] = null; - // Keep heap in sync - if(validSize == size) { - size -= 1; - validSize -= 1; - heapifyDown(pos, reinkey, reinval); - } - else { - size -= 1; - validSize = Math.min(pos >>> 1, validSize); - keys[pos] = reinkey; - values[pos] = reinval; - } - modCount++; - } - - /** - * Execute a "Heapify Upwards" aka "SiftUp". Used in insertions. 
- * - * @param pos insertion position - * @param curkey Current key - * @param curval Current value - */ - protected void heapifyUp(int pos, double curkey, Object curval) { - while(pos > 0) { - final int parent = (pos - 1) >>> 1; - double parkey = keys[parent]; - - if(curkey >= parkey) { // Compare - break; - } - keys[pos] = parkey; - values[pos] = values[parent]; - pos = parent; - } - keys[pos] = curkey; - values[pos] = curval; - } - - /** - * Execute a "Heapify Downwards" aka "SiftDown". Used in deletions. - * - * @param ipos re-insertion position - * @param curkey Current key - * @param curval Current value - * @return true when the order was changed - */ - protected boolean heapifyDown(final int ipos, double curkey, Object curval) { - int pos = ipos; - final int half = size >>> 1; - while(pos < half) { - // Get left child (must exist!) - int cpos = (pos << 1) + 1; - double chikey = keys[cpos]; - Object chival = values[cpos]; - // Test right child, if present - final int rchild = cpos + 1; - if(rchild < size) { - double right = keys[rchild]; - if(chikey > right) { // Compare - cpos = rchild; - chikey = right; - chival = values[rchild]; - } - } - - if(curkey <= chikey) { // Compare - break; - } - keys[pos] = chikey; - values[pos] = chival; - pos = cpos; - } - keys[pos] = curkey; - values[pos] = curval; - return (pos == ipos); - } - - /** - * Query the size - * - * @return Size - */ - public int size() { - return this.size; - } - - /** - * Test whether we need to resize to have the requested capacity. - * - * @param requiredSize required capacity - */ - protected final void resize(int requiredSize) { - // Double until 64, then increase by 50% each time. - int newCapacity = ((keys.length < 64) ? ((keys.length + 1) << 1) : ((keys.length >> 1) * 3)); - // overflow? 
- if(newCapacity < 0) { - throw new OutOfMemoryError(); - } - if(requiredSize > newCapacity) { - newCapacity = requiredSize; - } - keys = Arrays.copyOf(keys, newCapacity); - values = Arrays.copyOf(values, newCapacity); - } - - /** - * Delete all elements from the heap. - */ - public void clear() { - // clean up references in the array for memory management - Arrays.fill(values, null); - this.size = 0; - this.validSize = -1; - modCount++; - } - - /** - * Test whether the heap is still valid. - * - * Debug method. - * - * @return {@code null} when the heap is correct - */ - protected String checkHeap() { - ensureValid(); - for(int i = 1; i < size; i++) { - final int parent = (i - 1) >>> 1; - if(keys[parent] > keys[i]) { // Compare - return "@" + parent + ": " + keys[parent] + " < @" + i + ": " + keys[i]; - } - } - return null; - } -} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectHeap.java new file mode 100644 index 00000000..db65ce81 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectHeap.java @@ -0,0 +1,129 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. 
+ + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter; + +/** + * Basic in-memory heap interface, for double keys and V values. + * + * @author Erich Schubert + * + * @apiviz.has UnsortedIter + * @param <V> Value type + */ +public interface DoubleObjectHeap<V> { + /** + * Add a key-value pair to the heap + * + * @param key Key + * @param val Value + */ + void add(double key, V val); + + /** + * Add a key-value pair to the heap if it improves the top. + * + * @param key Key + * @param val Value + * @param k Desired maximum size + */ + void add(double key, V val, int k); + + /** + * Combined operation that removes the top element, and inserts a new element + * instead. + * + * @param key Key of new element + * @param val Value of new element + */ + void replaceTopElement(double key, V val); + + /** + * Get the current top key + * + * @return Top key + */ + double peekKey(); + + /** + * Get the current top value + * + * @return Value + */ + V peekValue(); + + /** + * Remove the first element + */ + void poll(); + + /** + * Clear the heap contents. + */ + void clear(); + + /** + * Query the size + * + * @return Size + */ + public int size(); + + /** + * Is the heap empty? + * + * @return {@code true} when the size is 0. + */ + public boolean isEmpty(); + + /** + * Get an unsorted iterator to inspect the heap. + * + * @return Iterator + */ + UnsortedIter<V> unsortedIter(); + + /** + * Unsorted iterator - in heap order. Does not poll the heap. 
+ * + * @author Erich Schubert + * @param <V> Value type + */ + public static interface UnsortedIter<V> extends Iter { + /** + * Get the current key + * + * @return Current key + */ + double getKey(); + + /** + * Get the current value + * + * @return Current value + */ + V getValue(); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMaxHeap.java new file mode 100644 index 00000000..dd89573c --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMaxHeap.java @@ -0,0 +1,482 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). 
+ * + * This code was automatically instantiated for the types: Double and Object + * + * This combination was found to work quite well in benchmarks, but YMMV. + * + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> + * + * @author Erich Schubert + * + * @apiviz.has UnsortedIter + * @param <V> Value type + */ +public class DoubleObjectMaxHeap<V> implements DoubleObjectHeap<V> { + /** + * Base heap. + */ + protected double[] twoheap; + + /** + * Base heap values. + */ + protected Object[] twovals; + + /** + * Extension heap. + */ + protected double[] fourheap; + + /** + * Extension heapvalues. + */ + protected Object[] fourvals; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + + /** + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. 
+ */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + + /** + * Constructor, with default size. + */ + public DoubleObjectMaxHeap() { + super(); + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + Object[] twovals = new Object[TWO_HEAP_INITIAL_SIZE]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + this.size = 0; + this.modCount = 0; + } + + /** + * Constructor, with given minimum size. + * + * @param minsize Minimum size + */ + public DoubleObjectMaxHeap(int minsize) { + super(); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + double[] twoheap = new double[size]; + Object[] twovals = new Object[size]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + } else { + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + Object[] twovals = new Object[TWO_HEAP_INITIAL_SIZE]; + double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + Object[] fourvals = new Object[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = fourheap; + this.fourvals = fourvals; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + fourvals = null; + Arrays.fill(twoheap, 0.0); + Arrays.fill(twovals, null); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + public void add(double o, V v) { + final double co = o; + final Object cv = (Object)v; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. 
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + twovals[twopos] = cv; + ++size; + heapifyUp2(twopos, co, cv); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = new double[FOUR_HEAP_INITIAL_SIZE]; + fourvals = new Object[FOUR_HEAP_INITIAL_SIZE]; + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. + fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1)); + } + fourheap[fourpos] = co; + fourvals[fourpos] = cv; + ++size; + heapifyUp4(fourpos, co, cv); + ++modCount; + } + } + + @Override + public void add(double key, V val, int max) { + if (size < max) { + add(key, val); + } else if (twoheap[0] >= key) { + replaceTopElement(key, val); + } + } + + @Override + public void replaceTopElement(double reinsert, V val) { + heapifyDown(reinsert, (Object)val); + ++modCount; + } + + /** + * Heapify-Up method for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Current value + */ + private void heapifyUp2(int twopos, double cur, Object val) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + double par = twoheap[parent]; + if (cur <= par) { + break; + } + twoheap[twopos] = par; + twovals[twopos] = twovals[parent]; + twopos = parent; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. 
+ * @param cur Current object + * @param val Current value + */ + private void heapifyUp4(int fourpos, double cur, Object val) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + double par = fourheap[parent]; + if (cur <= par) { + break; + } + fourheap[fourpos] = par; + fourvals[fourpos] = fourvals[parent]; + fourpos = parent; + } + if (fourpos == 0 && twoheap[0] < cur) { + fourheap[0] = twoheap[0]; + fourvals[0] = twovals[0]; + twoheap[0] = cur; + twovals[0] = val; + } else { + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + } + + @Override + public void poll() { + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final double reinsert = fourheap[last]; + final Object reinsertv = fourvals[last]; + fourheap[last] = 0.0; + fourvals[last] = null; + heapifyDown(reinsert, reinsertv); + } else if (size > 0) { + final double reinsert = twoheap[size]; + final Object reinsertv = twovals[size]; + twoheap[size] = 0.0; + twovals[size] = null; + heapifyDown(reinsert, reinsertv); + } else { + twoheap[0] = 0.0; + twovals[0] = null; + } + ++modCount; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + * @param val Value to reinsert. + */ + private void heapifyDown(double reinsert, Object val) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (twoheap[1] >= twoheap[2]) ? 1 : 2; + if (fourheap[0] > twoheap[best]) { + twoheap[0] = fourheap[0]; + twovals[0] = fourvals[0]; + heapifyDown4(0, reinsert, val); + } else { + twoheap[0] = twoheap[best]; + twovals[0] = twovals[best]; + heapifyDown2(best, reinsert, val); + } + return; + } + heapifyDown2(0, reinsert, val); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Value to reinsert. 
+ */ + private void heapifyDown2(int twopos, double cur, Object val) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + double best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && best < twoheap[right]) { + bestchild = right; + best = twoheap[right]; + } + if (cur >= best) { + break; + } + twoheap[twopos] = best; + twovals[twopos] = twovals[bestchild]; + twopos = bestchild; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Down for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + * @param val Value to reinsert. + */ + private void heapifyDown4(int fourpos, double cur, Object val) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + double best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + double nextchild = fourheap[candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (cur >= best) { + break; + } + fourheap[fourpos] = best; + fourvals[fourpos] = fourvals[bestchild]; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + + @Override + public double peekKey() { + return twoheap[0]; + } + + @Override + @SuppressWarnings("unchecked") + public V peekValue() { + return (V)twovals[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(DoubleObjectMaxHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = 
new UnsortedIter(); iter.valid(); iter.advance()) { + buf.append(iter.getKey()).append(':').append(iter.getValue()).append(','); + } + buf.append(']'); + return buf.toString(); + } + + @Override + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (DoubleObjectHeap.UnsortedIter<V> iter = heap.unsortedIter(); iter.valid(); iter.next()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements DoubleObjectHeap.UnsortedIter<V> { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. + */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @Override + public double getKey() { + return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } + + @SuppressWarnings("unchecked") + + @Override + public V getValue() { + return (V)((pos < TWO_HEAP_MAX_SIZE) ? 
twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMinHeap.java new file mode 100644 index 00000000..905cdedb --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMinHeap.java @@ -0,0 +1,482 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). + * + * This code was automatically instantiated for the types: Double and Object + * + * This combination was found to work quite well in benchmarks, but YMMV. 
+ * + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> + * + * @author Erich Schubert + * + * @apiviz.has UnsortedIter + * @param <V> Value type + */ +public class DoubleObjectMinHeap<V> implements DoubleObjectHeap<V> { + /** + * Base heap. + */ + protected double[] twoheap; + + /** + * Base heap values. + */ + protected Object[] twovals; + + /** + * Extension heap. + */ + protected double[] fourheap; + + /** + * Extension heapvalues. + */ + protected Object[] fourvals; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + + /** + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. + */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + + /** + * Constructor, with default size. 
+ */ + public DoubleObjectMinHeap() { + super(); + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + Object[] twovals = new Object[TWO_HEAP_INITIAL_SIZE]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + this.size = 0; + this.modCount = 0; + } + + /** + * Constructor, with given minimum size. + * + * @param minsize Minimum size + */ + public DoubleObjectMinHeap(int minsize) { + super(); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + double[] twoheap = new double[size]; + Object[] twovals = new Object[size]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + } else { + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + Object[] twovals = new Object[TWO_HEAP_INITIAL_SIZE]; + double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + Object[] fourvals = new Object[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = fourheap; + this.fourvals = fourvals; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + fourvals = null; + Arrays.fill(twoheap, 0.0); + Arrays.fill(twovals, null); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + public void add(double o, V v) { + final double co = o; + final Object cv = (Object)v; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. 
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + twovals[twopos] = cv; + ++size; + heapifyUp2(twopos, co, cv); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = new double[FOUR_HEAP_INITIAL_SIZE]; + fourvals = new Object[FOUR_HEAP_INITIAL_SIZE]; + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. + fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1)); + } + fourheap[fourpos] = co; + fourvals[fourpos] = cv; + ++size; + heapifyUp4(fourpos, co, cv); + ++modCount; + } + } + + @Override + public void add(double key, V val, int max) { + if (size < max) { + add(key, val); + } else if (twoheap[0] <= key) { + replaceTopElement(key, val); + } + } + + @Override + public void replaceTopElement(double reinsert, V val) { + heapifyDown(reinsert, (Object)val); + ++modCount; + } + + /** + * Heapify-Up method for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Current value + */ + private void heapifyUp2(int twopos, double cur, Object val) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + double par = twoheap[parent]; + if (cur >= par) { + break; + } + twoheap[twopos] = par; + twovals[twopos] = twovals[parent]; + twopos = parent; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. 
+ * @param cur Current object + * @param val Current value + */ + private void heapifyUp4(int fourpos, double cur, Object val) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + double par = fourheap[parent]; + if (cur >= par) { + break; + } + fourheap[fourpos] = par; + fourvals[fourpos] = fourvals[parent]; + fourpos = parent; + } + if (fourpos == 0 && twoheap[0] > cur) { + fourheap[0] = twoheap[0]; + fourvals[0] = twovals[0]; + twoheap[0] = cur; + twovals[0] = val; + } else { + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + } + + @Override + public void poll() { + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final double reinsert = fourheap[last]; + final Object reinsertv = fourvals[last]; + fourheap[last] = 0.0; + fourvals[last] = null; + heapifyDown(reinsert, reinsertv); + } else if (size > 0) { + final double reinsert = twoheap[size]; + final Object reinsertv = twovals[size]; + twoheap[size] = 0.0; + twovals[size] = null; + heapifyDown(reinsert, reinsertv); + } else { + twoheap[0] = 0.0; + twovals[0] = null; + } + ++modCount; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + * @param val Value to reinsert. + */ + private void heapifyDown(double reinsert, Object val) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (twoheap[1] <= twoheap[2]) ? 1 : 2; + if (fourheap[0] < twoheap[best]) { + twoheap[0] = fourheap[0]; + twovals[0] = fourvals[0]; + heapifyDown4(0, reinsert, val); + } else { + twoheap[0] = twoheap[best]; + twovals[0] = twovals[best]; + heapifyDown2(best, reinsert, val); + } + return; + } + heapifyDown2(0, reinsert, val); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Value to reinsert. 
+ */ + private void heapifyDown2(int twopos, double cur, Object val) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + double best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && best > twoheap[right]) { + bestchild = right; + best = twoheap[right]; + } + if (cur <= best) { + break; + } + twoheap[twopos] = best; + twovals[twopos] = twovals[bestchild]; + twopos = bestchild; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Down for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + * @param val Value to reinsert. + */ + private void heapifyDown4(int fourpos, double cur, Object val) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + double best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + double nextchild = fourheap[candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (cur <= best) { + break; + } + fourheap[fourpos] = best; + fourvals[fourpos] = fourvals[bestchild]; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + + @Override + public double peekKey() { + return twoheap[0]; + } + + @Override + @SuppressWarnings("unchecked") + public V peekValue() { + return (V)twovals[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(DoubleObjectMinHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = 
new UnsortedIter(); iter.valid(); iter.advance()) { + buf.append(iter.getKey()).append(':').append(iter.getValue()).append(','); + } + buf.append(']'); + return buf.toString(); + } + + @Override + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (DoubleObjectHeap.UnsortedIter<V> iter = heap.unsortedIter(); iter.valid(); iter.next()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements DoubleObjectHeap.UnsortedIter<V> { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. + */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @Override + public double getKey() { + return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } + + @SuppressWarnings("unchecked") + + @Override + public V getValue() { + return (V)((pos < TWO_HEAP_MAX_SIZE) ? 
twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoublePriorityObject.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoublePriorityObject.java index 92d548cb..82453885 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoublePriorityObject.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoublePriorityObject.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/Heap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/Heap.java index 86d3ae08..2c278110 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/Heap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/Heap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -25,11 +25,6 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; import java.util.Arrays; import java.util.Comparator; -import java.util.ConcurrentModificationException; -import java.util.Iterator; -import java.util.NoSuchElementException; - -import de.lmu.ifi.dbs.elki.math.MathUtil; /** * Basic in-memory heap structure. Closely related to a @@ -45,11 +40,11 @@ import de.lmu.ifi.dbs.elki.math.MathUtil; * @param <E> Element type. Should be {@link java.lang.Comparable} or a * {@link java.util.Comparator} needs to be given. 
*/ -public class Heap<E> implements Iterable<E> { +public class Heap<E> { /** * Heap storage. */ - protected transient Object[] queue; + protected Object[] queue; /** * Current number of objects. @@ -57,11 +52,6 @@ public class Heap<E> implements Iterable<E> { protected int size = 0; /** - * Indicate up to where the heap is valid. - */ - protected int validSize = 0; - - /** * The comparator or {@code null}. */ protected final Comparator<Object> comparator; @@ -69,7 +59,7 @@ public class Heap<E> implements Iterable<E> { /** * (Structural) modification counter. Used to invalidate iterators. */ - private transient int modCount = 0; + private int modCount = 0; /** * Default initial capacity. @@ -126,10 +116,8 @@ public class Heap<E> implements Iterable<E> { resize(size + 1); } // final int pos = size; - this.queue[size] = e; this.size += 1; heapifyUp(size - 1, e); - validSize += 1; heapModified(); } @@ -142,7 +130,6 @@ public class Heap<E> implements Iterable<E> { */ @SuppressWarnings("unchecked") public E replaceTopElement(E e) { - ensureValid(); E oldroot = (E) queue[0]; heapifyDown(0, e); heapModified(); @@ -159,7 +146,6 @@ public class Heap<E> implements Iterable<E> { if (size == 0) { return null; } - ensureValid(); return (E) queue[0]; } @@ -169,70 +155,10 @@ public class Heap<E> implements Iterable<E> { * @return Top element. */ public E poll() { - ensureValid(); return removeAt(0); } /** - * Perform pending heap repair operations in a single bulk operation. - */ - protected void ensureValid() { - if (validSize != size) { - if (size > 1) { - // Bottom up heap update. - if (comparator != null) { - // Parent of first invalid - int nextmin = validSize > 0 ? 
((validSize - 1) >>> 1) : 0; - int curmin = MathUtil.nextAllOnesInt(nextmin); // Next line - int nextmax = curmin - 1; // End of valid line - int pos = (size - 2) >>> 1; // Parent of last element - // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin+", "+nextmin); - while (pos >= nextmin) { - // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin); - while (pos >= curmin) { - if (!heapifyDownComparator(pos, queue[pos])) { - final int parent = (pos - 1) >>> 1; - if (parent < curmin) { - nextmin = Math.min(nextmin, parent); - nextmax = Math.max(nextmax, parent); - } - } - pos--; - } - curmin = nextmin; - pos = Math.min(pos, nextmax); - nextmax = -1; - } - } else { - // Parent of first invalid - int nextmin = validSize > 0 ? ((validSize - 1) >>> 1) : 0; - int curmin = MathUtil.nextAllOnesInt(nextmin); // Next line - int nextmax = curmin - 1; // End of valid line - int pos = (size - 2) >>> 1; // Parent of last element - // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin+", "+nextmin); - while (pos >= nextmin) { - // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin); - while (pos >= curmin) { - if (!heapifyDownComparable(pos, queue[pos])) { - final int parent = (pos - 1) >>> 1; - if (parent < curmin) { - nextmin = Math.min(nextmin, parent); - nextmax = Math.max(nextmax, parent); - } - } - pos--; - } - curmin = nextmin; - pos = Math.min(pos, nextmax); - nextmax = -1; - } - } - } - validSize = size; - } - } - - /** * Remove the element at the given position. * * @param pos Element position. 
@@ -247,16 +173,8 @@ public class Heap<E> implements Iterable<E> { // Replacement object: final Object reinsert = queue[size - 1]; queue[size - 1] = null; - // Keep heap in sync - if (validSize == size) { - size -= 1; - validSize -= 1; - heapifyDown(pos, reinsert); - } else { - size -= 1; - validSize = Math.min(pos >>> 1, validSize); - queue[pos] = reinsert; - } + size--; + heapifyDown(pos, reinsert); heapModified(); return ret; } @@ -367,7 +285,7 @@ public class Heap<E> implements Iterable<E> { pos = cpos; } queue[pos] = cur; - return (pos == ipos); + return (pos != ipos); } /** @@ -405,7 +323,7 @@ public class Heap<E> implements Iterable<E> { pos = min; } queue[pos] = cur; - return (pos == ipos); + return (pos != ipos); } /** @@ -453,15 +371,9 @@ public class Heap<E> implements Iterable<E> { queue[i] = null; } this.size = 0; - this.validSize = -1; heapModified(); } - @Override - public Iterator<E> iterator() { - return new Itr(); - } - /** * Called at the end of each heap modification. */ @@ -470,52 +382,12 @@ public class Heap<E> implements Iterable<E> { } /** - * Iterator over queue elements. No particular order (i.e. heap order!) + * Get an unordered heap iterator. * - * @author Erich Schubert - * - * @apiviz.exclude + * @return Iterator. */ - protected final class Itr implements Iterator<E> { - /** - * Cursor position. - */ - private int cursor = 0; - - /** - * Modification counter this iterator is valid for. 
- */ - private int expectedModCount = modCount; - - @Override - public boolean hasNext() { - return cursor < size; - } - - @SuppressWarnings("unchecked") - @Override - public E next() { - if (expectedModCount != modCount) { - throw new ConcurrentModificationException(); - } - if (cursor < size) { - return (E) queue[cursor++]; - } - throw new NoSuchElementException(); - } - - @Override - public void remove() { - if (expectedModCount != modCount) { - throw new ConcurrentModificationException(); - } - if (cursor > 0) { - cursor--; - } else { - throw new IllegalStateException(); - } - expectedModCount = modCount; - } + public UnorderedIter unorderedIter() { + return new UnorderedIter(); } /** @@ -526,7 +398,6 @@ public class Heap<E> implements Iterable<E> { * @return {@code null} when the heap is correct */ protected String checkHeap() { - ensureValid(); if (comparator == null) { for (int i = 1; i < size; i++) { final int parent = (i - 1) >>> 1; @@ -546,4 +417,43 @@ public class Heap<E> implements Iterable<E> { } return null; } + + /** + * Heap iterator. + * + * @author Erich Schubert + */ + public class UnorderedIter implements de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter { + /** + * Current iterator position. + */ + int pos = 0; + + /** + * Constructor. + */ + protected UnorderedIter() { + super(); + } + + @Override + public boolean valid() { + return pos < size(); + } + + @Override + public void advance() { + pos++; + } + + /** + * Get the current queue element. 
+ * + * @return Element + */ + @SuppressWarnings("unchecked") + public E get() { + return (E) queue[pos]; + } + } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerHeap.java index 6203ad96..3235926b 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,53 +23,22 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import java.util.Arrays; - -import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter; /** - * Basic in-memory heap structure. - * - * This heap is built lazily: if you first add many elements, then poll the - * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log - * n). This is implemented via a simple validTo counter. + * Basic in-memory heap for int values. * * @author Erich Schubert + * + * @apiviz.has UnsortedIter */ -public abstract class IntegerHeap extends AbstractHeap { - /** - * Heap storage: queue - */ - protected transient int[] queue; - - /** - * Constructor with initial capacity. 
- * - * @param size initial capacity - */ - public IntegerHeap(int size) { - super(); - this.size = 0; - this.queue = new int[size]; - } - +public interface IntegerHeap { /** * Add a key-value pair to the heap * * @param key Key */ - public void add(int key) { - // resize when needed - if (size + 1 > queue.length) { - resize(size + 1); - } - // final int pos = size; - this.queue[size] = key; - this.size += 1; - heapifyUp(size - 1, key); - validSize += 1; - heapModified(); - } + void add(int key); /** * Add a key-value pair to the heap, except if the new element is larger than @@ -78,13 +47,7 @@ public abstract class IntegerHeap extends AbstractHeap { * @param key Key * @param max Maximum size of heap */ - public void add(int key, int max) { - if (size < max) { - add(key); - } else if (comp(key, peek())) { - replaceTopElement(key); - } - } + void add(int key, int max); /** * Combined operation that removes the top element, and inserts a new element @@ -93,172 +56,67 @@ public abstract class IntegerHeap extends AbstractHeap { * @param e New element to insert * @return Previous top element of the heap */ - public int replaceTopElement(int e) { - ensureValid(); - int oldroot = queue[0]; - heapifyDown(0, e); - heapModified(); - return oldroot; - } + int replaceTopElement(int e); /** * Get the current top key * * @return Top key */ - public int peek() { - if (size == 0) { - throw new ArrayIndexOutOfBoundsException("Peek() on an empty heap!"); - } - ensureValid(); - return queue[0]; - } + int peek(); /** * Remove the first element * * @return Top element */ - public int poll() { - return removeAt(0); - } + int poll(); /** - * Repair the heap + * Delete all elements from the heap. */ - protected void ensureValid() { - if (validSize != size) { - if (size > 1) { - // Parent of first invalid - int nextmin = validSize > 0 ? 
((validSize - 1) >>> 1) : 0; - int curmin = MathUtil.nextAllOnesInt(nextmin); // Next line - int nextmax = curmin - 1; // End of valid line - int pos = (size - 2) >>> 1; // Parent of last element - // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin+", "+nextmin); - while (pos >= nextmin) { - // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin); - while (pos >= curmin) { - if (!heapifyDown(pos, queue[pos])) { - final int parent = (pos - 1) >>> 1; - if (parent < curmin) { - nextmin = Math.min(nextmin, parent); - nextmax = Math.max(nextmax, parent); - } - } - pos--; - } - curmin = nextmin; - pos = Math.min(pos, nextmax); - nextmax = -1; - } - } - validSize = size; - } - } + void clear(); /** - * Remove the element at the given position. + * Query the size * - * @param pos Element position. - * @return Removed element + * @return Size */ - protected int removeAt(int pos) { - if (pos < 0 || pos >= size) { - return 0; - } - final int top = queue[0]; - // Replacement object: - final int reinkey = queue[size - 1]; - // Keep heap in sync - if (validSize == size) { - size -= 1; - validSize -= 1; - heapifyDown(pos, reinkey); - } else { - size -= 1; - validSize = Math.min(pos >>> 1, validSize); - queue[pos] = reinkey; - } - heapModified(); - return top; - } - + public int size(); + /** - * Execute a "Heapify Upwards" aka "SiftUp". Used in insertions. + * Is the heap empty? * - * @param pos insertion position - * @param curkey Current key + * @return {@code true} when the size is 0. */ - protected void heapifyUp(int pos, int curkey) { - while (pos > 0) { - final int parent = (pos - 1) >>> 1; - int parkey = queue[parent]; - - if (comp(curkey, parkey)) { // Compare - break; - } - queue[pos] = parkey; - pos = parent; - } - queue[pos] = curkey; - } + public boolean isEmpty(); /** - * Execute a "Heapify Downwards" aka "SiftDown". Used in deletions. + * Get an unsorted iterator to inspect the heap. 
* - * @param ipos re-insertion position - * @param curkey Current key - * @return true when the order was changed + * @return Iterator */ - protected boolean heapifyDown(final int ipos, int curkey) { - int pos = ipos; - final int half = size >>> 1; - while (pos < half) { - // Get left child (must exist!) - int cpos = (pos << 1) + 1; - int chikey = queue[cpos]; - // Test right child, if present - final int rchild = cpos + 1; - if (rchild < size) { - int right = queue[rchild]; - if (comp(chikey, right)) { // Compare - cpos = rchild; - chikey = right; - } - } - - if (comp(chikey, curkey)) { // Compare - break; - } - queue[pos] = chikey; - pos = cpos; - } - queue[pos] = curkey; - return (pos == ipos); - } + UnsortedIter unsortedIter(); /** - * Test whether we need to resize to have the requested capacity. + * Unsorted iterator - in heap order. Does not poll the heap. * - * @param requiredSize required capacity - */ - protected final void resize(int requiredSize) { - queue = Arrays.copyOf(queue, desiredSize(requiredSize, queue.length)); - } - - /** - * Delete all elements from the heap. + * <pre> + * {@code + * for (IntegerHeap.UnsortedIter iter = heap.unsortedIter(); iter.valid(); iter.next()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert */ - @Override - public void clear() { - super.clear(); - for (int i = 0; i < size; i++) { - queue[i] = 0; - } + public static interface UnsortedIter extends Iter { + /** + * Get the iterators current object. 
+ * + * @return Current object + */ + int get(); } - - /** - * Compare two objects - */ - abstract protected boolean comp(int o1, int o2); } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMaxHeap.java index 383eb727..60f61d99 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMaxHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMaxHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,40 +23,400 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; along with this program. If not, see <http://www.gnu.org/licenses/>. */ +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + /** - * Basic in-memory heap structure. + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). + * + * This code was automatically instantiated for the type: Integer * - * This heap is built lazily: if you first add many elements, then poll the - * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log - * n). This is implemented via a simple validTo counter. + * This combination was found to work quite well in benchmarks, but YMMV. 
+ * + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> * * @author Erich Schubert + * + * @apiviz.has UnsortedIter */ -public class IntegerMaxHeap extends IntegerHeap { +public class IntegerMaxHeap implements IntegerHeap { + /** + * Base heap. + */ + protected int[] twoheap; + + /** + * Extension heap. + */ + protected int[] fourheap; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + /** - * Constructor with default capacity. + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. + */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + + /** + * Constructor, with default size. 
*/ public IntegerMaxHeap() { - super(DEFAULT_INITIAL_CAPACITY); + super(); + int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE]; + + this.twoheap = twoheap; + this.fourheap = null; + this.size = 0; + this.modCount = 0; } /** - * Constructor with initial capacity. + * Constructor, with given minimum size. * - * @param size initial capacity + * @param minsize Minimum size */ - public IntegerMaxHeap(int size) { - super(size); + public IntegerMaxHeap(int minsize) { + super(); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + int[] twoheap = new int[size]; + + this.twoheap = twoheap; + this.fourheap = null; + } else { + int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE]; + int[] fourheap = new int[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + this.twoheap = twoheap; + this.fourheap = fourheap; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + Arrays.fill(twoheap, 0); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + public void add(int o) { + final int co = o; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + ++size; + heapifyUp2(twopos, co); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = new int[FOUR_HEAP_INITIAL_SIZE]; + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. 
+ fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + } + fourheap[fourpos] = co; + ++size; + heapifyUp4(fourpos, co); + ++modCount; + } + } + + @Override + public void add(int key, int max) { + if (size < max) { + add(key); + } else if (twoheap[0] >= key) { + replaceTopElement(key); + } + } + + @Override + public int replaceTopElement(int reinsert) { + final int ret = twoheap[0]; + heapifyDown( reinsert); + ++modCount; + return ret; + } + + /** + * Heapify-Up method for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + */ + private void heapifyUp2(int twopos, int cur) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + int par = twoheap[parent]; + if (cur <= par) { + break; + } + twoheap[twopos] = par; + twopos = parent; + } + twoheap[twopos] = cur; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + */ + private void heapifyUp4(int fourpos, int cur) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + int par = fourheap[parent]; + if (cur <= par) { + break; + } + fourheap[fourpos] = par; + fourpos = parent; + } + if (fourpos == 0 && twoheap[0] < cur) { + fourheap[0] = twoheap[0]; + twoheap[0] = cur; + } else { + fourheap[fourpos] = cur; + } + } + + @Override + public int poll() { + final int ret = twoheap[0]; + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final int reinsert = fourheap[last]; + fourheap[last] = 0; + heapifyDown(reinsert); + } else if (size > 0) { + final int reinsert = twoheap[size]; + twoheap[size] = 0; + heapifyDown(reinsert); + } else { + twoheap[0] = 0; + } + ++modCount; + return ret; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + */ + private void heapifyDown(int reinsert) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. 
+ final int best = (twoheap[1] >= twoheap[2]) ? 1 : 2; + if (fourheap[0] > twoheap[best]) { + twoheap[0] = fourheap[0]; + heapifyDown4(0, reinsert); + } else { + twoheap[0] = twoheap[best]; + heapifyDown2(best, reinsert); + } + return; + } + heapifyDown2(0, reinsert); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + */ + private void heapifyDown2(int twopos, int cur) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + int best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && best < twoheap[right]) { + bestchild = right; + best = twoheap[right]; + } + if (cur >= best) { + break; + } + twoheap[twopos] = best; + twopos = bestchild; + } + twoheap[twopos] = cur; } /** - * Compare two objects + * Heapify-Down for 4-ary heap. * - * @param o1 First object - * @param o2 Second object + * @param fourpos Position in 4-ary heap. 
+ * @param cur Current object */ + private void heapifyDown4(int fourpos, int cur) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + int best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + int nextchild = fourheap[candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (cur >= best) { + break; + } + fourheap[fourpos] = best; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + } + + @Override + public int peek() { + return twoheap[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(IntegerMaxHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) { + buf.append(iter.get()).append(','); + } + buf.append(']'); + return buf.toString(); + } + @Override - protected boolean comp(int o1, int o2) { - return o1 < o2; + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (IntegerHeap.UnsortedIter iter = heap.unsortedIter(); iter.valid(); iter.next()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements IntegerHeap.UnsortedIter { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. 
+ */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @Override + public int get() { + return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMinHeap.java index f81fe275..c352ece4 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMinHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMinHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,40 +23,400 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; along with this program. If not, see <http://www.gnu.org/licenses/>. */ +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + /** - * Basic in-memory heap structure. + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). + * + * This code was automatically instantiated for the type: Integer * - * This heap is built lazily: if you first add many elements, then poll the - * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log - * n). This is implemented via a simple validTo counter. + * This combination was found to work quite well in benchmarks, but YMMV. 
+ * + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> * * @author Erich Schubert + * + * @apiviz.has UnsortedIter */ -public class IntegerMinHeap extends IntegerHeap { +public class IntegerMinHeap implements IntegerHeap { + /** + * Base heap. + */ + protected int[] twoheap; + + /** + * Extension heap. + */ + protected int[] fourheap; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + /** - * Constructor with default capacity. + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. + */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + + /** + * Constructor, with default size. 
*/ public IntegerMinHeap() { - super(DEFAULT_INITIAL_CAPACITY); + super(); + int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE]; + + this.twoheap = twoheap; + this.fourheap = null; + this.size = 0; + this.modCount = 0; } /** - * Constructor with initial capacity. + * Constructor, with given minimum size. * - * @param size initial capacity + * @param minsize Minimum size */ - public IntegerMinHeap(int size) { - super(size); + public IntegerMinHeap(int minsize) { + super(); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + int[] twoheap = new int[size]; + + this.twoheap = twoheap; + this.fourheap = null; + } else { + int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE]; + int[] fourheap = new int[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + this.twoheap = twoheap; + this.fourheap = fourheap; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + Arrays.fill(twoheap, 0); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + public void add(int o) { + final int co = o; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + ++size; + heapifyUp2(twopos, co); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = new int[FOUR_HEAP_INITIAL_SIZE]; + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. 
+ fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + } + fourheap[fourpos] = co; + ++size; + heapifyUp4(fourpos, co); + ++modCount; + } + } + + @Override + public void add(int key, int max) { + if (size < max) { + add(key); + } else if (twoheap[0] <= key) { + replaceTopElement(key); + } + } + + @Override + public int replaceTopElement(int reinsert) { + final int ret = twoheap[0]; + heapifyDown( reinsert); + ++modCount; + return ret; + } + + /** + * Heapify-Up method for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + */ + private void heapifyUp2(int twopos, int cur) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + int par = twoheap[parent]; + if (cur >= par) { + break; + } + twoheap[twopos] = par; + twopos = parent; + } + twoheap[twopos] = cur; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + */ + private void heapifyUp4(int fourpos, int cur) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + int par = fourheap[parent]; + if (cur >= par) { + break; + } + fourheap[fourpos] = par; + fourpos = parent; + } + if (fourpos == 0 && twoheap[0] > cur) { + fourheap[0] = twoheap[0]; + twoheap[0] = cur; + } else { + fourheap[fourpos] = cur; + } + } + + @Override + public int poll() { + final int ret = twoheap[0]; + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final int reinsert = fourheap[last]; + fourheap[last] = 0; + heapifyDown(reinsert); + } else if (size > 0) { + final int reinsert = twoheap[size]; + twoheap[size] = 0; + heapifyDown(reinsert); + } else { + twoheap[0] = 0; + } + ++modCount; + return ret; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + */ + private void heapifyDown(int reinsert) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. 
+ final int best = (twoheap[1] <= twoheap[2]) ? 1 : 2; + if (fourheap[0] < twoheap[best]) { + twoheap[0] = fourheap[0]; + heapifyDown4(0, reinsert); + } else { + twoheap[0] = twoheap[best]; + heapifyDown2(best, reinsert); + } + return; + } + heapifyDown2(0, reinsert); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + */ + private void heapifyDown2(int twopos, int cur) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + int best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && best > twoheap[right]) { + bestchild = right; + best = twoheap[right]; + } + if (cur <= best) { + break; + } + twoheap[twopos] = best; + twopos = bestchild; + } + twoheap[twopos] = cur; } /** - * Compare two objects + * Heapify-Down for 4-ary heap. * - * @param o1 First object - * @param o2 Second object + * @param fourpos Position in 4-ary heap. 
+ * @param cur Current object */ + private void heapifyDown4(int fourpos, int cur) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + int best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + int nextchild = fourheap[candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (cur <= best) { + break; + } + fourheap[fourpos] = best; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + } + + @Override + public int peek() { + return twoheap[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(IntegerMinHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) { + buf.append(iter.get()).append(','); + } + buf.append(']'); + return buf.toString(); + } + @Override - protected boolean comp(int o1, int o2) { - return o1 > o2; + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (IntegerHeap.UnsortedIter iter = heap.unsortedIter(); iter.valid(); iter.advance()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements IntegerHeap.UnsortedIter { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. 
+ */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @Override + public int get() { + return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectHeap.java new file mode 100644 index 00000000..01f7aea0 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectHeap.java @@ -0,0 +1,129 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter; + +/** + * Basic in-memory heap interface, for int keys and V values. 
+ * + * @author Erich Schubert + * + * @apiviz.has UnsortedIter + * @param <V> Value type + */ +public interface IntegerObjectHeap<V> { + /** + * Add a key-value pair to the heap + * + * @param key Key + * @param val Value + */ + void add(int key, V val); + + /** + * Add a key-value pair to the heap if it improves the top. + * + * @param key Key + * @param val Value + * @param k Desired maximum size + */ + void add(int key, V val, int k); + + /** + * Combined operation that removes the top element, and inserts a new element + * instead. + * + * @param key Key of new element + * @param val Value of new element + */ + void replaceTopElement(int key, V val); + + /** + * Get the current top key + * + * @return Top key + */ + int peekKey(); + + /** + * Get the current top value + * + * @return Value + */ + V peekValue(); + + /** + * Remove the first element + */ + void poll(); + + /** + * Clear the heap contents. + */ + void clear(); + + /** + * Query the size + * + * @return Size + */ + public int size(); + + /** + * Is the heap empty? + * + * @return {@code true} when the size is 0. + */ + public boolean isEmpty(); + + /** + * Get an unsorted iterator to inspect the heap. + * + * @return Iterator + */ + UnsortedIter<V> unsortedIter(); + + /** + * Unsorted iterator - in heap order. Does not poll the heap. 
+ * + * @author Erich Schubert + * @param <V> Value type + */ + public static interface UnsortedIter<V> extends Iter { + /** + * Get the current key + * + * @return Current key + */ + int getKey(); + + /** + * Get the current value + * + * @return Current value + */ + V getValue(); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMaxHeap.java new file mode 100644 index 00000000..93a4e75a --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMaxHeap.java @@ -0,0 +1,482 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). 
+ * + * This code was automatically instantiated for the types: Integer and Object + * + * This combination was found to work quite well in benchmarks, but YMMV. + * + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> + * + * @author Erich Schubert + * + * @apiviz.has UnsortedIter + * @param <V> Value type + */ +public class IntegerObjectMaxHeap<V> implements IntegerObjectHeap<V> { + /** + * Base heap. + */ + protected int[] twoheap; + + /** + * Base heap values. + */ + protected Object[] twovals; + + /** + * Extension heap. + */ + protected int[] fourheap; + + /** + * Extension heapvalues. + */ + protected Object[] fourvals; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + + /** + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. 
+ */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + + /** + * Constructor, with default size. + */ + public IntegerObjectMaxHeap() { + super(); + int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE]; + Object[] twovals = new Object[TWO_HEAP_INITIAL_SIZE]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + this.size = 0; + this.modCount = 0; + } + + /** + * Constructor, with given minimum size. + * + * @param minsize Minimum size + */ + public IntegerObjectMaxHeap(int minsize) { + super(); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + int[] twoheap = new int[size]; + Object[] twovals = new Object[size]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + } else { + int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE]; + Object[] twovals = new Object[TWO_HEAP_INITIAL_SIZE]; + int[] fourheap = new int[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + Object[] fourvals = new Object[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = fourheap; + this.fourvals = fourvals; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + fourvals = null; + Arrays.fill(twoheap, 0); + Arrays.fill(twovals, null); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + public void add(int o, V v) { + final int co = o; + final Object cv = (Object)v; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. 
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + twovals[twopos] = cv; + ++size; + heapifyUp2(twopos, co, cv); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = new int[FOUR_HEAP_INITIAL_SIZE]; + fourvals = new Object[FOUR_HEAP_INITIAL_SIZE]; + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. + fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1)); + } + fourheap[fourpos] = co; + fourvals[fourpos] = cv; + ++size; + heapifyUp4(fourpos, co, cv); + ++modCount; + } + } + + @Override + public void add(int key, V val, int max) { + if (size < max) { + add(key, val); + } else if (twoheap[0] >= key) { + replaceTopElement(key, val); + } + } + + @Override + public void replaceTopElement(int reinsert, V val) { + heapifyDown(reinsert, (Object)val); + ++modCount; + } + + /** + * Heapify-Up method for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Current value + */ + private void heapifyUp2(int twopos, int cur, Object val) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + int par = twoheap[parent]; + if (cur <= par) { + break; + } + twoheap[twopos] = par; + twovals[twopos] = twovals[parent]; + twopos = parent; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. 
+ * @param cur Current object + * @param val Current value + */ + private void heapifyUp4(int fourpos, int cur, Object val) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + int par = fourheap[parent]; + if (cur <= par) { + break; + } + fourheap[fourpos] = par; + fourvals[fourpos] = fourvals[parent]; + fourpos = parent; + } + if (fourpos == 0 && twoheap[0] < cur) { + fourheap[0] = twoheap[0]; + fourvals[0] = twovals[0]; + twoheap[0] = cur; + twovals[0] = val; + } else { + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + } + + @Override + public void poll() { + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final int reinsert = fourheap[last]; + final Object reinsertv = fourvals[last]; + fourheap[last] = 0; + fourvals[last] = null; + heapifyDown(reinsert, reinsertv); + } else if (size > 0) { + final int reinsert = twoheap[size]; + final Object reinsertv = twovals[size]; + twoheap[size] = 0; + twovals[size] = null; + heapifyDown(reinsert, reinsertv); + } else { + twoheap[0] = 0; + twovals[0] = null; + } + ++modCount; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + * @param val Value to reinsert. + */ + private void heapifyDown(int reinsert, Object val) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (twoheap[1] >= twoheap[2]) ? 1 : 2; + if (fourheap[0] > twoheap[best]) { + twoheap[0] = fourheap[0]; + twovals[0] = fourvals[0]; + heapifyDown4(0, reinsert, val); + } else { + twoheap[0] = twoheap[best]; + twovals[0] = twovals[best]; + heapifyDown2(best, reinsert, val); + } + return; + } + heapifyDown2(0, reinsert, val); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Value to reinsert. 
+ */ + private void heapifyDown2(int twopos, int cur, Object val) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + int best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && best < twoheap[right]) { + bestchild = right; + best = twoheap[right]; + } + if (cur >= best) { + break; + } + twoheap[twopos] = best; + twovals[twopos] = twovals[bestchild]; + twopos = bestchild; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Down for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + * @param val Value to reinsert. + */ + private void heapifyDown4(int fourpos, int cur, Object val) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + int best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + int nextchild = fourheap[candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (cur >= best) { + break; + } + fourheap[fourpos] = best; + fourvals[fourpos] = fourvals[bestchild]; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + + @Override + public int peekKey() { + return twoheap[0]; + } + + @Override + @SuppressWarnings("unchecked") + public V peekValue() { + return (V)twovals[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(IntegerObjectMaxHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new 
UnsortedIter(); iter.valid(); iter.advance()) { + buf.append(iter.getKey()).append(':').append(iter.getValue()).append(','); + } + buf.append(']'); + return buf.toString(); + } + + @Override + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (IntegerObjectHeap.UnsortedIter<V> iter = heap.unsortedIter(); iter.valid(); iter.advance()) { + * doSomething(iter.getKey(), iter.getValue()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements IntegerObjectHeap.UnsortedIter<V> { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. + */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @Override + public int getKey() { + return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } + + @SuppressWarnings("unchecked") + + @Override + public V getValue() { + return (V)((pos < TWO_HEAP_MAX_SIZE) ? 
twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMinHeap.java new file mode 100644 index 00000000..e54c7d28 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMinHeap.java @@ -0,0 +1,482 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). + * + * This code was automatically instantiated for the types: Integer and Object + * + * This combination was found to work quite well in benchmarks, but YMMV. 
+ * + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> + * + * @author Erich Schubert + * + * @apiviz.has UnsortedIter + * @param <V> Value type + */ +public class IntegerObjectMinHeap<V> implements IntegerObjectHeap<V> { + /** + * Base heap. + */ + protected int[] twoheap; + + /** + * Base heap values. + */ + protected Object[] twovals; + + /** + * Extension heap. + */ + protected int[] fourheap; + + /** + * Extension heapvalues. + */ + protected Object[] fourvals; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + + /** + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. + */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + + /** + * Constructor, with default size. 
+ */ + public IntegerObjectMinHeap() { + super(); + int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE]; + Object[] twovals = new Object[TWO_HEAP_INITIAL_SIZE]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + this.size = 0; + this.modCount = 0; + } + + /** + * Constructor, with given minimum size. + * + * @param minsize Minimum size + */ + public IntegerObjectMinHeap(int minsize) { + super(); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + int[] twoheap = new int[size]; + Object[] twovals = new Object[size]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + } else { + int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE]; + Object[] twovals = new Object[TWO_HEAP_INITIAL_SIZE]; + int[] fourheap = new int[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + Object[] fourvals = new Object[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = fourheap; + this.fourvals = fourvals; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + fourvals = null; + Arrays.fill(twoheap, 0); + Arrays.fill(twovals, null); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + public void add(int o, V v) { + final int co = o; + final Object cv = (Object)v; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. 
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + twovals[twopos] = cv; + ++size; + heapifyUp2(twopos, co, cv); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = new int[FOUR_HEAP_INITIAL_SIZE]; + fourvals = new Object[FOUR_HEAP_INITIAL_SIZE]; + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. + fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1)); + } + fourheap[fourpos] = co; + fourvals[fourpos] = cv; + ++size; + heapifyUp4(fourpos, co, cv); + ++modCount; + } + } + + @Override + public void add(int key, V val, int max) { + if (size < max) { + add(key, val); + } else if (twoheap[0] <= key) { + replaceTopElement(key, val); + } + } + + @Override + public void replaceTopElement(int reinsert, V val) { + heapifyDown(reinsert, (Object)val); + ++modCount; + } + + /** + * Heapify-Up method for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Current value + */ + private void heapifyUp2(int twopos, int cur, Object val) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + int par = twoheap[parent]; + if (cur >= par) { + break; + } + twoheap[twopos] = par; + twovals[twopos] = twovals[parent]; + twopos = parent; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. 
+ * @param cur Current object + * @param val Current value + */ + private void heapifyUp4(int fourpos, int cur, Object val) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + int par = fourheap[parent]; + if (cur >= par) { + break; + } + fourheap[fourpos] = par; + fourvals[fourpos] = fourvals[parent]; + fourpos = parent; + } + if (fourpos == 0 && twoheap[0] > cur) { + fourheap[0] = twoheap[0]; + fourvals[0] = twovals[0]; + twoheap[0] = cur; + twovals[0] = val; + } else { + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + } + + @Override + public void poll() { + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final int reinsert = fourheap[last]; + final Object reinsertv = fourvals[last]; + fourheap[last] = 0; + fourvals[last] = null; + heapifyDown(reinsert, reinsertv); + } else if (size > 0) { + final int reinsert = twoheap[size]; + final Object reinsertv = twovals[size]; + twoheap[size] = 0; + twovals[size] = null; + heapifyDown(reinsert, reinsertv); + } else { + twoheap[0] = 0; + twovals[0] = null; + } + ++modCount; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + * @param val Value to reinsert. + */ + private void heapifyDown(int reinsert, Object val) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (twoheap[1] <= twoheap[2]) ? 1 : 2; + if (fourheap[0] < twoheap[best]) { + twoheap[0] = fourheap[0]; + twovals[0] = fourvals[0]; + heapifyDown4(0, reinsert, val); + } else { + twoheap[0] = twoheap[best]; + twovals[0] = twovals[best]; + heapifyDown2(best, reinsert, val); + } + return; + } + heapifyDown2(0, reinsert, val); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Value to reinsert. 
+ */ + private void heapifyDown2(int twopos, int cur, Object val) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + int best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && best > twoheap[right]) { + bestchild = right; + best = twoheap[right]; + } + if (cur <= best) { + break; + } + twoheap[twopos] = best; + twovals[twopos] = twovals[bestchild]; + twopos = bestchild; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Down for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + * @param val Value to reinsert. + */ + private void heapifyDown4(int fourpos, int cur, Object val) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + int best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + int nextchild = fourheap[candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (cur <= best) { + break; + } + fourheap[fourpos] = best; + fourvals[fourpos] = fourvals[bestchild]; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + + @Override + public int peekKey() { + return twoheap[0]; + } + + @Override + @SuppressWarnings("unchecked") + public V peekValue() { + return (V)twovals[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(IntegerObjectMinHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new 
UnsortedIter(); iter.valid(); iter.advance()) { + buf.append(iter.getKey()).append(':').append(iter.getValue()).append(','); + } + buf.append(']'); + return buf.toString(); + } + + @Override + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (IntegerObjectHeap.UnsortedIter<V> iter = heap.unsortedIter(); iter.valid(); iter.advance()) { + * doSomething(iter.getKey(), iter.getValue()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements IntegerObjectHeap.UnsortedIter<V> { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. + */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @Override + public int getKey() { + return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } + + @SuppressWarnings("unchecked") + + @Override + public V getValue() { + return (V)((pos < TWO_HEAP_MAX_SIZE) ? 
twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerPriorityObject.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerPriorityObject.java index 2014de65..f007b9fc 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerPriorityObject.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerPriorityObject.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ObjectHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ObjectHeap.java index 2e20ed56..b5dbbb0e 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ObjectHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ObjectHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,53 +23,24 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import java.util.Arrays; - -import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter; /** - * Basic in-memory heap structure. - * - * This heap is built lazily: if you first add many elements, then poll the - * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log - * n). This is implemented via a simple validTo counter. 
+ * Basic in-memory heap for K values. * * @author Erich Schubert + * + * @apiviz.has UnsortedIter + * + * @param <K> Key type */ -public abstract class ObjectHeap<K> extends AbstractHeap { - /** - * Heap storage: queue - */ - protected transient Object[] queue; - - /** - * Constructor with initial capacity. - * - * @param size initial capacity - */ - public ObjectHeap(int size) { - super(); - this.size = 0; - this.queue = new Object[size]; - } - +public interface ObjectHeap<K> { /** * Add a key-value pair to the heap * * @param key Key */ - public void add(Object key) { - // resize when needed - if (size + 1 > queue.length) { - resize(size + 1); - } - // final int pos = size; - this.queue[size] = key; - this.size += 1; - heapifyUp(size - 1, key); - validSize += 1; - heapModified(); - } + void add(K key); /** * Add a key-value pair to the heap, except if the new element is larger than @@ -78,13 +49,7 @@ public abstract class ObjectHeap<K> extends AbstractHeap { * @param key Key * @param max Maximum size of heap */ - public void add(Object key, int max) { - if (size < max) { - add(key); - } else if (comp(key, peek())) { - replaceTopElement(key); - } - } + void add(K key, int max); /** * Combined operation that removes the top element, and inserts a new element @@ -93,175 +58,69 @@ public abstract class ObjectHeap<K> extends AbstractHeap { * @param e New element to insert * @return Previous top element of the heap */ - @SuppressWarnings("unchecked") - public Object replaceTopElement(Object e) { - ensureValid(); - Object oldroot = (K) queue[0]; - heapifyDown(0, e); - heapModified(); - return oldroot; - } + K replaceTopElement(K e); /** * Get the current top key * * @return Top key */ - @SuppressWarnings("unchecked") - public Object peek() { - if (size == 0) { - throw new ArrayIndexOutOfBoundsException("Peek() on an empty heap!"); - } - ensureValid(); - return (K) queue[0]; - } + K peek(); /** * Remove the first element * * @return Top element */ - public Object poll() 
{ - return removeAt(0); - } + K poll(); /** - * Repair the heap + * Delete all elements from the heap. */ - protected void ensureValid() { - if (validSize != size) { - if (size > 1) { - // Parent of first invalid - int nextmin = validSize > 0 ? ((validSize - 1) >>> 1) : 0; - int curmin = MathUtil.nextAllOnesInt(nextmin); // Next line - int nextmax = curmin - 1; // End of valid line - int pos = (size - 2) >>> 1; // Parent of last element - // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin+", "+nextmin); - while (pos >= nextmin) { - // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin); - while (pos >= curmin) { - if (!heapifyDown(pos, queue[pos])) { - final int parent = (pos - 1) >>> 1; - if (parent < curmin) { - nextmin = Math.min(nextmin, parent); - nextmax = Math.max(nextmax, parent); - } - } - pos--; - } - curmin = nextmin; - pos = Math.min(pos, nextmax); - nextmax = -1; - } - } - validSize = size; - } - } + void clear(); /** - * Remove the element at the given position. + * Query the size * - * @param pos Element position. - * @return Removed element + * @return Size */ - @SuppressWarnings("unchecked") - protected Object removeAt(int pos) { - if (pos < 0 || pos >= size) { - return null; - } - final Object top = (K) queue[0]; - // Replacement object: - final Object reinkey = queue[size - 1]; - // Keep heap in sync - if (validSize == size) { - size -= 1; - validSize -= 1; - heapifyDown(pos, reinkey); - } else { - size -= 1; - validSize = Math.min(pos >>> 1, validSize); - queue[pos] = reinkey; - } - heapModified(); - return top; - } + public int size(); /** - * Execute a "Heapify Upwards" aka "SiftUp". Used in insertions. + * Is the heap empty? * - * @param pos insertion position - * @param curkey Current key + * @return {@code true} when the size is 0. 
*/ - protected void heapifyUp(int pos, Object curkey) { - while (pos > 0) { - final int parent = (pos - 1) >>> 1; - Object parkey = queue[parent]; - - if (comp(curkey, parkey)) { // Compare - break; - } - queue[pos] = parkey; - pos = parent; - } - queue[pos] = curkey; - } + public boolean isEmpty(); /** - * Execute a "Heapify Downwards" aka "SiftDown". Used in deletions. + * Get an unsorted iterator to inspect the heap. * - * @param ipos re-insertion position - * @param curkey Current key - * @return true when the order was changed + * @return Iterator */ - protected boolean heapifyDown(final int ipos, Object curkey) { - int pos = ipos; - final int half = size >>> 1; - while (pos < half) { - // Get left child (must exist!) - int cpos = (pos << 1) + 1; - Object chikey = queue[cpos]; - // Test right child, if present - final int rchild = cpos + 1; - if (rchild < size) { - Object right = queue[rchild]; - if (comp(chikey, right)) { // Compare - cpos = rchild; - chikey = right; - } - } - - if (comp(chikey, curkey)) { // Compare - break; - } - queue[pos] = chikey; - pos = cpos; - } - queue[pos] = curkey; - return (pos == ipos); - } + UnsortedIter<K> unsortedIter(); /** - * Test whether we need to resize to have the requested capacity. + * Unsorted iterator - in heap order. Does not poll the heap. * - * @param requiredSize required capacity - */ - protected final void resize(int requiredSize) { - queue = Arrays.copyOf(queue, desiredSize(requiredSize, queue.length)); - } - - /** - * Delete all elements from the heap. + * <pre> + * {@code + * for (ObjectHeap.UnsortedIter<K> iter = heap.unsortedIter(); iter.valid(); iter.next()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert + * + * @param <K> Key type */ - @Override - public void clear() { - super.clear(); - for (int i = 0; i < size; i++) { - queue[i] = null; - } + public static interface UnsortedIter<K> extends Iter { + /** + * Get the iterators current object. 
+ * + * @return Current object + */ + K get(); } - - /** - * Compare two objects - */ - abstract protected boolean comp(Object o1, Object o2); } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedHeap.java index 2daaafa4..32f57999 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -25,11 +25,8 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; import java.util.ArrayList; import java.util.Comparator; -import java.util.Iterator; import java.util.List; -import de.lmu.ifi.dbs.elki.utilities.iterator.MergedIterator; - /** * A size-limited heap similar to {@link TopBoundedHeap}, discarding elements * with the highest value. However, this variation keeps a list of tied @@ -43,7 +40,7 @@ public class TiedTopBoundedHeap<E> extends TopBoundedHeap<E> { /** * List to keep ties in. */ - private List<E> ties = new ArrayList<E>(); + private List<E> ties = new ArrayList<>(); /** * Constructor with comparator. @@ -75,12 +72,6 @@ public class TiedTopBoundedHeap<E> extends TopBoundedHeap<E> { ties.clear(); } - @SuppressWarnings("unchecked") - @Override - public Iterator<E> iterator() { - return new MergedIterator<E>(ties.iterator(), super.iterator()); - } - @Override public E peek() { if (ties.isEmpty()) { @@ -131,4 +122,44 @@ public class TiedTopBoundedHeap<E> extends TopBoundedHeap<E> { ties.clear(); } } + + /** + * Get an unordered heap iterator. + * + * @return Iterator. 
+ */ + @Override + public UnorderedIter unorderedIter() { + return new UnorderedIter(); + } + + /** + * Unordered heap iterator class. + * + * @author Erich Schubert + * + */ + public class UnorderedIter extends Heap<E>.UnorderedIter { + /** + * Constructor. + */ + protected UnorderedIter() { + super(); + } + + @Override + public boolean valid() { + return pos < size(); + } + + @Override + public E get() { + final int ssize = TiedTopBoundedHeap.super.size(); + if (pos < ssize) { + return super.get(); + } else { + return ties.get(pos - ssize); + } + } + } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedUpdatableHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedUpdatableHeap.java index 8e39af1d..3905030f 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedUpdatableHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedUpdatableHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -29,7 +29,6 @@ import java.util.Iterator; import java.util.List; import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; -import de.lmu.ifi.dbs.elki.utilities.iterator.MergedIterator; /** * A size-limited heap similar to {@link TopBoundedHeap}, discarding elements @@ -44,7 +43,7 @@ public class TiedTopBoundedUpdatableHeap<E> extends TopBoundedUpdatableHeap<E> { /** * List to keep ties in. */ - private List<E> ties = new ArrayList<E>(); + private List<E> ties = new ArrayList<>(); /** * Constructor with comparator. 
@@ -76,12 +75,6 @@ public class TiedTopBoundedUpdatableHeap<E> extends TopBoundedUpdatableHeap<E> { ties.clear(); } - @SuppressWarnings("unchecked") - @Override - public Iterator<E> iterator() { - return new MergedIterator<E>(ties.iterator(), super.iterator()); - } - @Override public void offerAt(int pos, E e) { if(pos == IN_TIES) { diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedHeap.java index 07b595f6..9adda9f3 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -69,7 +69,6 @@ public class TopBoundedHeap<E> extends Heap<E> { return; } // Peek at the top element, return if we are worse. 
- ensureValid(); final int comp; if (comparator == null) { @SuppressWarnings("unchecked") diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedUpdatableHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedUpdatableHeap.java index 75f2abcf..4a591d4c 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedUpdatableHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedUpdatableHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -68,7 +68,6 @@ public class TopBoundedUpdatableHeap<E> extends UpdatableHeap<E> { super.offerAt(pos, e); return; } - ensureValid(); if (compare(e, queue[0]) < 0) { // while we did not change, this still was "successful". return; diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/UpdatableHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/UpdatableHeap.java index 1ab5f4df..a585d94d 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/UpdatableHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/UpdatableHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -49,7 +49,7 @@ public class UpdatableHeap<O> extends Heap<O> { /** * Holds the indices in the heap of each element. 
*/ - protected final TObjectIntMap<Object> index = new TObjectIntHashMap<Object>(100, 0.5f, NO_VALUE); + protected final TObjectIntMap<Object> index = new TObjectIntHashMap<>(100, 0.5f, NO_VALUE); /** * Simple constructor with default size. @@ -105,43 +105,32 @@ public class UpdatableHeap<O> extends Heap<O> { * @param e Element */ protected void offerAt(final int pos, O e) { - if(pos == NO_VALUE) { + if (pos == NO_VALUE) { // resize when needed - if(size + 1 > queue.length) { + if (size + 1 > queue.length) { resize(size + 1); } - // final int pos = size; - this.queue[size] = e; index.put(e, size); - size += 1; - // We do NOT YET update the heap. This is done lazily. + size++; + heapifyUp(size - 1, e); heapModified(); return; - } - else { + } else { assert (pos >= 0) : "Unexpected negative position."; assert (queue[pos].equals(e)); // Did the value improve? - if(comparator == null) { + if (comparator == null) { @SuppressWarnings("unchecked") Comparable<Object> c = (Comparable<Object>) e; - if(c.compareTo(queue[pos]) >= 0) { + if (c.compareTo(queue[pos]) >= 0) { return; } - } - else { - if(comparator.compare(e, queue[pos]) >= 0) { + } else { + if (comparator.compare(e, queue[pos]) >= 0) { return; } } - if(pos >= validSize) { - queue[pos] = e; - // validSize = Math.min(pos, validSize); - } - else { - // ensureValid(); - heapifyUp(pos, e); - } + heapifyUp(pos, e); heapModified(); return; } @@ -149,7 +138,7 @@ public class UpdatableHeap<O> extends Heap<O> { @Override protected O removeAt(int pos) { - if(pos < 0 || pos >= size) { + if (pos < 0 || pos >= size) { return null; } @SuppressWarnings("unchecked") @@ -158,34 +147,22 @@ public class UpdatableHeap<O> extends Heap<O> { final Object reinsert = queue[size - 1]; queue[size - 1] = null; // Keep heap in sync? 
- if(validSize == size) { - size -= 1; - validSize -= 1; - if(comparator != null) { - if(comparator.compare(ret, reinsert) > 0) { - heapifyUpComparator(pos, reinsert); - } - else { - heapifyDownComparator(pos, reinsert); - } + size--; + if (comparator != null) { + if (comparator.compare(ret, reinsert) > 0) { + heapifyUpComparator(pos, reinsert); + } else { + heapifyDownComparator(pos, reinsert); } - else { - @SuppressWarnings("unchecked") - Comparable<Object> comp = (Comparable<Object>) ret; - if(comp.compareTo(reinsert) > 0) { - heapifyUpComparable(pos, reinsert); - } - else { - heapifyDownComparable(pos, reinsert); - } + } else { + @SuppressWarnings("unchecked") + Comparable<Object> comp = (Comparable<Object>) ret; + if (comp.compareTo(reinsert) > 0) { + heapifyUpComparable(pos, reinsert); + } else { + heapifyDownComparable(pos, reinsert); } } - else { - size -= 1; - validSize = Math.min(pos >>> 1, validSize); - queue[pos] = reinsert; - index.put(reinsert, pos); - } heapModified(); // Keep index up to date index.remove(ret); @@ -200,10 +177,9 @@ public class UpdatableHeap<O> extends Heap<O> { */ public O removeObject(O e) { int pos = index.get(e); - if(pos >= 0) { + if (pos >= 0) { return removeAt(pos); - } - else { + } else { return null; } } @@ -214,7 +190,7 @@ public class UpdatableHeap<O> extends Heap<O> { index.remove(node); return node; } - + @Override public O replaceTopElement(O e) { O node = super.replaceTopElement(e); @@ -232,11 +208,11 @@ public class UpdatableHeap<O> extends Heap<O> { @SuppressWarnings("unchecked") protected void heapifyUpComparable(int pos, Object elem) { final Comparable<Object> cur = (Comparable<Object>) elem; // queue[pos]; - while(pos > 0) { + while (pos > 0) { final int parent = (pos - 1) >>> 1; Object par = queue[parent]; - if(cur.compareTo(par) >= 0) { + if (cur.compareTo(par) >= 0) { break; } queue[pos] = par; @@ -255,11 +231,11 @@ public class UpdatableHeap<O> extends Heap<O> { */ @Override protected void 
heapifyUpComparator(int pos, Object cur) { - while(pos > 0) { + while (pos > 0) { final int parent = (pos - 1) >>> 1; Object par = queue[parent]; - if(comparator.compare(cur, par) >= 0) { + if (comparator.compare(cur, par) >= 0) { break; } queue[pos] = par; @@ -276,21 +252,21 @@ public class UpdatableHeap<O> extends Heap<O> { Comparable<Object> cur = (Comparable<Object>) reinsert; int pos = ipos; final int half = size >>> 1; - while(pos < half) { + while (pos < half) { // Get left child (must exist!) int cpos = (pos << 1) + 1; Object child = queue[cpos]; // Test right child, if present final int rchild = cpos + 1; - if(rchild < size) { + if (rchild < size) { Object right = queue[rchild]; - if(((Comparable<Object>) child).compareTo(right) > 0) { + if (((Comparable<Object>) child).compareTo(right) > 0) { cpos = rchild; child = right; } } - if(cur.compareTo(child) <= 0) { + if (cur.compareTo(child) <= 0) { break; } queue[pos] = child; @@ -299,32 +275,32 @@ public class UpdatableHeap<O> extends Heap<O> { } queue[pos] = cur; index.put(cur, pos); - return (pos == ipos); + return (pos != ipos); } @Override protected boolean heapifyDownComparator(final int ipos, Object cur) { int pos = ipos; final int half = size >>> 1; - while(pos < half) { + while (pos < half) { int min = pos; Object best = cur; final int lchild = (pos << 1) + 1; Object left = queue[lchild]; - if(comparator.compare(best, left) > 0) { + if (comparator.compare(best, left) > 0) { min = lchild; best = left; } final int rchild = lchild + 1; - if(rchild < size) { + if (rchild < size) { Object right = queue[rchild]; - if(comparator.compare(best, right) > 0) { + if (comparator.compare(best, right) > 0) { min = rchild; best = right; } } - if(min == pos) { + if (min == pos) { break; } queue[pos] = best; @@ -333,6 +309,6 @@ public class UpdatableHeap<O> extends Heap<O> { } queue[pos] = cur; index.put(cur, pos); - return (pos == ipos); + return (pos != ipos); } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/package-info.java index 3f193171..83be37f4 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/HashMapHierarchy.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/HashMapHierarchy.java new file mode 100644 index 00000000..c77a9329 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/HashMapHierarchy.java @@ -0,0 +1,580 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +import java.util.Arrays; +import java.util.HashMap; +import java.util.Iterator; + +/** + * Centralized hierarchy implementation, using a HashMap of Lists. + * + * @author Erich Schubert + * + * @param <O> Object type (arbitrary!) + */ +public class HashMapHierarchy<O> implements ModifiableHierarchy<O> { + /** + * Reference storage. + */ + final private HashMap<O, Rec<O>> graph; + + /** + * Constructor. + */ + public HashMapHierarchy() { + super(); + this.graph = new HashMap<>(); + } + + @Override + public int size() { + return graph.size(); + } + + @Override + public void add(O parent, O child) { + // Add child to parent. + { + Rec<O> rec = graph.get(parent); + if (rec == null) { + rec = new Rec<>(); + graph.put(parent, rec); + } + rec.addChild(child); + } + // Add parent to child + { + Rec<O> rec = graph.get(child); + if (rec == null) { + rec = new Rec<>(); + graph.put(child, rec); + } + rec.addParent(parent); + } + } + + @Override + public void add(O entry) { + Rec<O> rec = graph.get(entry); + if (rec == null) { + rec = new Rec<>(); + graph.put(entry, rec); + } + } + + @Override + public void remove(O parent, O child) { + // Remove child from parent. 
+ { + Rec<O> rec = graph.get(parent); + if (rec != null) { + rec.removeChild(child); + } + } + // Remove parent from child + { + Rec<O> rec = graph.get(child); + if (rec != null) { + rec.removeParent(parent); + } + } + } + + @Override + public void remove(O entry) { + Rec<O> rec = graph.get(entry); + if (rec == null) { + return; + } + for (int i = 0; i < rec.nump; i++) { + graph.get(rec.parents[i]).removeChild(entry); + rec.parents[i] = null; + } + for (int i = 0; i < rec.numc; i++) { + graph.get(rec.children[i]).removeParent(entry); + rec.children[i] = null; + } + graph.remove(entry); + } + + @SuppressWarnings("unchecked") + @Override + public void removeSubtree(O entry) { + Rec<O> rec = graph.get(entry); + if (rec == null) { + return; + } + for (int i = 0; i < rec.nump; i++) { + graph.get(rec.parents[i]).removeChild(entry); + rec.parents[i] = null; + } + for (int i = 0; i < rec.numc; i++) { + final Rec<O> crec = graph.get(rec.children[i]); + crec.removeParent(entry); + if (crec.nump == 0) { + removeSubtree((O) rec.children[i]); + } + rec.children[i] = null; + } + } + + @Override + public int numChildren(O obj) { + Rec<O> rec = graph.get(obj); + if (rec == null) { + return 0; + } + return rec.numc; + } + + @SuppressWarnings("unchecked") + @Override + public Iter<O> iterChildren(O obj) { + Rec<O> rec = graph.get(obj); + if (rec == null) { + return (Iter<O>) EMPTY_ITERATOR; + } + return rec.iterChildren(); + } + + @Override + public Iter<O> iterDescendants(O obj) { + return new ItrDesc(obj); + } + + @Override + public int numParents(O obj) { + Rec<O> rec = graph.get(obj); + if (rec == null) { + return 0; + } + return rec.nump; + } + + @SuppressWarnings("unchecked") + @Override + public Iter<O> iterParents(O obj) { + Rec<O> rec = graph.get(obj); + if (rec == null) { + return (Iter<O>) EMPTY_ITERATOR; + } + return rec.iterParents(); + } + + @Override + public Iter<O> iterAncestors(O obj) { + return new ItrAnc(obj); + } + + @Override + public Iter<O> iterAll() { + 
return new ItrAll();
+  }
+
+  /**
+   * Hierarchy pointers for an object: its parents and children, kept in plain arrays.
+   * 
+   * @author Erich Schubert
+   * 
+   * @apiviz.exclude
+   * 
+   * @param <O> object type
+   */
+  private static class Rec<O> {
+    /**
+     * Number of used slots in {@link #parents} and in {@link #children}.
+     */
+    int nump = 0, numc = 0;
+
+    /**
+     * Parents; {@code null} while there are none, only the first nump slots are used.
+     */
+    Object[] parents = null;
+
+    /**
+     * Children; {@code null} while there are none, only the first numc slots are used.
+     */
+    Object[] children = null;
+
+    /**
+     * Add a parent; does nothing if an equal parent is already stored.
+     * 
+     * @param parent Parent to add.
+     */
+    void addParent(O parent) {
+      if (parents == null) {
+        parents = new Object[1];
+        parents[0] = parent;
+        nump = 1;
+      } else {
+        for (int i = 0; i < nump; i++) {
+          if (parent.equals(parents[i])) {
+            return;
+          }
+        }
+        if (parents.length == nump) {
+          final int newsize = Math.max(5, (parents.length << 1) + 1); // max, not min: min capped the array at 5 slots
+          parents = Arrays.copyOf(parents, newsize);
+        }
+        parents[nump] = parent;
+        nump++;
+      }
+    }
+
+    /**
+     * Add a child; does nothing if an equal child is already stored.
+     * 
+     * @param child Child to add
+     */
+    void addChild(O child) {
+      if (children == null) {
+        children = new Object[5];
+        children[0] = child;
+        numc = 1;
+      } else {
+        for (int i = 0; i < numc; i++) {
+          if (child.equals(children[i])) {
+            return;
+          }
+        }
+        if (children.length == numc) {
+          children = Arrays.copyOf(children, (children.length << 1) + 1);
+        }
+        children[numc] = child;
+        numc++;
+      }
+    }
+
+    /**
+     * Remove a parent, shifting later entries down; no-op if it is not stored.
+     * 
+     * @param parent Parent to remove.
+     */
+    void removeParent(O parent) {
+      if (parents == null) {
+        return;
+      }
+      for (int i = 0; i < nump; i++) {
+        if (parent.equals(parents[i])) {
+          System.arraycopy(parents, i + 1, parents, i, nump - 1 - i);
+          parents[nump - 1] = null; // clear the vacated last used slot; index nump is out of range when the array is full
+          nump--;
+          break;
+        }
+      }
+      if (nump == 0) {
+        parents = null;
+      }
+    }
+
+    /**
+     * Remove a child, shifting later entries down; no-op if it is not stored.
+     * 
+     * @param child Child to remove.
+     */
+    void removeChild(O child) {
+      if (children == null) {
+        return;
+      }
+      for (int i = 0; i < numc; i++) {
+        if (child.equals(children[i])) {
+          System.arraycopy(children, i + 1, children, i, numc - 1 - i);
+          children[numc - 1] = null; // clear the vacated last used slot; index numc is out of range when the array is full
+          numc--;
+          break;
+        }
+      }
+      if (numc == 0) {
+        children = null;
+      }
+    }
+
+    /**
+     * Iterate over parents.
+     * 
+     * @return Iterator for parents.
+     */
+    @SuppressWarnings("unchecked")
+    public Iter<O> iterParents() {
+      if (nump == 0) {
+        return (Iter<O>) EMPTY_ITERATOR;
+      }
+      return new ItrParents();
+    }
+
+    /**
+     * Iterate over children.
+     * 
+     * @return Iterator for children.
+     */
+    @SuppressWarnings("unchecked")
+    public Iter<O> iterChildren() {
+      if (numc == 0) {
+        return (Iter<O>) EMPTY_ITERATOR;
+      }
+      return new ItrChildren();
+    }
+
+    /**
+     * Parent iterator.
+     * 
+     * @author Erich Schubert
+     * 
+     * @apiviz.exclude
+     */
+    class ItrParents implements Iter<O> {
+      int pos = 0;
+
+      @Override
+      public boolean valid() {
+        return pos < nump;
+      }
+
+      @Override
+      public void advance() {
+        pos++;
+      }
+
+      @SuppressWarnings("unchecked")
+      @Override
+      public O get() {
+        return (O) parents[pos];
+      }
+    }
+
+    /**
+     * Child iterator.
+     * 
+     * @author Erich Schubert
+     * 
+     * @apiviz.exclude
+     */
+    class ItrChildren implements Iter<O> {
+      int pos = 0;
+
+      @Override
+      public boolean valid() {
+        return pos < numc;
+      }
+
+      @Override
+      public void advance() {
+        pos++;
+      }
+
+      @SuppressWarnings("unchecked")
+      @Override
+      public O get() {
+        return (O) children[pos];
+      }
+    }
+  }
+
+  /**
+   * Iterator to collect into the descendants.
+   * 
+   * @author Erich Schubert
+   * 
+   * @apiviz.exclude
+   */
+  private class ItrDesc implements Iter<O> {
+    /**
+     * Iterator over children
+     */
+    final Iter<O> childiter;
+
+    /**
+     * Iterator of current child
+     */
+    Iter<O> subiter = null;
+
+    /**
+     * Starting element.
+ * + * @param start + */ + ItrDesc(O start) { + childiter = iterChildren(start); + } + + @Override + public boolean valid() { + return childiter.valid() || (subiter != null && subiter.valid()); + } + + @Override + public void advance() { + if (subiter == null) { // Not yet descended + assert (childiter.valid()); + subiter = iterDescendants(childiter.get()); + } else { // Continue with subtree + subiter.advance(); + } + if (subiter.valid()) { + return; + } + // Proceed to next child. + childiter.advance(); + subiter = null; + } + + @Override + public O get() { + if (subiter != null) { + assert (subiter.valid()); + return subiter.get(); + } else { + assert (childiter.valid()); + return childiter.get(); + } + } + } + + /** + * Iterator over all Ancestors. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + private class ItrAnc implements Iter<O> { + /** + * Iterator over children + */ + final Iter<O> parentiter; + + /** + * Iterator of current child + */ + Iter<O> subiter = null; + + /** + * Starting element. + * + * @param start + */ + ItrAnc(O start) { + parentiter = iterParents(start); + } + + @Override + public boolean valid() { + return parentiter.valid() || (subiter != null && subiter.valid()); + } + + @Override + public void advance() { + if (subiter == null) { // Not yet descended + assert (parentiter.valid()); + subiter = iterAncestors(parentiter.get()); + } else { // Continue with subtree + subiter.advance(); + } + if (subiter.valid()) { + return; + } + // Proceed to next child. + parentiter.advance(); + subiter = null; + } + + @Override + public O get() { + if (subiter != null) { + assert (subiter.valid()); + return subiter.get(); + } else { + assert (parentiter.valid()); + return parentiter.get(); + } + } + } + + /** + * Iterator over all members of the hierarchy. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + private class ItrAll implements Iter<O> { + /** + * The true iterator. 
+ */ + final Iterator<O> iter; + + /** + * Current object. + */ + O cur = null; + + /** + * Constructor. + */ + ItrAll() { + iter = graph.keySet().iterator(); + advance(); + } + + @Override + public boolean valid() { + return cur != null; + } + + @Override + public void advance() { + if (iter.hasNext()) { + cur = iter.next(); + } else { + cur = null; + } + } + + @Override + public O get() { + return cur; + } + } + + /** + * Empty iterator. + */ + private static final Iter<?> EMPTY_ITERATOR = new Iter<Object>() { + @Override + public boolean valid() { + return false; + } + + @Override + public void advance() { + throw new UnsupportedOperationException("Empty iterators must not be advanced."); + } + + @Override + public Object get() { + throw new UnsupportedOperationException("Iterator is empty."); + } + }; +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/Hierarchical.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/Hierarchical.java deleted file mode 100644 index 29909069..00000000 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/Hierarchical.java +++ /dev/null @@ -1,90 +0,0 @@ -package de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy; - -/* - This file is part of ELKI: - Environment for Developing KDD-Applications Supported by Index-Structures - - Copyright (C) 2012 - Ludwig-Maximilians-Universität München - Lehr- und Forschungseinheit für Datenbanksysteme - ELKI Development Team - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. 
- - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -import java.util.Iterator; -import java.util.List; - - -/** - * Interface for objects with an <b>internal</b> hierarchy interface. - * - * Note that the object can chose to delegate the hierarchy to an external hierarchy. - * - * @author Erich Schubert - * - * @param <O> Object type in hierarchy - */ -public interface Hierarchical<O> { - /** - * Test for hierarchical properties - * - * @return hierarchical data model. - */ - public boolean isHierarchical(); - - /** - * Get number of children - * - * @return number of children - */ - public int numChildren(); - - /** - * Get children list. Resulting list MAY be modified. Result MAY be null, if - * the model is not hierarchical. - * - * @return list of children - */ - public List<O> getChildren(); - - /** - * Iterate descendants (recursive children) - * - * @return iterator for descendants - */ - public Iterator<O> iterDescendants(); - - /** - * Get number of parents - * - * @return number of parents - */ - public int numParents(); - - /** - * Get parents list. Resulting list MAY be modified. Result MAY be null, if - * the model is not hierarchical. - * - * @return list of parents - */ - public List<O> getParents(); - - /** - * Iterate ancestors (recursive parents) - * - * @return iterator for ancestors - */ - public Iterator<O> iterAncestors(); -}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/Hierarchy.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/Hierarchy.java index 0a16e9b7..fec9c7b4 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/Hierarchy.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/Hierarchy.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,38 +23,40 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy; along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import java.util.Iterator; -import java.util.List; - /** * This interface represents an (external) hierarchy of objects. It can contain * arbitrary objects, BUT the hierarchy has to be accessed using the hierarchy - * object, i.e. {@code hierarchy.getChildren(object);}. - * - * See {@link Hierarchical} for an interface for objects with an internal - * hierarchy (where you can use {@code object.getChildren();}) + * object, i.e. {@code hierarchy.iterChildren(object);}. * * @author Erich Schubert * + * @apiviz.has Iter + * * @param <O> Object type */ public interface Hierarchy<O> { /** + * Total size - number of objects contained. + * + * @return Size + */ + int size(); + + /** * Get number of children * * @param self object to get number of children for * @return number of children */ - public int numChildren(O self); + int numChildren(O self); /** - * Get children list. Resulting list MAY be modified. Result MAY be null, if - * the model is not hierarchical. + * Iterate over the (direct) children. 
* * @param self object to get children for - * @return list of children + * @return iterator for children */ - public List<O> getChildren(O self); + Iter<O> iterChildren(O self); /** * Iterate descendants (recursive children) @@ -62,7 +64,7 @@ public interface Hierarchy<O> { * @param self object to get descendants for * @return iterator for descendants */ - public Iterator<O> iterDescendants(O self); + Iter<O> iterDescendants(O self); /** * Get number of (direct) parents @@ -70,16 +72,15 @@ public interface Hierarchy<O> { * @param self reference object * @return number of parents */ - public int numParents(O self); + int numParents(O self); /** - * Get parents list. Resulting list MAY be modified. Result MAY be null, if - * the model is not hierarchical. + * Iterate over the (direct) parents. * * @param self object to get parents for - * @return list of parents + * @return iterator of parents */ - public List<O> getParents(O self); + Iter<O> iterParents(O self); /** * Iterate ancestors (recursive parents) @@ -87,5 +88,30 @@ public interface Hierarchy<O> { * @param self object to get ancestors for * @return iterator for ancestors */ - public Iterator<O> iterAncestors(O self); -}
\ No newline at end of file + Iter<O> iterAncestors(O self); + + /** + * Iterate over all members. + * + * @return Iterator over all members. + */ + Iter<O> iterAll(); + + /** + * Iterator interface. + * + * TODO: add a skipSubtree method? + * + * @author Erich Schubert + * + * @param <O> Object type. + */ + static interface Iter<O> extends de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter { + /** + * Access the current object. + * + * @return Current object + */ + O get(); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/HierarchyHashmapList.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/HierarchyHashmapList.java deleted file mode 100644 index bd6d67bf..00000000 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/HierarchyHashmapList.java +++ /dev/null @@ -1,299 +0,0 @@ -package de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy; - -/* - This file is part of ELKI: - Environment for Developing KDD-Applications Supported by Index-Structures - - Copyright (C) 2012 - Ludwig-Maximilians-Universität München - Lehr- und Forschungseinheit für Datenbanksysteme - ELKI Development Team - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. 
- */ - -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; - -import de.lmu.ifi.dbs.elki.logging.LoggingUtil; -import de.lmu.ifi.dbs.elki.utilities.iterator.EmptyIterator; - -/** - * Centralized hierarchy implementation, using a HashMap of Lists. - * - * @author Erich Schubert - * - * @param <O> Object type (arbitrary!) - */ -public class HierarchyHashmapList<O> implements ModifiableHierarchy<O> { - /** - * The data storage for parents - */ - final private HashMap<O, List<O>> pmap; - - /** - * The data storage for children - */ - final private HashMap<O, List<O>> cmap; - - /** - * Constructor - */ - public HierarchyHashmapList() { - super(); - this.pmap = new HashMap<O, List<O>>(); - this.cmap = new HashMap<O, List<O>>(); - } - - @Override - public void add(O parent, O child) { - // Add child to parent. - { - List<O> pchi = this.cmap.get(parent); - if(pchi == null) { - pchi = new LinkedList<O>(); - this.cmap.put(parent, pchi); - } - if(!pchi.contains(child)) { - pchi.add(child); - } else { - LoggingUtil.warning("Result added twice: "+parent+" -> "+child, new Throwable()); - } - } - // Add child to parent - { - List<O> cpar = this.pmap.get(child); - if(cpar == null) { - cpar = new LinkedList<O>(); - this.pmap.put(child, cpar); - } - if(!cpar.contains(parent)) { - cpar.add(parent); - } else { - LoggingUtil.warning("Result added twice: "+parent+" <- "+child, new Throwable()); - } - } - } - - @Override - public void remove(O parent, O child) { - // Remove child from parent. 
- { - List<O> pchi = this.cmap.get(parent); - if(pchi != null) { - while(pchi.remove(child)) { - // repeat - remove all instances - } - if(pchi.size() == 0) { - this.cmap.remove(parent); - } - } - } - // Remove parent from child - { - List<O> cpar = this.pmap.get(child); - if(cpar != null) { - while(cpar.remove(parent)) { - // repeat - remove all instances - } - if(cpar.size() == 0) { - this.pmap.remove(child); - } - } - } - } - - /** - * Put an object along with parent and child lists. - * - * @param obj Object - * @param parents Parent list - * @param children Child list - */ - public void put(O obj, List<O> parents, List<O> children) { - this.pmap.put(obj, parents); - this.cmap.put(obj, children); - } - - @Override - public int numChildren(O obj) { - List<O> children = this.cmap.get(obj); - if(children == null) { - return 0; - } - return children.size(); - } - - @Override - public List<O> getChildren(O obj) { - List<O> children = this.cmap.get(obj); - if(children == null) { - return Collections.emptyList(); - } - return children; - } - - @Override - public Iterator<O> iterDescendants(O obj) { - return new ItrDesc(obj); - } - - @Override - public int numParents(O obj) { - List<O> parents = this.pmap.get(obj); - if(parents == null) { - return 0; - } - return parents.size(); - } - - @Override - public List<O> getParents(O obj) { - List<O> parents = this.pmap.get(obj); - if(parents == null) { - return Collections.emptyList(); - } - return parents; - } - - @Override - public Iterator<O> iterAncestors(O obj) { - return new ItrAnc(obj); - } - - /** - * Iterator to collect into the descendants. 
- * - * @author Erich Schubert - * - * @apiviz.exclude - */ - private class ItrDesc implements Iterator<O> { - /** - * Starting object (for cloning); - */ - final O start; - - /** - * Iterator over children - */ - final Iterator<O> childiter; - - /** - * Iterator of current child - */ - Iterator<O> subiter; - - public ItrDesc(O start) { - this.start = start; - List<O> children = getChildren(start); - if(children != null) { - this.childiter = children.iterator(); - } - else { - this.childiter = EmptyIterator.STATIC(); - } - this.subiter = null; - } - - @Override - public boolean hasNext() { - if(subiter != null && subiter.hasNext()) { - return true; - } - return childiter.hasNext(); - } - - @Override - public O next() { - // Try nested iterator first ... - if(subiter != null && subiter.hasNext()) { - return subiter.next(); - } - // Next direct child, update subiter. - final O child = childiter.next(); - subiter = iterDescendants(child); - return child; - } - - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - } - - /** - * Iterator over all Ancestors. - * - * @author Erich Schubert - * - * @apiviz.exclude - */ - private class ItrAnc implements Iterator<O> { - /** - * Starting object (for cloning); - */ - final O start; - - /** - * Iterator over parents - */ - final Iterator<O> parentiter; - - /** - * Iterator of current parent - */ - Iterator<O> subiter; - - public ItrAnc(O start) { - this.start = start; - List<O> parents = getParents(start); - if(parents != null) { - this.parentiter = parents.iterator(); - } - else { - this.parentiter = EmptyIterator.STATIC(); - } - this.subiter = null; - } - - @Override - public boolean hasNext() { - if(subiter != null && subiter.hasNext()) { - return true; - } - return parentiter.hasNext(); - } - - @Override - public O next() { - // Try nested iterator first ... - if(subiter != null && subiter.hasNext()) { - return subiter.next(); - } - // Next direct parent, update subiter. 
- final O parent = parentiter.next(); - subiter = iterAncestors(parent); - return parent; - } - - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - } -}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/HierarchyReferenceLists.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/HierarchyReferenceLists.java deleted file mode 100644 index 76091298..00000000 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/HierarchyReferenceLists.java +++ /dev/null @@ -1,232 +0,0 @@ -package de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy; - -/* - This file is part of ELKI: - Environment for Developing KDD-Applications Supported by Index-Structures - - Copyright (C) 2012 - Ludwig-Maximilians-Universität München - Lehr- und Forschungseinheit für Datenbanksysteme - ELKI Development Team - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -import java.util.Iterator; -import java.util.List; - -import de.lmu.ifi.dbs.elki.utilities.iterator.EmptyIterator; - -/** - * Hierarchy implementation with a per-object representation. - * - * @author Erich Schubert - * - * @apiviz.uses Hierarchical - * - * @param <O> Type of objects in hierarchy - */ -public class HierarchyReferenceLists<O extends Hierarchical<O>> implements Hierarchy<O> { - /** - * Owner - */ - protected O owner; - - /** - * Storage for children - */ - protected List<O> children; - - /** - * Storage for parents - */ - protected List<O> parents; - - /** - * Constructor for hierarchy object. 
- * - * @param owner owning cluster. - * @param children child clusters. May be null. - * @param parents parent clusters. May be null. - */ - public HierarchyReferenceLists(O owner, List<O> children, List<O> parents) { - super(); - this.owner = owner; - this.children = children; - this.parents = parents; - } - - @Override - public int numChildren(O self) { - if(owner != self) { - throw new UnsupportedOperationException("Decentral hierarchy queried for wrong object!"); - } - if(children == null) { - return 0; - } - return children.size(); - } - - @Override - public List<O> getChildren(O self) { - if(owner != self) { - throw new UnsupportedOperationException("Decentral hierarchy queried for wrong object!"); - } - return children; - } - - @Override - public Iterator<O> iterDescendants(O self) { - if(owner != self) { - return EmptyIterator.STATIC(); - } - if (children == null) { - return EmptyIterator.STATIC(); - } - return new ItrDesc(self); - } - - @Override - public int numParents(O self) { - if(owner != self) { - throw new UnsupportedOperationException("Decentral hierarchy queried for wrong object!"); - } - if (parents == null) { - return 0; - } - return parents.size(); - } - - /** - * Return parents - */ - @Override - public List<O> getParents(O self) { - if(owner != self) { - throw new UnsupportedOperationException("Decentral hierarchy queried for wrong object!"); - } - return parents; - } - - @Override - public Iterator<O> iterAncestors(O self) { - if(owner != self) { - throw new UnsupportedOperationException("Decentral hierarchy queried for wrong object!"); - } - if (parents == null) { - return EmptyIterator.STATIC(); - } - return new ItrAnc(self); - } - - /** - * Iterator to collect into the descendants. 
- * - * @author Erich Schubert - * - * @apiviz.exclude - */ - private class ItrDesc implements Iterator<O> { - /** - * Iterator over children - */ - final Iterator<O> childiter; - - /** - * Iterator of current child - */ - Iterator<O> subiter; - - public ItrDesc(O start) { - assert (start == owner); - this.childiter = children.iterator(); - this.subiter = null; - } - - @Override - public boolean hasNext() { - if(subiter != null && subiter.hasNext()) { - return true; - } - return childiter.hasNext(); - } - - @Override - public O next() { - // Try nested iterator first ... - if(subiter != null && subiter.hasNext()) { - return subiter.next(); - } - // Next direct child, update subiter. - final O child = childiter.next(); - subiter = child.iterDescendants(); - return child; - } - - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - } - - /** - * Iterator over all Ancestors. - * - * @author Erich Schubert - * - * @apiviz.exclude - */ - private class ItrAnc implements Iterator<O> { - /** - * Iterator over parents - */ - final Iterator<O> parentiter; - - /** - * Iterator of current parent - */ - Iterator<O> subiter; - - public ItrAnc(O start) { - assert (start == owner); - this.parentiter = parents.iterator(); - this.subiter = null; - } - - @Override - public boolean hasNext() { - if(subiter != null && subiter.hasNext()) { - return true; - } - return parentiter.hasNext(); - } - - @Override - public O next() { - // Try nested iterator first ... - if(subiter != null && subiter.hasNext()) { - return subiter.next(); - } - // Next direct parent, update subiter. - final O parent = parentiter.next(); - subiter = parent.iterAncestors(); - return parent; - } - - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - } -}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/ModifiableHierarchy.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/ModifiableHierarchy.java index dadc6f66..06001d6b 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/ModifiableHierarchy.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/ModifiableHierarchy.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,8 +23,6 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy; along with this program. If not, see <http://www.gnu.org/licenses/>. */ - - /** * Modifiable Hierarchy. * @@ -39,8 +37,14 @@ public interface ModifiableHierarchy<O> extends Hierarchy<O> { * @param parent Parent * @param child Child */ - // TODO: return true when new? - public void add(O parent, O child); + void add(O parent, O child); + + /** + * Add an entry (initializes data structures). + * + * @param entry Entry + */ + void add(O entry); /** * Remove a parent-child relationship. @@ -48,6 +52,20 @@ public interface ModifiableHierarchy<O> extends Hierarchy<O> { * @param parent Parent * @param child Child */ - // TODO: return true when found? - public void remove(O parent, O child); + void remove(O parent, O child); + + /** + * Remove an entry and all its parent-child relationships. + * + * @param entry Entry + */ + void remove(O entry); + + /** + * Remove an entry and it's whole subtree (unless the elements are reachable + * by a different path!) 
+ * + * @param entry Entry + */ + void removeSubtree(O entry); } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/package-info.java index 0aba31be..965b15fc 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractObjDynamicHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractObjDynamicHistogram.java index 9d0dba0d..165c2c8b 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractObjDynamicHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractObjDynamicHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -127,21 +127,36 @@ public abstract class AbstractObjDynamicHistogram<T> extends AbstractObjStaticHi // Store in cache if (cachefill >= 0) { if (cachefill < cacheposs.length) { - cacheposs[cachefill] = coord; cachevals[cachefill] = cloneForCache(value); - cachefill++; + ++cachefill; return; - } else { - materialize(); - // But continue below! } } - // Check if we need to resample to accomodate this bin. 
- testResample(coord); - // super class will handle histogram resizing / shifting - T exist = get(coord); - data[getBinNr(coord)] = aggregate(exist, value); + if (coord == Double.NEGATIVE_INFINITY) { + aggregateSpecial(value, 0); + } else if (coord == Double.POSITIVE_INFINITY) { + aggregateSpecial(value, 1); + } else if (Double.isNaN(coord)) { + aggregateSpecial(value, 2); + } else { + // super class will handle histogram resizing / shifting + T exist = get(coord); + data[getBinNr(coord)] = aggregate(exist, value); + } + } + + /** + * Aggregate for a special value. + * + * @param value Parameter value + * @param bin Special bin index. + */ + protected void aggregateSpecial(T value, int bin) { + final T exist = getSpecial(bin); + // Note: do not inline above accessor, as getSpecial will initialize the + // special variable used below! + special[bin] = aggregate(exist, value); } /** diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractObjStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractObjStaticHistogram.java index c1882302..4a1649af 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractObjStaticHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractObjStaticHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -35,6 +35,16 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; */ public abstract class AbstractObjStaticHistogram<T> extends AbstractStaticHistogram implements ObjHistogram<T> { /** + * Data store + */ + Object[] data; + + /** + * Special value storage: infinity, NaN + */ + Object[] special = null; + + /** * Constructor. 
* * @param bins Number of bins @@ -46,15 +56,13 @@ public abstract class AbstractObjStaticHistogram<T> extends AbstractStaticHistog if (bins >= 0) { // -1 will be used by FlexiHistogram to delay initialization. data = new Object[bins]; + for (int i = 0; i < bins; i++) { + data[i] = makeObject(); + } } } /** - * Data store - */ - Object[] data; - - /** * Access the value of a bin with new data. * * @param coord Coordinate @@ -62,6 +70,15 @@ public abstract class AbstractObjStaticHistogram<T> extends AbstractStaticHistog */ @SuppressWarnings("unchecked") public T get(double coord) { + if (coord == Double.NEGATIVE_INFINITY) { + return getSpecial(0); + } + if (coord == Double.POSITIVE_INFINITY) { + return getSpecial(1); + } + if (Double.isNaN(coord)) { + return getSpecial(2); + } int bin = getBinNr(coord); if (bin < 0) { if (size - bin > data.length) { @@ -103,6 +120,19 @@ public abstract class AbstractObjStaticHistogram<T> extends AbstractStaticHistog } /** + * Ensure that we have storage for special values (infinity, NaN) + * + * @param idx Index to return. + */ + @SuppressWarnings("unchecked") + protected T getSpecial(int idx) { + if (special == null) { + special = new Object[] { makeObject(), makeObject(), makeObject() }; + } + return (T) special[idx]; + } + + /** * Class to make a new object for the data store. * * @return New instance. 
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractStaticHistogram.java index 799ac009..3363e61e 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractStaticHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractStaticHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleArrayStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleArrayStaticHistogram.java index aeba3c4b..86b53d03 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleArrayStaticHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleArrayStaticHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleDynamicHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleDynamicHistogram.java index 77a1f9e4..84f97dfe 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleDynamicHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleDynamicHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is 
part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleHistogram.java index d5cee785..e4a24c95 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleStaticHistogram.java index db839d10..5a634cf2 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleStaticHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleStaticHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatDynamicHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatDynamicHistogram.java index a14ed00a..9829eaf8 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatDynamicHistogram.java 
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatDynamicHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatHistogram.java index f5a65bfa..7f034152 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatStaticHistogram.java index b3f41994..063bd80a 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatStaticHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatStaticHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/Histogram.java 
b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/Histogram.java index 75be6830..8c8d9a87 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/Histogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/Histogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,7 +23,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import de.lmu.ifi.dbs.elki.utilities.iterator.ArrayIter; +import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.ArrayIter; /** * Abstract API for histograms. Without specific type information, to allow this diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntArrayStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntArrayStaticHistogram.java index 8d00604b..ff9a82aa 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntArrayStaticHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntArrayStaticHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntDynamicHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntDynamicHistogram.java index 0967ebd5..b131af7d 100644 --- 
a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntDynamicHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntDynamicHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntHistogram.java index 9ec4ec56..9bfae100 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntStaticHistogram.java index d4de36d7..7b1eed94 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntStaticHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntStaticHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git 
a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongArrayStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongArrayStaticHistogram.java index efbf751f..e3580792 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongArrayStaticHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongArrayStaticHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongDynamicHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongDynamicHistogram.java index 676a5e8f..93c4eee5 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongDynamicHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongDynamicHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongHistogram.java index 9be15e65..16577c38 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications 
Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongStaticHistogram.java index 63e15599..b270908d 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongStaticHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongStaticHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/MeanVarianceStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/MeanVarianceStaticHistogram.java index 2a464382..0f1ea0a3 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/MeanVarianceStaticHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/MeanVarianceStaticHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ObjHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ObjHistogram.java index ac4d4e4b..bad4eec1 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ObjHistogram.java +++ 
b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ObjHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortDynamicHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortDynamicHistogram.java index ff94928b..a49810ee 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortDynamicHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortDynamicHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortHistogram.java index 699df896..0b83bc4c 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortStaticHistogram.java 
b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortStaticHistogram.java index b2809e1e..2819d966 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortStaticHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortStaticHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/package-info.java index 65dd6446..cee1836b 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/package-info.java @@ -13,7 +13,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/ArrayIter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/ArrayIter.java new file mode 100644 index 00000000..7b2a96ad --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/ArrayIter.java @@ -0,0 +1,59 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.iterator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of 
the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * Array iterators can also go backwards and seek. + * + * @author Erich Schubert + * + * @apiviz.excludeSubtypes + */ +public interface ArrayIter extends Iter { + /** + * Get current iterator offset. + * + * @return Iterator position + */ + public int getOffset(); + + /** + * Moves the iterator forward or backward by the given offset. + * + * @param count offset to move forward or backwards + */ + public void advance(int count); + + /** + * Moves the iterator backward to the previous entry. 
+ */ + public void retract(); + + /** + * Moves the iterator to the given position + * + * @param off Seek offset + */ + public void seek(int off); +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/ArrayListIter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/ArrayListIter.java new file mode 100644 index 00000000..820217ec --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/ArrayListIter.java @@ -0,0 +1,99 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.iterator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.List; + +/** + * ELKI style Iterator for array lists. + * + * Note: this implementation is only efficient for lists with efficient random + * access and seeking (i.e. ArrayLists, but not Linked Lists!) + * + * @author Erich Schubert + * + * @apiviz.excludeSubtypes + * + * @param <O> contained object type. + */ +public class ArrayListIter<O> implements ArrayIter { + /** + * The array list to iterate over. + */ + final List<O> data; + + /** + * Current position. + */ + int pos = 0; + + /** + * Constructor. 
+ * + * @param data Data array. + */ + public ArrayListIter(List<O> data) { + super(); + this.data = data; + } + + @Override + public boolean valid() { + return pos < data.size(); + } + + @Override + public void advance() { + pos++; + } + + @Override + public int getOffset() { + return pos; + } + + @Override + public void advance(int count) { + pos += count; + } + + @Override + public void retract() { + pos--; + } + + @Override + public void seek(int off) { + pos = off; + } + + /** + * Get the current element. + * + * @return current element + */ + public O get() { + return data.get(pos); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/Iter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/Iter.java new file mode 100644 index 00000000..3d111f14 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/Iter.java @@ -0,0 +1,71 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.iterator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * Iterator interface for more than one return value. 
+ * + * The Java standard {@link java.util.Iterator} interface has some drawbacks: + * <ul> + * <li>the only way to get the current value is to advance the iterator</li> + * <li>the iterator can only point to a single value</li> + * <li>the iterator can only return objects, not primitives</li> + * </ul> + * + * This iterator interface is a bit more flexible. For example on a distance + * list, we can have a single type of iterator that allows access to the + * distance, the object ID or the combination of both. + * + * In some situations, this can save the creation of many small objects, which + * put load on the garbage collector. This super interface does not have a "get" + * operation, which is to come from specialized interfaces instead. + * + * Usage example: + * + * <pre> + * {@code + * for (Iter iter = ids.iter(); iter.valid(); iter.advance()) { + * iter.doSomething(); + * } + * } + * </pre> + * + * @author Erich Schubert + * + * @apiviz.landmark + * @apiviz.excludeSubtypes + */ +public interface Iter { + /** + * Returns true if the iterator currently points to a valid object. + * + * @return a <code>boolean</code> value, whether the position is valid. + */ + public boolean valid(); + + /** + * Moves the iterator forward to the next entry. 
+ */ + public void advance(); +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/MIter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/MIter.java new file mode 100644 index 00000000..14e5443d --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/MIter.java @@ -0,0 +1,54 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.iterator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * Modifiable iterator, that also supports removal. + * + * Usage example: + * + * <pre> + * {@code + * for (MIter iter = ids.iter(); iter.valid(); iter.advance()) { + * if (testSomething(iter)) { + * iter.remove(); + * continue; // Iterator may point to something else + * } + * } + * } + * </pre> + * + * @author Erich Schubert + * + * @apiviz.excludeSubtypes + */ +public interface MIter extends Iter { + /** + * Remove the object the iterator currently points to. + * + * Note that, usually, the iterator will now point to a different object, very + * often to the previous one (but this is not guaranteed!) 
+ */ + void remove(); +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/package-info.java new file mode 100644 index 00000000..d241fcc4 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/package-info.java @@ -0,0 +1,40 @@ +/** + * <p>ELKI Iterator API.</p> + * + * <p>ELKI uses a custom iterator API instead of the usual {@link java.util.Iterator} classes (the "Java Collections API"). + * The reason for this is largely efficiency. Benchmarking showed that the Java Iterator API can be quite expensive when dealing + * with primitive types, as {@link java.util.Iterator#next} is meant to always return an object.</p> + * + * <p>However, the benefits become more apparent when considering multi-valued iterators. + * For example an iterator over a k nearest neighbor set in ELKI both represents an object by its DBID, + * and a distance value. For double-valued distances, it can be retrieved using a primitive value getter + * (saving an extra object copy), and since the iterator can be used as a DBIDRef, it can also represent + * the current object without creating additional objects.</p> + * + * <p>While it may seem odd to depart from Java conventions such as the collections API, + * note that these iterators are very close to the standard C++ conventions, so nothing entirely unusual. 
+ * Also the GNU trove libraries - used by ELKI in various places - use the same kind of iterators.</p> + */ +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +package de.lmu.ifi.dbs.elki.utilities.datastructures.iterator;
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/package-info.java index ae8308af..a0d894a9 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team |