diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap')
35 files changed, 7778 insertions, 1646 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/AbstractHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/AbstractHeap.java deleted file mode 100644 index b6f098e6..00000000 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/AbstractHeap.java +++ /dev/null @@ -1,103 +0,0 @@ -package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; - -/* - This file is part of ELKI: - Environment for Developing KDD-Applications Supported by Index-Structures - - Copyright (C) 2012 - Ludwig-Maximilians-Universität München - Lehr- und Forschungseinheit für Datenbanksysteme - ELKI Development Team - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -/** - * Abstract base class for heaps. - * - * @author Erich Schubert - */ -public class AbstractHeap { - /** - * Default initial capacity - */ - public static final int DEFAULT_INITIAL_CAPACITY = 11; - - /** - * Current number of objects - */ - public int size = 0; - - /** - * Indicate up to where the heap is valid - */ - public int validSize = 0; - - /** - * (Structural) modification counter. Used to invalidate iterators. - */ - public int modCount = 0; - - /** - * Constructor. - */ - public AbstractHeap() { - super(); - } - - /** - * Query the size - * - * @return Size - */ - public int size() { - return this.size; - } - - /** - * Delete all elements from the heap. - */ - public void clear() { - this.size = 0; - this.validSize = -1; - heapModified(); - } - - /** - * Test whether we need to resize to have the requested capacity. - * - * @param requiredSize required capacity - * @param capacity Current capacity - * @return new capacity - */ - protected final int desiredSize(int requiredSize, int capacity) { - // Double until 64, then increase by 50% each time. - int newCapacity = ((capacity < 64) ? ((capacity + 1) * 2) : ((capacity / 2) * 3)); - // overflow? - if (newCapacity < 0) { - throw new OutOfMemoryError(); - } - if (requiredSize > newCapacity) { - newCapacity = requiredSize; - } - return newCapacity; - } - - /** - * Called at the end of each heap modification. - */ - protected void heapModified() { - modCount++; - } -} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMaxHeap.java index ab8ef1bb..222fe83a 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMaxHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMaxHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,41 +23,409 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; along with this program. If not, see <http://www.gnu.org/licenses/>. */ +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + /** - * Basic in-memory heap structure. + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). + * + * This code was automatically instantiated for the type: Comparable + * + * This combination was found to work quite well in benchmarks, but YMMV. * - * This heap is built lazily: if you first add many elements, then poll the - * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log - * n). This is implemented via a simple validTo counter. + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> * * @author Erich Schubert + * + * @apiviz.has UnsortedIter + * @param <K> Key type */ -public class ComparableMaxHeap<K extends Comparable<K>> extends ObjectHeap<K> { +public class ComparableMaxHeap<K extends Comparable<? super K>> implements ObjectHeap<K> { + /** + * Base heap. + */ + protected Comparable<Object>[] twoheap; + + /** + * Extension heap. + */ + protected Comparable<Object>[] fourheap; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + + /** + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. + */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + /** - * Constructor with default capacity. + * Constructor, with default size. */ + @SuppressWarnings("unchecked") public ComparableMaxHeap() { - super(DEFAULT_INITIAL_CAPACITY); + super(); + Comparable<Object>[] twoheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, TWO_HEAP_INITIAL_SIZE); + + this.twoheap = twoheap; + this.fourheap = null; + this.size = 0; + this.modCount = 0; } /** - * Constructor with initial capacity. + * Constructor, with given minimum size. * - * @param size initial capacity + * @param minsize Minimum size */ - public ComparableMaxHeap(int size) { - super(size); + @SuppressWarnings("unchecked") + public ComparableMaxHeap(int minsize) { + super(); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + Comparable<Object>[] twoheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, size); + + this.twoheap = twoheap; + this.fourheap = null; + } else { + Comparable<Object>[] twoheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, TWO_HEAP_INITIAL_SIZE); + Comparable<Object>[] fourheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, minsize - TWO_HEAP_MAX_SIZE); + this.twoheap = twoheap; + this.fourheap = fourheap; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + Arrays.fill(twoheap, null); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + @SuppressWarnings("unchecked") + public void add(K o) { + final Comparable<Object> co = (Comparable<Object>)o; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + ++size; + heapifyUp2(twopos, co); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, FOUR_HEAP_INITIAL_SIZE); + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. + fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + } + fourheap[fourpos] = co; + ++size; + heapifyUp4(fourpos, co); + ++modCount; + } + } + + @Override + public void add(K key, int max) { + if (size < max) { + add(key); + } else if (twoheap[0].compareTo(key) >= 0) { + replaceTopElement(key); + } + } + + @Override + @SuppressWarnings("unchecked") + public K replaceTopElement(K reinsert) { + final Comparable<Object> ret = twoheap[0]; + heapifyDown((Comparable<Object>) reinsert); + ++modCount; + return (K)ret; } /** - * Compare two objects + * Heapify-Up method for 2-ary heap. * - * @param o1 First object - * @param o2 Second object + * @param twopos Position in 2-ary heap. + * @param cur Current object */ + private void heapifyUp2(int twopos, Comparable<Object> cur) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + Comparable<Object> par = twoheap[parent]; + if (cur.compareTo(par) <= 0) { + break; + } + twoheap[twopos] = par; + twopos = parent; + } + twoheap[twopos] = cur; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + */ + private void heapifyUp4(int fourpos, Comparable<Object> cur) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + Comparable<Object> par = fourheap[parent]; + if (cur.compareTo(par) <= 0) { + break; + } + fourheap[fourpos] = par; + fourpos = parent; + } + if (fourpos == 0 && twoheap[0].compareTo(cur) < 0) { + fourheap[0] = twoheap[0]; + twoheap[0] = cur; + } else { + fourheap[fourpos] = cur; + } + } + @Override @SuppressWarnings("unchecked") - protected boolean comp(Object o1, Object o2) { - return ((K) o1).compareTo((K) o2) < 0; + public K poll() { + final Comparable<Object> ret = twoheap[0]; + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final Comparable<Object> reinsert = fourheap[last]; + fourheap[last] = null; + heapifyDown(reinsert); + } else if (size > 0) { + final Comparable<Object> reinsert = twoheap[size]; + twoheap[size] = null; + heapifyDown(reinsert); + } else { + twoheap[0] = null; + } + ++modCount; + return (K)ret; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + */ + private void heapifyDown(Comparable<Object> reinsert) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (twoheap[1].compareTo(twoheap[2]) >= 0) ? 1 : 2; + if (fourheap[0].compareTo(twoheap[best]) > 0) { + twoheap[0] = fourheap[0]; + heapifyDown4(0, reinsert); + } else { + twoheap[0] = twoheap[best]; + heapifyDown2(best, reinsert); + } + return; + } + heapifyDown2(0, reinsert); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + */ + private void heapifyDown2(int twopos, Comparable<Object> cur) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + Comparable<Object> best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && best.compareTo(twoheap[right]) < 0) { + bestchild = right; + best = twoheap[right]; + } + if (cur.compareTo(best) >= 0) { + break; + } + twoheap[twopos] = best; + twopos = bestchild; + } + twoheap[twopos] = cur; + } + + /** + * Heapify-Down for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + */ + private void heapifyDown4(int fourpos, Comparable<Object> cur) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + Comparable<Object> best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + Comparable<Object> nextchild = fourheap[candidate]; + if (best.compareTo(nextchild) < 0) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (best.compareTo(nextchild) < 0) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (best.compareTo(nextchild) < 0) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (cur.compareTo(best) >= 0) { + break; + } + fourheap[fourpos] = best; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + } + + @Override + @SuppressWarnings("unchecked") + public K peek() { + return (K)twoheap[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(ComparableMaxHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) { + buf.append(iter.get()).append(','); + } + buf.append(']'); + return buf.toString(); + } + + @Override + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (ObjectHeap.UnsortedIter<K> iter = heap.unsortedIter(); iter.valid(); iter.next()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements ObjectHeap.UnsortedIter<K> { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. + */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @SuppressWarnings("unchecked") + + @Override + public K get() { + return (K)((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMinHeap.java index 06d2cb32..3cc5a02f 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMinHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMinHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,41 +23,409 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; along with this program. If not, see <http://www.gnu.org/licenses/>. */ +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + /** - * Basic in-memory heap structure. + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). + * + * This code was automatically instantiated for the type: Comparable + * + * This combination was found to work quite well in benchmarks, but YMMV. * - * This heap is built lazily: if you first add many elements, then poll the - * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log - * n). This is implemented via a simple validTo counter. + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> * * @author Erich Schubert + * + * @apiviz.has UnsortedIter + * @param <K> Key type */ -public class ComparableMinHeap<K extends Comparable<K>> extends ObjectHeap<K> { +public class ComparableMinHeap<K extends Comparable<? super K>> implements ObjectHeap<K> { + /** + * Base heap. + */ + protected Comparable<Object>[] twoheap; + + /** + * Extension heap. + */ + protected Comparable<Object>[] fourheap; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + + /** + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. + */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + /** - * Constructor with default capacity. + * Constructor, with default size. */ + @SuppressWarnings("unchecked") public ComparableMinHeap() { - super(DEFAULT_INITIAL_CAPACITY); + super(); + Comparable<Object>[] twoheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, TWO_HEAP_INITIAL_SIZE); + + this.twoheap = twoheap; + this.fourheap = null; + this.size = 0; + this.modCount = 0; } /** - * Constructor with initial capacity. + * Constructor, with given minimum size. * - * @param size initial capacity + * @param minsize Minimum size */ - public ComparableMinHeap(int size) { - super(size); + @SuppressWarnings("unchecked") + public ComparableMinHeap(int minsize) { + super(); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + Comparable<Object>[] twoheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, size); + + this.twoheap = twoheap; + this.fourheap = null; + } else { + Comparable<Object>[] twoheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, TWO_HEAP_INITIAL_SIZE); + Comparable<Object>[] fourheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, minsize - TWO_HEAP_MAX_SIZE); + this.twoheap = twoheap; + this.fourheap = fourheap; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + Arrays.fill(twoheap, null); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + @SuppressWarnings("unchecked") + public void add(K o) { + final Comparable<Object> co = (Comparable<Object>)o; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + ++size; + heapifyUp2(twopos, co); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, FOUR_HEAP_INITIAL_SIZE); + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. + fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + } + fourheap[fourpos] = co; + ++size; + heapifyUp4(fourpos, co); + ++modCount; + } + } + + @Override + public void add(K key, int max) { + if (size < max) { + add(key); + } else if (twoheap[0].compareTo(key) <= 0) { + replaceTopElement(key); + } + } + + @Override + @SuppressWarnings("unchecked") + public K replaceTopElement(K reinsert) { + final Comparable<Object> ret = twoheap[0]; + heapifyDown((Comparable<Object>) reinsert); + ++modCount; + return (K)ret; } /** - * Compare two objects + * Heapify-Up method for 2-ary heap. * - * @param o1 First object - * @param o2 Second object + * @param twopos Position in 2-ary heap. + * @param cur Current object */ + private void heapifyUp2(int twopos, Comparable<Object> cur) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + Comparable<Object> par = twoheap[parent]; + if (cur.compareTo(par) >= 0) { + break; + } + twoheap[twopos] = par; + twopos = parent; + } + twoheap[twopos] = cur; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + */ + private void heapifyUp4(int fourpos, Comparable<Object> cur) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + Comparable<Object> par = fourheap[parent]; + if (cur.compareTo(par) >= 0) { + break; + } + fourheap[fourpos] = par; + fourpos = parent; + } + if (fourpos == 0 && twoheap[0].compareTo(cur) > 0) { + fourheap[0] = twoheap[0]; + twoheap[0] = cur; + } else { + fourheap[fourpos] = cur; + } + } + @Override @SuppressWarnings("unchecked") - protected boolean comp(Object o1, Object o2) { - return ((K) o1).compareTo((K) o2) > 0; + public K poll() { + final Comparable<Object> ret = twoheap[0]; + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final Comparable<Object> reinsert = fourheap[last]; + fourheap[last] = null; + heapifyDown(reinsert); + } else if (size > 0) { + final Comparable<Object> reinsert = twoheap[size]; + twoheap[size] = null; + heapifyDown(reinsert); + } else { + twoheap[0] = null; + } + ++modCount; + return (K)ret; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + */ + private void heapifyDown(Comparable<Object> reinsert) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (twoheap[1].compareTo(twoheap[2]) <= 0) ? 1 : 2; + if (fourheap[0].compareTo(twoheap[best]) < 0) { + twoheap[0] = fourheap[0]; + heapifyDown4(0, reinsert); + } else { + twoheap[0] = twoheap[best]; + heapifyDown2(best, reinsert); + } + return; + } + heapifyDown2(0, reinsert); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + */ + private void heapifyDown2(int twopos, Comparable<Object> cur) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + Comparable<Object> best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && best.compareTo(twoheap[right]) > 0) { + bestchild = right; + best = twoheap[right]; + } + if (cur.compareTo(best) <= 0) { + break; + } + twoheap[twopos] = best; + twopos = bestchild; + } + twoheap[twopos] = cur; + } + + /** + * Heapify-Down for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + */ + private void heapifyDown4(int fourpos, Comparable<Object> cur) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + Comparable<Object> best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + Comparable<Object> nextchild = fourheap[candidate]; + if (best.compareTo(nextchild) > 0) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (best.compareTo(nextchild) > 0) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (best.compareTo(nextchild) > 0) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (cur.compareTo(best) <= 0) { + break; + } + fourheap[fourpos] = best; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + } + + @Override + @SuppressWarnings("unchecked") + public K peek() { + return (K)twoheap[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(ComparableMinHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) { + buf.append(iter.get()).append(','); + } + buf.append(']'); + return buf.toString(); + } + + @Override + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (ObjectHeap.UnsortedIter<K> iter = heap.unsortedIter(); iter.valid(); iter.next()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements ObjectHeap.UnsortedIter<K> { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. + */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @SuppressWarnings("unchecked") + + @Override + public K get() { + return (K)((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMaxHeap.java new file mode 100644 index 00000000..7b660d31 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMaxHeap.java @@ -0,0 +1,440 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). + * + * This code was automatically instantiated for the type: Comparator + * + * This combination was found to work quite well in benchmarks, but YMMV. + * + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> + * + * @author Erich Schubert + * + * @apiviz.has UnsortedIter + * @param <K> Key type + */ +public class ComparatorMaxHeap<K> implements ObjectHeap<K> { + /** + * Base heap. + */ + protected Object[] twoheap; + + /** + * Extension heap. + */ + protected Object[] fourheap; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + + /** + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. + */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + + + /** + * Comparator + */ + protected java.util.Comparator<Object> comparator; + + /** + * Constructor, with default size. + * @param comparator Comparator + */ + @SuppressWarnings("unchecked") + public ComparatorMaxHeap(java.util.Comparator<? super K> comparator) { + super(); + this.comparator = (java.util.Comparator<Object>) java.util.Comparator.class.cast(comparator); + Object[] twoheap = new Object[TWO_HEAP_INITIAL_SIZE]; + + this.twoheap = twoheap; + this.fourheap = null; + this.size = 0; + this.modCount = 0; + } + + /** + * Constructor, with given minimum size. + * + * @param minsize Minimum size + * @param comparator Comparator + */ + @SuppressWarnings("unchecked") + public ComparatorMaxHeap(int minsize, java.util.Comparator<? super K> comparator) { + super(); + this.comparator = (java.util.Comparator<Object>) java.util.Comparator.class.cast(comparator); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + Object[] twoheap = new Object[size]; + + this.twoheap = twoheap; + this.fourheap = null; + } else { + Object[] twoheap = new Object[TWO_HEAP_INITIAL_SIZE]; + Object[] fourheap = new Object[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + this.twoheap = twoheap; + this.fourheap = fourheap; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + Arrays.fill(twoheap, null); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + public void add(K o) { + final Object co = o; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + ++size; + heapifyUp2(twopos, co); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = new Object[FOUR_HEAP_INITIAL_SIZE]; + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. + fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + } + fourheap[fourpos] = co; + ++size; + heapifyUp4(fourpos, co); + ++modCount; + } + } + + @Override + public void add(K key, int max) { + if (size < max) { + add(key); + } else if (comparator.compare(twoheap[0], key) >= 0) { + replaceTopElement(key); + } + } + + @Override + @SuppressWarnings("unchecked") + public K replaceTopElement(K reinsert) { + final Object ret = twoheap[0]; + heapifyDown( reinsert); + ++modCount; + return (K)ret; + } + + /** + * Heapify-Up method for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + */ + private void heapifyUp2(int twopos, Object cur) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + Object par = twoheap[parent]; + if (comparator.compare(cur, par) <= 0) { + break; + } + twoheap[twopos] = par; + twopos = parent; + } + twoheap[twopos] = cur; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + */ + private void heapifyUp4(int fourpos, Object cur) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + Object par = fourheap[parent]; + if (comparator.compare(cur, par) <= 0) { + break; + } + fourheap[fourpos] = par; + fourpos = parent; + } + if (fourpos == 0 && comparator.compare(twoheap[0], cur) < 0) { + fourheap[0] = twoheap[0]; + twoheap[0] = cur; + } else { + fourheap[fourpos] = cur; + } + } + + @Override + @SuppressWarnings("unchecked") + public K poll() { + final Object ret = twoheap[0]; + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final Object reinsert = fourheap[last]; + fourheap[last] = null; + heapifyDown(reinsert); + } else if (size > 0) { + final Object reinsert = twoheap[size]; + twoheap[size] = null; + heapifyDown(reinsert); + } else { + twoheap[0] = null; + } + ++modCount; + return (K)ret; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + */ + private void heapifyDown(Object reinsert) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (comparator.compare(twoheap[1], twoheap[2]) >= 0) ? 1 : 2; + if (comparator.compare(fourheap[0], twoheap[best]) > 0) { + twoheap[0] = fourheap[0]; + heapifyDown4(0, reinsert); + } else { + twoheap[0] = twoheap[best]; + heapifyDown2(best, reinsert); + } + return; + } + heapifyDown2(0, reinsert); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + */ + private void heapifyDown2(int twopos, Object cur) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + Object best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && comparator.compare(best, twoheap[right]) < 0) { + bestchild = right; + best = twoheap[right]; + } + if (comparator.compare(cur, best) >= 0) { + break; + } + twoheap[twopos] = best; + twopos = bestchild; + } + twoheap[twopos] = cur; + } + + /** + * Heapify-Down for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + */ + private void heapifyDown4(int fourpos, Object cur) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + Object best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + Object nextchild = fourheap[candidate]; + if (comparator.compare(best, nextchild) < 0) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (comparator.compare(best, nextchild) < 0) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (comparator.compare(best, nextchild) < 0) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (comparator.compare(cur, best) >= 0) { + break; + } + fourheap[fourpos] = best; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + } + + @Override + @SuppressWarnings("unchecked") + public K peek() { + return (K)twoheap[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(ComparatorMaxHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) { + buf.append(iter.get()).append(','); + } + buf.append(']'); + return buf.toString(); + } + + @Override + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (ObjectHeap.UnsortedIter<K> iter = heap.unsortedIter(); iter.valid(); iter.next()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements ObjectHeap.UnsortedIter<K> { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. + */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @SuppressWarnings("unchecked") + + @Override + public K get() { + return (K)((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMinHeap.java new file mode 100644 index 00000000..e12c5f64 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMinHeap.java @@ -0,0 +1,440 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). + * + * This code was automatically instantiated for the type: Comparator + * + * This combination was found to work quite well in benchmarks, but YMMV. + * + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> + * + * @author Erich Schubert + * + * @apiviz.has UnsortedIter + * @param <K> Key type + */ +public class ComparatorMinHeap<K> implements ObjectHeap<K> { + /** + * Base heap. + */ + protected Object[] twoheap; + + /** + * Extension heap. + */ + protected Object[] fourheap; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + + /** + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. + */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + + + /** + * Comparator + */ + protected java.util.Comparator<Object> comparator; + + /** + * Constructor, with default size. + * @param comparator Comparator + */ + @SuppressWarnings("unchecked") + public ComparatorMinHeap(java.util.Comparator<? super K> comparator) { + super(); + this.comparator = (java.util.Comparator<Object>) java.util.Comparator.class.cast(comparator); + Object[] twoheap = new Object[TWO_HEAP_INITIAL_SIZE]; + + this.twoheap = twoheap; + this.fourheap = null; + this.size = 0; + this.modCount = 0; + } + + /** + * Constructor, with given minimum size. + * + * @param minsize Minimum size + * @param comparator Comparator + */ + @SuppressWarnings("unchecked") + public ComparatorMinHeap(int minsize, java.util.Comparator<? super K> comparator) { + super(); + this.comparator = (java.util.Comparator<Object>) java.util.Comparator.class.cast(comparator); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + Object[] twoheap = new Object[size]; + + this.twoheap = twoheap; + this.fourheap = null; + } else { + Object[] twoheap = new Object[TWO_HEAP_INITIAL_SIZE]; + Object[] fourheap = new Object[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + this.twoheap = twoheap; + this.fourheap = fourheap; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + Arrays.fill(twoheap, null); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + public void add(K o) { + final Object co = o; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + ++size; + heapifyUp2(twopos, co); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = new Object[FOUR_HEAP_INITIAL_SIZE]; + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. + fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + } + fourheap[fourpos] = co; + ++size; + heapifyUp4(fourpos, co); + ++modCount; + } + } + + @Override + public void add(K key, int max) { + if (size < max) { + add(key); + } else if (comparator.compare(twoheap[0], key) <= 0) { + replaceTopElement(key); + } + } + + @Override + @SuppressWarnings("unchecked") + public K replaceTopElement(K reinsert) { + final Object ret = twoheap[0]; + heapifyDown( reinsert); + ++modCount; + return (K)ret; + } + + /** + * Heapify-Up method for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + */ + private void heapifyUp2(int twopos, Object cur) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + Object par = twoheap[parent]; + if (comparator.compare(cur, par) >= 0) { + break; + } + twoheap[twopos] = par; + twopos = parent; + } + twoheap[twopos] = cur; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + */ + private void heapifyUp4(int fourpos, Object cur) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + Object par = fourheap[parent]; + if (comparator.compare(cur, par) >= 0) { + break; + } + fourheap[fourpos] = par; + fourpos = parent; + } + if (fourpos == 0 && comparator.compare(twoheap[0], cur) > 0) { + fourheap[0] = twoheap[0]; + twoheap[0] = cur; + } else { + fourheap[fourpos] = cur; + } + } + + @Override + @SuppressWarnings("unchecked") + public K poll() { + final Object ret = twoheap[0]; + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final Object reinsert = fourheap[last]; + fourheap[last] = null; + heapifyDown(reinsert); + } else if (size > 0) { + final Object reinsert = twoheap[size]; + twoheap[size] = null; + heapifyDown(reinsert); + } else { + twoheap[0] = null; + } + ++modCount; + return (K)ret; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + */ + private void heapifyDown(Object reinsert) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (comparator.compare(twoheap[1], twoheap[2]) <= 0) ? 1 : 2; + if (comparator.compare(fourheap[0], twoheap[best]) < 0) { + twoheap[0] = fourheap[0]; + heapifyDown4(0, reinsert); + } else { + twoheap[0] = twoheap[best]; + heapifyDown2(best, reinsert); + } + return; + } + heapifyDown2(0, reinsert); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + */ + private void heapifyDown2(int twopos, Object cur) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + Object best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && comparator.compare(best, twoheap[right]) > 0) { + bestchild = right; + best = twoheap[right]; + } + if (comparator.compare(cur, best) <= 0) { + break; + } + twoheap[twopos] = best; + twopos = bestchild; + } + twoheap[twopos] = cur; + } + + /** + * Heapify-Down for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + */ + private void heapifyDown4(int fourpos, Object cur) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + Object best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + Object nextchild = fourheap[candidate]; + if (comparator.compare(best, nextchild) > 0) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (comparator.compare(best, nextchild) > 0) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (comparator.compare(best, nextchild) > 0) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (comparator.compare(cur, best) <= 0) { + break; + } + fourheap[fourpos] = best; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + } + + @Override + @SuppressWarnings("unchecked") + public K peek() { + return (K)twoheap[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(ComparatorMinHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) { + buf.append(iter.get()).append(','); + } + buf.append(']'); + return buf.toString(); + } + + @Override + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (ObjectHeap.UnsortedIter<K> iter = heap.unsortedIter(); iter.valid(); iter.next()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements ObjectHeap.UnsortedIter<K> { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. + */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @SuppressWarnings("unchecked") + + @Override + public K get() { + return (K)((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleHeap.java index f9f928bd..acf77d86 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,53 +23,22 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import java.util.Arrays; - -import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter; /** - * Basic in-memory heap structure. - * - * This heap is built lazily: if you first add many elements, then poll the - * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log - * n). This is implemented via a simple validTo counter. + * Basic in-memory heap for double values. * * @author Erich Schubert + * + * @apiviz.has UnsortedIter */ -public abstract class DoubleHeap extends AbstractHeap { - /** - * Heap storage: queue - */ - protected transient double[] queue; - - /** - * Constructor with initial capacity. - * - * @param size initial capacity - */ - public DoubleHeap(int size) { - super(); - this.size = 0; - this.queue = new double[size]; - } - +public interface DoubleHeap { /** * Add a key-value pair to the heap * * @param key Key */ - public void add(double key) { - // resize when needed - if (size + 1 > queue.length) { - resize(size + 1); - } - // final int pos = size; - this.queue[size] = key; - this.size += 1; - heapifyUp(size - 1, key); - validSize += 1; - heapModified(); - } + void add(double key); /** * Add a key-value pair to the heap, except if the new element is larger than @@ -78,13 +47,7 @@ public abstract class DoubleHeap extends AbstractHeap { * @param key Key * @param max Maximum size of heap */ - public void add(double key, int max) { - if (size < max) { - add(key); - } else if (comp(key, peek())) { - replaceTopElement(key); - } - } + void add(double key, int max); /** * Combined operation that removes the top element, and inserts a new element @@ -93,172 +56,67 @@ public abstract class DoubleHeap extends AbstractHeap { * @param e New element to insert * @return Previous top element of the heap */ - public double replaceTopElement(double e) { - ensureValid(); - double oldroot = queue[0]; - heapifyDown(0, e); - heapModified(); - return oldroot; - } + double replaceTopElement(double e); /** * Get the current top key * * @return Top key */ - public double peek() { - if (size == 0) { - throw new ArrayIndexOutOfBoundsException("Peek() on an empty heap!"); - } - ensureValid(); - return queue[0]; - } + double peek(); /** * Remove the first element * * @return Top element */ - public double poll() { - return removeAt(0); - } + double poll(); /** - * Repair the heap + * Delete all elements from the heap. */ - protected void ensureValid() { - if (validSize != size) { - if (size > 1) { - // Parent of first invalid - int nextmin = validSize > 0 ? ((validSize - 1) >>> 1) : 0; - int curmin = MathUtil.nextAllOnesInt(nextmin); // Next line - int nextmax = curmin - 1; // End of valid line - int pos = (size - 2) >>> 1; // Parent of last element - // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin+", "+nextmin); - while (pos >= nextmin) { - // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin); - while (pos >= curmin) { - if (!heapifyDown(pos, queue[pos])) { - final int parent = (pos - 1) >>> 1; - if (parent < curmin) { - nextmin = Math.min(nextmin, parent); - nextmax = Math.max(nextmax, parent); - } - } - pos--; - } - curmin = nextmin; - pos = Math.min(pos, nextmax); - nextmax = -1; - } - } - validSize = size; - } - } + void clear(); /** - * Remove the element at the given position. + * Query the size * - * @param pos Element position. - * @return Removed element + * @return Size */ - protected double removeAt(int pos) { - if (pos < 0 || pos >= size) { - return 0.0; - } - final double top = queue[0]; - // Replacement object: - final double reinkey = queue[size - 1]; - // Keep heap in sync - if (validSize == size) { - size -= 1; - validSize -= 1; - heapifyDown(pos, reinkey); - } else { - size -= 1; - validSize = Math.min(pos >>> 1, validSize); - queue[pos] = reinkey; - } - heapModified(); - return top; - } - + public int size(); + /** - * Execute a "Heapify Upwards" aka "SiftUp". Used in insertions. + * Is the heap empty? * - * @param pos insertion position - * @param curkey Current key + * @return {@code true} when the size is 0. */ - protected void heapifyUp(int pos, double curkey) { - while (pos > 0) { - final int parent = (pos - 1) >>> 1; - double parkey = queue[parent]; - - if (comp(curkey, parkey)) { // Compare - break; - } - queue[pos] = parkey; - pos = parent; - } - queue[pos] = curkey; - } + public boolean isEmpty(); /** - * Execute a "Heapify Downwards" aka "SiftDown". Used in deletions. + * Get an unsorted iterator to inspect the heap. * - * @param ipos re-insertion position - * @param curkey Current key - * @return true when the order was changed + * @return Iterator */ - protected boolean heapifyDown(final int ipos, double curkey) { - int pos = ipos; - final int half = size >>> 1; - while (pos < half) { - // Get left child (must exist!) - int cpos = (pos << 1) + 1; - double chikey = queue[cpos]; - // Test right child, if present - final int rchild = cpos + 1; - if (rchild < size) { - double right = queue[rchild]; - if (comp(chikey, right)) { // Compare - cpos = rchild; - chikey = right; - } - } - - if (comp(chikey, curkey)) { // Compare - break; - } - queue[pos] = chikey; - pos = cpos; - } - queue[pos] = curkey; - return (pos == ipos); - } + UnsortedIter unsortedIter(); /** - * Test whether we need to resize to have the requested capacity. + * Unsorted iterator - in heap order. Does not poll the heap. * - * @param requiredSize required capacity - */ - protected final void resize(int requiredSize) { - queue = Arrays.copyOf(queue, desiredSize(requiredSize, queue.length)); - } - - /** - * Delete all elements from the heap. + * <pre> + * {@code + * for (DoubleHeap.UnsortedIter iter = heap.unsortedIter(); iter.valid(); iter.next()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert */ - @Override - public void clear() { - super.clear(); - for (int i = 0; i < size; i++) { - queue[i] = 0.0; - } + public static interface UnsortedIter extends Iter { + /** + * Get the iterators current object. + * + * @return Current object + */ + double get(); } - - /** - * Compare two objects - */ - abstract protected boolean comp(double o1, double o2); } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerHeap.java new file mode 100644 index 00000000..c3bf85f4 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerHeap.java @@ -0,0 +1,127 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter; + +/** + * Basic in-memory heap interface, for double keys and int values. + * + * @author Erich Schubert + * + * @apiviz.has UnsortedIter + */ +public interface DoubleIntegerHeap { + /** + * Add a key-value pair to the heap + * + * @param key Key + * @param val Value + */ + void add(double key, int val); + + /** + * Add a key-value pair to the heap if it improves the top. + * + * @param key Key + * @param val Value + * @param k Desired maximum size + */ + void add(double key, int val, int k); + + /** + * Combined operation that removes the top element, and inserts a new element + * instead. + * + * @param key Key of new element + * @param val Value of new element + */ + void replaceTopElement(double key, int val); + + /** + * Get the current top key + * + * @return Top key + */ + double peekKey(); + + /** + * Get the current top value + * + * @return Value + */ + int peekValue(); + + /** + * Remove the first element + */ + void poll(); + + /** + * Clear the heap contents. + */ + void clear(); + + /** + * Query the size + * + * @return Size + */ + public int size(); + + /** + * Is the heap empty? + * + * @return {@code true} when the size is 0. + */ + public boolean isEmpty(); + + /** + * Get an unsorted iterator to inspect the heap. + * + * @return Iterator + */ + UnsortedIter unsortedIter(); + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * @author Erich Schubert + */ + public static interface UnsortedIter extends Iter { + /** + * Get the current key + * + * @return Current key + */ + double getKey(); + + /** + * Get the current value + * + * @return Current value + */ + int getValue(); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMaxHeap.java new file mode 100644 index 00000000..34f1e889 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMaxHeap.java @@ -0,0 +1,478 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). + * + * This code was automatically instantiated for the types: Double and Integer + * + * This combination was found to work quite well in benchmarks, but YMMV. + * + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> + * + * @author Erich Schubert + * + * @apiviz.has UnsortedIter + */ +public class DoubleIntegerMaxHeap implements DoubleIntegerHeap { + /** + * Base heap. + */ + protected double[] twoheap; + + /** + * Base heap values. + */ + protected int[] twovals; + + /** + * Extension heap. + */ + protected double[] fourheap; + + /** + * Extension heapvalues. + */ + protected int[] fourvals; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + + /** + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. + */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + + /** + * Constructor, with default size. + */ + public DoubleIntegerMaxHeap() { + super(); + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + int[] twovals = new int[TWO_HEAP_INITIAL_SIZE]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + this.size = 0; + this.modCount = 0; + } + + /** + * Constructor, with given minimum size. + * + * @param minsize Minimum size + */ + public DoubleIntegerMaxHeap(int minsize) { + super(); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + double[] twoheap = new double[size]; + int[] twovals = new int[size]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + } else { + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + int[] twovals = new int[TWO_HEAP_INITIAL_SIZE]; + double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + int[] fourvals = new int[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = fourheap; + this.fourvals = fourvals; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + fourvals = null; + Arrays.fill(twoheap, 0.0); + Arrays.fill(twovals, 0); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + public void add(double o, int v) { + final double co = o; + final int cv = v; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + twovals[twopos] = cv; + ++size; + heapifyUp2(twopos, co, cv); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = new double[FOUR_HEAP_INITIAL_SIZE]; + fourvals = new int[FOUR_HEAP_INITIAL_SIZE]; + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. + fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1)); + } + fourheap[fourpos] = co; + fourvals[fourpos] = cv; + ++size; + heapifyUp4(fourpos, co, cv); + ++modCount; + } + } + + @Override + public void add(double key, int val, int max) { + if (size < max) { + add(key, val); + } else if (twoheap[0] >= key) { + replaceTopElement(key, val); + } + } + + @Override + public void replaceTopElement(double reinsert, int val) { + heapifyDown(reinsert, val); + ++modCount; + } + + /** + * Heapify-Up method for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Current value + */ + private void heapifyUp2(int twopos, double cur, int val) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + double par = twoheap[parent]; + if (cur <= par) { + break; + } + twoheap[twopos] = par; + twovals[twopos] = twovals[parent]; + twopos = parent; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + * @param val Current value + */ + private void heapifyUp4(int fourpos, double cur, int val) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + double par = fourheap[parent]; + if (cur <= par) { + break; + } + fourheap[fourpos] = par; + fourvals[fourpos] = fourvals[parent]; + fourpos = parent; + } + if (fourpos == 0 && twoheap[0] < cur) { + fourheap[0] = twoheap[0]; + fourvals[0] = twovals[0]; + twoheap[0] = cur; + twovals[0] = val; + } else { + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + } + + @Override + public void poll() { + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final double reinsert = fourheap[last]; + final int reinsertv = fourvals[last]; + fourheap[last] = 0.0; + fourvals[last] = 0; + heapifyDown(reinsert, reinsertv); + } else if (size > 0) { + final double reinsert = twoheap[size]; + final int reinsertv = twovals[size]; + twoheap[size] = 0.0; + twovals[size] = 0; + heapifyDown(reinsert, reinsertv); + } else { + twoheap[0] = 0.0; + twovals[0] = 0; + } + ++modCount; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + * @param val Value to reinsert. + */ + private void heapifyDown(double reinsert, int val) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (twoheap[1] >= twoheap[2]) ? 1 : 2; + if (fourheap[0] > twoheap[best]) { + twoheap[0] = fourheap[0]; + twovals[0] = fourvals[0]; + heapifyDown4(0, reinsert, val); + } else { + twoheap[0] = twoheap[best]; + twovals[0] = twovals[best]; + heapifyDown2(best, reinsert, val); + } + return; + } + heapifyDown2(0, reinsert, val); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Value to reinsert. + */ + private void heapifyDown2(int twopos, double cur, int val) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + double best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && best < twoheap[right]) { + bestchild = right; + best = twoheap[right]; + } + if (cur >= best) { + break; + } + twoheap[twopos] = best; + twovals[twopos] = twovals[bestchild]; + twopos = bestchild; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Down for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + * @param val Value to reinsert. + */ + private void heapifyDown4(int fourpos, double cur, int val) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + double best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + double nextchild = fourheap[candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (cur >= best) { + break; + } + fourheap[fourpos] = best; + fourvals[fourpos] = fourvals[bestchild]; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + + @Override + public double peekKey() { + return twoheap[0]; + } + + @Override + public int peekValue() { + return twovals[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(DoubleIntegerMaxHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) { + buf.append(iter.getKey()).append(':').append(iter.getValue()).append(','); + } + buf.append(']'); + return buf.toString(); + } + + @Override + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (DoubleIntegerHeap.UnsortedIter iter = heap.unsortedIter(); iter.valid(); iter.next()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements DoubleIntegerHeap.UnsortedIter { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. + */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @Override + public double getKey() { + return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } + + @Override + public int getValue() { + return ((pos < TWO_HEAP_MAX_SIZE) ? twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMinHeap.java new file mode 100644 index 00000000..ca6192ad --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMinHeap.java @@ -0,0 +1,478 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). + * + * This code was automatically instantiated for the types: Double and Integer + * + * This combination was found to work quite well in benchmarks, but YMMV. + * + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> + * + * @author Erich Schubert + * + * @apiviz.has UnsortedIter + */ +public class DoubleIntegerMinHeap implements DoubleIntegerHeap { + /** + * Base heap. + */ + protected double[] twoheap; + + /** + * Base heap values. + */ + protected int[] twovals; + + /** + * Extension heap. + */ + protected double[] fourheap; + + /** + * Extension heapvalues. + */ + protected int[] fourvals; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + + /** + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. + */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + + /** + * Constructor, with default size. + */ + public DoubleIntegerMinHeap() { + super(); + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + int[] twovals = new int[TWO_HEAP_INITIAL_SIZE]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + this.size = 0; + this.modCount = 0; + } + + /** + * Constructor, with given minimum size. + * + * @param minsize Minimum size + */ + public DoubleIntegerMinHeap(int minsize) { + super(); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + double[] twoheap = new double[size]; + int[] twovals = new int[size]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + } else { + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + int[] twovals = new int[TWO_HEAP_INITIAL_SIZE]; + double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + int[] fourvals = new int[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = fourheap; + this.fourvals = fourvals; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + fourvals = null; + Arrays.fill(twoheap, 0.0); + Arrays.fill(twovals, 0); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + public void add(double o, int v) { + final double co = o; + final int cv = v; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + twovals[twopos] = cv; + ++size; + heapifyUp2(twopos, co, cv); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = new double[FOUR_HEAP_INITIAL_SIZE]; + fourvals = new int[FOUR_HEAP_INITIAL_SIZE]; + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. + fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1)); + } + fourheap[fourpos] = co; + fourvals[fourpos] = cv; + ++size; + heapifyUp4(fourpos, co, cv); + ++modCount; + } + } + + @Override + public void add(double key, int val, int max) { + if (size < max) { + add(key, val); + } else if (twoheap[0] <= key) { + replaceTopElement(key, val); + } + } + + @Override + public void replaceTopElement(double reinsert, int val) { + heapifyDown(reinsert, val); + ++modCount; + } + + /** + * Heapify-Up method for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Current value + */ + private void heapifyUp2(int twopos, double cur, int val) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + double par = twoheap[parent]; + if (cur >= par) { + break; + } + twoheap[twopos] = par; + twovals[twopos] = twovals[parent]; + twopos = parent; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + * @param val Current value + */ + private void heapifyUp4(int fourpos, double cur, int val) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + double par = fourheap[parent]; + if (cur >= par) { + break; + } + fourheap[fourpos] = par; + fourvals[fourpos] = fourvals[parent]; + fourpos = parent; + } + if (fourpos == 0 && twoheap[0] > cur) { + fourheap[0] = twoheap[0]; + fourvals[0] = twovals[0]; + twoheap[0] = cur; + twovals[0] = val; + } else { + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + } + + @Override + public void poll() { + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final double reinsert = fourheap[last]; + final int reinsertv = fourvals[last]; + fourheap[last] = 0.0; + fourvals[last] = 0; + heapifyDown(reinsert, reinsertv); + } else if (size > 0) { + final double reinsert = twoheap[size]; + final int reinsertv = twovals[size]; + twoheap[size] = 0.0; + twovals[size] = 0; + heapifyDown(reinsert, reinsertv); + } else { + twoheap[0] = 0.0; + twovals[0] = 0; + } + ++modCount; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + * @param val Value to reinsert. + */ + private void heapifyDown(double reinsert, int val) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (twoheap[1] <= twoheap[2]) ? 1 : 2; + if (fourheap[0] < twoheap[best]) { + twoheap[0] = fourheap[0]; + twovals[0] = fourvals[0]; + heapifyDown4(0, reinsert, val); + } else { + twoheap[0] = twoheap[best]; + twovals[0] = twovals[best]; + heapifyDown2(best, reinsert, val); + } + return; + } + heapifyDown2(0, reinsert, val); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Value to reinsert. + */ + private void heapifyDown2(int twopos, double cur, int val) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + double best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && best > twoheap[right]) { + bestchild = right; + best = twoheap[right]; + } + if (cur <= best) { + break; + } + twoheap[twopos] = best; + twovals[twopos] = twovals[bestchild]; + twopos = bestchild; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Down for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + * @param val Value to reinsert. + */ + private void heapifyDown4(int fourpos, double cur, int val) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + double best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + double nextchild = fourheap[candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (cur <= best) { + break; + } + fourheap[fourpos] = best; + fourvals[fourpos] = fourvals[bestchild]; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + + @Override + public double peekKey() { + return twoheap[0]; + } + + @Override + public int peekValue() { + return twovals[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(DoubleIntegerMinHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) { + buf.append(iter.getKey()).append(':').append(iter.getValue()).append(','); + } + buf.append(']'); + return buf.toString(); + } + + @Override + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (DoubleIntegerHeap.UnsortedIter iter = heap.unsortedIter(); iter.valid(); iter.next()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements DoubleIntegerHeap.UnsortedIter { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. + */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @Override + public double getKey() { + return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } + + @Override + public int getValue() { + return ((pos < TWO_HEAP_MAX_SIZE) ? twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongHeap.java new file mode 100644 index 00000000..b93adafa --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongHeap.java @@ -0,0 +1,127 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter; + +/** + * Basic in-memory heap interface, for double keys and long values. + * + * @author Erich Schubert + * + * @apiviz.has UnsortedIter + */ +public interface DoubleLongHeap { + /** + * Add a key-value pair to the heap + * + * @param key Key + * @param val Value + */ + void add(double key, long val); + + /** + * Add a key-value pair to the heap if it improves the top. + * + * @param key Key + * @param val Value + * @param k Desired maximum size + */ + void add(double key, long val, int k); + + /** + * Combined operation that removes the top element, and inserts a new element + * instead. + * + * @param key Key of new element + * @param val Value of new element + */ + void replaceTopElement(double key, long val); + + /** + * Get the current top key + * + * @return Top key + */ + double peekKey(); + + /** + * Get the current top value + * + * @return Value + */ + long peekValue(); + + /** + * Remove the first element + */ + void poll(); + + /** + * Clear the heap contents. + */ + void clear(); + + /** + * Query the size + * + * @return Size + */ + public int size(); + + /** + * Is the heap empty? + * + * @return {@code true} when the size is 0. + */ + public boolean isEmpty(); + + /** + * Get an unsorted iterator to inspect the heap. + * + * @return Iterator + */ + UnsortedIter unsortedIter(); + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * @author Erich Schubert + */ + public static interface UnsortedIter extends Iter { + /** + * Get the current key + * + * @return Current key + */ + double getKey(); + + /** + * Get the current value + * + * @return Current value + */ + long getValue(); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMaxHeap.java new file mode 100644 index 00000000..6d15656c --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMaxHeap.java @@ -0,0 +1,478 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). + * + * This code was automatically instantiated for the types: Double and Long + * + * This combination was found to work quite well in benchmarks, but YMMV. + * + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> + * + * @author Erich Schubert + * + * @apiviz.has UnsortedIter + */ +public class DoubleLongMaxHeap implements DoubleLongHeap { + /** + * Base heap. + */ + protected double[] twoheap; + + /** + * Base heap values. + */ + protected long[] twovals; + + /** + * Extension heap. + */ + protected double[] fourheap; + + /** + * Extension heapvalues. + */ + protected long[] fourvals; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + + /** + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. + */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + + /** + * Constructor, with default size. + */ + public DoubleLongMaxHeap() { + super(); + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + long[] twovals = new long[TWO_HEAP_INITIAL_SIZE]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + this.size = 0; + this.modCount = 0; + } + + /** + * Constructor, with given minimum size. + * + * @param minsize Minimum size + */ + public DoubleLongMaxHeap(int minsize) { + super(); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + double[] twoheap = new double[size]; + long[] twovals = new long[size]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + } else { + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + long[] twovals = new long[TWO_HEAP_INITIAL_SIZE]; + double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + long[] fourvals = new long[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = fourheap; + this.fourvals = fourvals; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + fourvals = null; + Arrays.fill(twoheap, 0.0); + Arrays.fill(twovals, 0); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + public void add(double o, long v) { + final double co = o; + final long cv = v; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + twovals[twopos] = cv; + ++size; + heapifyUp2(twopos, co, cv); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = new double[FOUR_HEAP_INITIAL_SIZE]; + fourvals = new long[FOUR_HEAP_INITIAL_SIZE]; + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. + fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1)); + } + fourheap[fourpos] = co; + fourvals[fourpos] = cv; + ++size; + heapifyUp4(fourpos, co, cv); + ++modCount; + } + } + + @Override + public void add(double key, long val, int max) { + if (size < max) { + add(key, val); + } else if (twoheap[0] >= key) { + replaceTopElement(key, val); + } + } + + @Override + public void replaceTopElement(double reinsert, long val) { + heapifyDown(reinsert, val); + ++modCount; + } + + /** + * Heapify-Up method for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Current value + */ + private void heapifyUp2(int twopos, double cur, long val) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + double par = twoheap[parent]; + if (cur <= par) { + break; + } + twoheap[twopos] = par; + twovals[twopos] = twovals[parent]; + twopos = parent; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + * @param val Current value + */ + private void heapifyUp4(int fourpos, double cur, long val) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + double par = fourheap[parent]; + if (cur <= par) { + break; + } + fourheap[fourpos] = par; + fourvals[fourpos] = fourvals[parent]; + fourpos = parent; + } + if (fourpos == 0 && twoheap[0] < cur) { + fourheap[0] = twoheap[0]; + fourvals[0] = twovals[0]; + twoheap[0] = cur; + twovals[0] = val; + } else { + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + } + + @Override + public void poll() { + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final double reinsert = fourheap[last]; + final long reinsertv = fourvals[last]; + fourheap[last] = 0.0; + fourvals[last] = 0; + heapifyDown(reinsert, reinsertv); + } else if (size > 0) { + final double reinsert = twoheap[size]; + final long reinsertv = twovals[size]; + twoheap[size] = 0.0; + twovals[size] = 0; + heapifyDown(reinsert, reinsertv); + } else { + twoheap[0] = 0.0; + twovals[0] = 0; + } + ++modCount; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + * @param val Value to reinsert. + */ + private void heapifyDown(double reinsert, long val) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (twoheap[1] >= twoheap[2]) ? 1 : 2; + if (fourheap[0] > twoheap[best]) { + twoheap[0] = fourheap[0]; + twovals[0] = fourvals[0]; + heapifyDown4(0, reinsert, val); + } else { + twoheap[0] = twoheap[best]; + twovals[0] = twovals[best]; + heapifyDown2(best, reinsert, val); + } + return; + } + heapifyDown2(0, reinsert, val); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Value to reinsert. + */ + private void heapifyDown2(int twopos, double cur, long val) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + double best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && best < twoheap[right]) { + bestchild = right; + best = twoheap[right]; + } + if (cur >= best) { + break; + } + twoheap[twopos] = best; + twovals[twopos] = twovals[bestchild]; + twopos = bestchild; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Down for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + * @param val Value to reinsert. + */ + private void heapifyDown4(int fourpos, double cur, long val) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + double best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + double nextchild = fourheap[candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (cur >= best) { + break; + } + fourheap[fourpos] = best; + fourvals[fourpos] = fourvals[bestchild]; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + + @Override + public double peekKey() { + return twoheap[0]; + } + + @Override + public long peekValue() { + return twovals[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(DoubleLongMaxHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) { + buf.append(iter.getKey()).append(':').append(iter.getValue()).append(','); + } + buf.append(']'); + return buf.toString(); + } + + @Override + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (DoubleLongHeap.UnsortedIter iter = heap.unsortedIter(); iter.valid(); iter.next()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements DoubleLongHeap.UnsortedIter { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. + */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @Override + public double getKey() { + return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } + + @Override + public long getValue() { + return ((pos < TWO_HEAP_MAX_SIZE) ? twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMinHeap.java new file mode 100644 index 00000000..d38eb6e3 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMinHeap.java @@ -0,0 +1,478 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). + * + * This code was automatically instantiated for the types: Double and Long + * + * This combination was found to work quite well in benchmarks, but YMMV. + * + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> + * + * @author Erich Schubert + * + * @apiviz.has UnsortedIter + */ +public class DoubleLongMinHeap implements DoubleLongHeap { + /** + * Base heap. + */ + protected double[] twoheap; + + /** + * Base heap values. + */ + protected long[] twovals; + + /** + * Extension heap. + */ + protected double[] fourheap; + + /** + * Extension heapvalues. + */ + protected long[] fourvals; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + + /** + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. + */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + + /** + * Constructor, with default size. + */ + public DoubleLongMinHeap() { + super(); + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + long[] twovals = new long[TWO_HEAP_INITIAL_SIZE]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + this.size = 0; + this.modCount = 0; + } + + /** + * Constructor, with given minimum size. + * + * @param minsize Minimum size + */ + public DoubleLongMinHeap(int minsize) { + super(); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + double[] twoheap = new double[size]; + long[] twovals = new long[size]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + } else { + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + long[] twovals = new long[TWO_HEAP_INITIAL_SIZE]; + double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + long[] fourvals = new long[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = fourheap; + this.fourvals = fourvals; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + fourvals = null; + Arrays.fill(twoheap, 0.0); + Arrays.fill(twovals, 0); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + public void add(double o, long v) { + final double co = o; + final long cv = v; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + twovals[twopos] = cv; + ++size; + heapifyUp2(twopos, co, cv); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = new double[FOUR_HEAP_INITIAL_SIZE]; + fourvals = new long[FOUR_HEAP_INITIAL_SIZE]; + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. + fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1)); + } + fourheap[fourpos] = co; + fourvals[fourpos] = cv; + ++size; + heapifyUp4(fourpos, co, cv); + ++modCount; + } + } + + @Override + public void add(double key, long val, int max) { + if (size < max) { + add(key, val); + } else if (twoheap[0] <= key) { + replaceTopElement(key, val); + } + } + + @Override + public void replaceTopElement(double reinsert, long val) { + heapifyDown(reinsert, val); + ++modCount; + } + + /** + * Heapify-Up method for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Current value + */ + private void heapifyUp2(int twopos, double cur, long val) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + double par = twoheap[parent]; + if (cur >= par) { + break; + } + twoheap[twopos] = par; + twovals[twopos] = twovals[parent]; + twopos = parent; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + * @param val Current value + */ + private void heapifyUp4(int fourpos, double cur, long val) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + double par = fourheap[parent]; + if (cur >= par) { + break; + } + fourheap[fourpos] = par; + fourvals[fourpos] = fourvals[parent]; + fourpos = parent; + } + if (fourpos == 0 && twoheap[0] > cur) { + fourheap[0] = twoheap[0]; + fourvals[0] = twovals[0]; + twoheap[0] = cur; + twovals[0] = val; + } else { + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + } + + @Override + public void poll() { + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final double reinsert = fourheap[last]; + final long reinsertv = fourvals[last]; + fourheap[last] = 0.0; + fourvals[last] = 0; + heapifyDown(reinsert, reinsertv); + } else if (size > 0) { + final double reinsert = twoheap[size]; + final long reinsertv = twovals[size]; + twoheap[size] = 0.0; + twovals[size] = 0; + heapifyDown(reinsert, reinsertv); + } else { + twoheap[0] = 0.0; + twovals[0] = 0; + } + ++modCount; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + * @param val Value to reinsert. + */ + private void heapifyDown(double reinsert, long val) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (twoheap[1] <= twoheap[2]) ? 1 : 2; + if (fourheap[0] < twoheap[best]) { + twoheap[0] = fourheap[0]; + twovals[0] = fourvals[0]; + heapifyDown4(0, reinsert, val); + } else { + twoheap[0] = twoheap[best]; + twovals[0] = twovals[best]; + heapifyDown2(best, reinsert, val); + } + return; + } + heapifyDown2(0, reinsert, val); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Value to reinsert. + */ + private void heapifyDown2(int twopos, double cur, long val) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + double best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && best > twoheap[right]) { + bestchild = right; + best = twoheap[right]; + } + if (cur <= best) { + break; + } + twoheap[twopos] = best; + twovals[twopos] = twovals[bestchild]; + twopos = bestchild; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Down for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + * @param val Value to reinsert. + */ + private void heapifyDown4(int fourpos, double cur, long val) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + double best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + double nextchild = fourheap[candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (cur <= best) { + break; + } + fourheap[fourpos] = best; + fourvals[fourpos] = fourvals[bestchild]; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + + @Override + public double peekKey() { + return twoheap[0]; + } + + @Override + public long peekValue() { + return twovals[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(DoubleLongMinHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) { + buf.append(iter.getKey()).append(':').append(iter.getValue()).append(','); + } + buf.append(']'); + return buf.toString(); + } + + @Override + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (DoubleLongHeap.UnsortedIter iter = heap.unsortedIter(); iter.valid(); iter.next()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements DoubleLongHeap.UnsortedIter { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. + */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @Override + public double getKey() { + return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } + + @Override + public long getValue() { + return ((pos < TWO_HEAP_MAX_SIZE) ? twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMaxHeap.java index 1b7d6037..7ea28f14 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMaxHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMaxHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,40 +23,400 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; along with this program. If not, see <http://www.gnu.org/licenses/>. */ +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + /** - * Basic in-memory heap structure. + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). + * + * This code was automatically instantiated for the type: Double * - * This heap is built lazily: if you first add many elements, then poll the - * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log - * n). This is implemented via a simple validTo counter. + * This combination was found to work quite well in benchmarks, but YMMV. + * + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> * * @author Erich Schubert + * + * @apiviz.has UnsortedIter */ -public class DoubleMaxHeap extends DoubleHeap { +public class DoubleMaxHeap implements DoubleHeap { + /** + * Base heap. + */ + protected double[] twoheap; + + /** + * Extension heap. + */ + protected double[] fourheap; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + /** - * Constructor with default capacity. + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. + */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + + /** + * Constructor, with default size. */ public DoubleMaxHeap() { - super(DEFAULT_INITIAL_CAPACITY); + super(); + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + + this.twoheap = twoheap; + this.fourheap = null; + this.size = 0; + this.modCount = 0; } /** - * Constructor with initial capacity. + * Constructor, with given minimum size. * - * @param size initial capacity + * @param minsize Minimum size */ - public DoubleMaxHeap(int size) { - super(size); + public DoubleMaxHeap(int minsize) { + super(); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + double[] twoheap = new double[size]; + + this.twoheap = twoheap; + this.fourheap = null; + } else { + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + this.twoheap = twoheap; + this.fourheap = fourheap; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + Arrays.fill(twoheap, 0.0); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + public void add(double o) { + final double co = o; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + ++size; + heapifyUp2(twopos, co); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = new double[FOUR_HEAP_INITIAL_SIZE]; + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. + fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + } + fourheap[fourpos] = co; + ++size; + heapifyUp4(fourpos, co); + ++modCount; + } + } + + @Override + public void add(double key, int max) { + if (size < max) { + add(key); + } else if (twoheap[0] >= key) { + replaceTopElement(key); + } + } + + @Override + public double replaceTopElement(double reinsert) { + final double ret = twoheap[0]; + heapifyDown( reinsert); + ++modCount; + return ret; + } + + /** + * Heapify-Up method for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + */ + private void heapifyUp2(int twopos, double cur) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + double par = twoheap[parent]; + if (cur <= par) { + break; + } + twoheap[twopos] = par; + twopos = parent; + } + twoheap[twopos] = cur; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + */ + private void heapifyUp4(int fourpos, double cur) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + double par = fourheap[parent]; + if (cur <= par) { + break; + } + fourheap[fourpos] = par; + fourpos = parent; + } + if (fourpos == 0 && twoheap[0] < cur) { + fourheap[0] = twoheap[0]; + twoheap[0] = cur; + } else { + fourheap[fourpos] = cur; + } + } + + @Override + public double poll() { + final double ret = twoheap[0]; + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final double reinsert = fourheap[last]; + fourheap[last] = 0.0; + heapifyDown(reinsert); + } else if (size > 0) { + final double reinsert = twoheap[size]; + twoheap[size] = 0.0; + heapifyDown(reinsert); + } else { + twoheap[0] = 0.0; + } + ++modCount; + return ret; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + */ + private void heapifyDown(double reinsert) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (twoheap[1] >= twoheap[2]) ? 1 : 2; + if (fourheap[0] > twoheap[best]) { + twoheap[0] = fourheap[0]; + heapifyDown4(0, reinsert); + } else { + twoheap[0] = twoheap[best]; + heapifyDown2(best, reinsert); + } + return; + } + heapifyDown2(0, reinsert); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + */ + private void heapifyDown2(int twopos, double cur) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + double best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && best < twoheap[right]) { + bestchild = right; + best = twoheap[right]; + } + if (cur >= best) { + break; + } + twoheap[twopos] = best; + twopos = bestchild; + } + twoheap[twopos] = cur; } /** - * Compare two objects + * Heapify-Down for 4-ary heap. * - * @param o1 First object - * @param o2 Second object + * @param fourpos Position in 4-ary heap. + * @param cur Current object */ + private void heapifyDown4(int fourpos, double cur) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + double best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + double nextchild = fourheap[candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (cur >= best) { + break; + } + fourheap[fourpos] = best; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + } + + @Override + public double peek() { + return twoheap[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(DoubleMaxHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) { + buf.append(iter.get()).append(','); + } + buf.append(']'); + return buf.toString(); + } + @Override - protected boolean comp(double o1, double o2) { - return o1 < o2; + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (DoubleHeap.UnsortedIter iter = heap.unsortedIter(); iter.valid(); iter.next()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements DoubleHeap.UnsortedIter { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. + */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @Override + public double get() { + return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMinHeap.java index 2ce05ff9..e9334153 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMinHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMinHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,40 +23,400 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; along with this program. If not, see <http://www.gnu.org/licenses/>. */ +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + /** - * Basic in-memory heap structure. + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). + * + * This code was automatically instantiated for the type: Double * - * This heap is built lazily: if you first add many elements, then poll the - * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log - * n). This is implemented via a simple validTo counter. + * This combination was found to work quite well in benchmarks, but YMMV. + * + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> * * @author Erich Schubert + * + * @apiviz.has UnsortedIter */ -public class DoubleMinHeap extends DoubleHeap { +public class DoubleMinHeap implements DoubleHeap { + /** + * Base heap. + */ + protected double[] twoheap; + + /** + * Extension heap. + */ + protected double[] fourheap; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + /** - * Constructor with default capacity. + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. + */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + + /** + * Constructor, with default size. */ public DoubleMinHeap() { - super(DEFAULT_INITIAL_CAPACITY); + super(); + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + + this.twoheap = twoheap; + this.fourheap = null; + this.size = 0; + this.modCount = 0; } /** - * Constructor with initial capacity. + * Constructor, with given minimum size. * - * @param size initial capacity + * @param minsize Minimum size */ - public DoubleMinHeap(int size) { - super(size); + public DoubleMinHeap(int minsize) { + super(); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + double[] twoheap = new double[size]; + + this.twoheap = twoheap; + this.fourheap = null; + } else { + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + this.twoheap = twoheap; + this.fourheap = fourheap; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + Arrays.fill(twoheap, 0.0); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + public void add(double o) { + final double co = o; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + ++size; + heapifyUp2(twopos, co); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = new double[FOUR_HEAP_INITIAL_SIZE]; + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. + fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + } + fourheap[fourpos] = co; + ++size; + heapifyUp4(fourpos, co); + ++modCount; + } + } + + @Override + public void add(double key, int max) { + if (size < max) { + add(key); + } else if (twoheap[0] <= key) { + replaceTopElement(key); + } + } + + @Override + public double replaceTopElement(double reinsert) { + final double ret = twoheap[0]; + heapifyDown( reinsert); + ++modCount; + return ret; + } + + /** + * Heapify-Up method for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + */ + private void heapifyUp2(int twopos, double cur) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + double par = twoheap[parent]; + if (cur >= par) { + break; + } + twoheap[twopos] = par; + twopos = parent; + } + twoheap[twopos] = cur; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + */ + private void heapifyUp4(int fourpos, double cur) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + double par = fourheap[parent]; + if (cur >= par) { + break; + } + fourheap[fourpos] = par; + fourpos = parent; + } + if (fourpos == 0 && twoheap[0] > cur) { + fourheap[0] = twoheap[0]; + twoheap[0] = cur; + } else { + fourheap[fourpos] = cur; + } + } + + @Override + public double poll() { + final double ret = twoheap[0]; + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final double reinsert = fourheap[last]; + fourheap[last] = 0.0; + heapifyDown(reinsert); + } else if (size > 0) { + final double reinsert = twoheap[size]; + twoheap[size] = 0.0; + heapifyDown(reinsert); + } else { + twoheap[0] = 0.0; + } + ++modCount; + return ret; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + */ + private void heapifyDown(double reinsert) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (twoheap[1] <= twoheap[2]) ? 1 : 2; + if (fourheap[0] < twoheap[best]) { + twoheap[0] = fourheap[0]; + heapifyDown4(0, reinsert); + } else { + twoheap[0] = twoheap[best]; + heapifyDown2(best, reinsert); + } + return; + } + heapifyDown2(0, reinsert); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + */ + private void heapifyDown2(int twopos, double cur) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + double best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && best > twoheap[right]) { + bestchild = right; + best = twoheap[right]; + } + if (cur <= best) { + break; + } + twoheap[twopos] = best; + twopos = bestchild; + } + twoheap[twopos] = cur; } /** - * Compare two objects + * Heapify-Down for 4-ary heap. * - * @param o1 First object - * @param o2 Second object + * @param fourpos Position in 4-ary heap. + * @param cur Current object */ + private void heapifyDown4(int fourpos, double cur) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + double best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + double nextchild = fourheap[candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (cur <= best) { + break; + } + fourheap[fourpos] = best; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + } + + @Override + public double peek() { + return twoheap[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(DoubleMinHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) { + buf.append(iter.get()).append(','); + } + buf.append(']'); + return buf.toString(); + } + @Override - protected boolean comp(double o1, double o2) { - return o1 > o2; + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (DoubleHeap.UnsortedIter iter = heap.unsortedIter(); iter.valid(); iter.next()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements DoubleHeap.UnsortedIter { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. + */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @Override + public double get() { + return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjMaxHeap.java deleted file mode 100644 index 8417309a..00000000 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjMaxHeap.java +++ /dev/null @@ -1,328 +0,0 @@ -package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; - -/* - This file is part of ELKI: - Environment for Developing KDD-Applications Supported by Index-Structures - - Copyright (C) 2012 - Ludwig-Maximilians-Universität München - Lehr- und Forschungseinheit für Datenbanksysteme - ELKI Development Team - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -import java.util.Arrays; -import java.util.Comparator; - -import de.lmu.ifi.dbs.elki.math.MathUtil; - -/** - * Basic in-memory heap structure. - * - * This heap is built lazily: if you first add many elements, then poll the - * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log - * n). This is implemented via a simple validTo counter. - * - * @author Erich Schubert - * - * @param <V> value type - */ -public class DoubleObjMaxHeap<V> { - /** - * Heap storage: keys - */ - protected double[] keys; - - /** - * Heap storage: values - */ - protected Object[] values; - - /** - * Current number of objects - */ - protected int size = 0; - - /** - * Indicate up to where the heap is valid - */ - protected int validSize = 0; - - /** - * (Structural) modification counter. Used to invalidate iterators. - */ - public transient int modCount = 0; - - /** - * Default initial capacity - */ - private static final int DEFAULT_INITIAL_CAPACITY = 11; - - /** - * Default constructor: default capacity, natural ordering. - */ - public DoubleObjMaxHeap() { - this(DEFAULT_INITIAL_CAPACITY); - } - - /** - * Constructor with initial capacity and {@link Comparator}. - * - * @param size initial capacity - */ - public DoubleObjMaxHeap(int size) { - super(); - this.size = 0; - this.keys = new double[size]; - this.values = new Object[size]; - } - - /** - * Add a key-value pair to the heap - * - * @param key Key - * @param val Value - * @return Success code - */ - public boolean add(double key, V val) { - // resize when needed - if(size + 1 > keys.length) { - resize(size + 1); - } - // final int pos = size; - this.keys[size] = key; - this.values[size] = val; - this.size += 1; - heapifyUp(size - 1, key, val); - validSize += 1; - // We have changed - return true according to {@link Collection#put} - modCount++; - return true; - } - - /** - * Get the current top key - * - * @return Top key - */ - public double peekKey() { - if(size == 0) { - throw new ArrayIndexOutOfBoundsException("Peek() on an empty heap!"); - } - ensureValid(); - return keys[0]; - } - - /** - * Get the current top value - * - * @return Value - */ - @SuppressWarnings("unchecked") - public V peekValue() { - if(size == 0) { - throw new ArrayIndexOutOfBoundsException("Peek() on an empty heap!"); - } - ensureValid(); - return (V) values[0]; - } - - /** - * Remove the first element - */ - public void poll() { - removeAt(0); - } - - /** - * Repair the heap - */ - protected void ensureValid() { - if(validSize != size) { - if(size > 1) { - // Parent of first invalid - int nextmin = validSize > 0 ? ((validSize - 1) >>> 1) : 0; - int curmin = MathUtil.nextAllOnesInt(nextmin); // Next line - int nextmax = curmin - 1; // End of valid line - int pos = (size - 2) >>> 1; // Parent of last element - // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin+", "+nextmin); - while(pos >= nextmin) { - // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin); - while(pos >= curmin) { - if(!heapifyDown(pos, keys[pos], values[pos])) { - final int parent = (pos - 1) >>> 1; - if(parent < curmin) { - nextmin = Math.min(nextmin, parent); - nextmax = Math.max(nextmax, parent); - } - } - pos--; - } - curmin = nextmin; - pos = Math.min(pos, nextmax); - nextmax = -1; - } - } - validSize = size; - } - } - - /** - * Remove the element at the given position. - * - * @param pos Element position. - */ - protected void removeAt(int pos) { - if(pos < 0 || pos >= size) { - return; - } - // Replacement object: - final double reinkey = keys[size - 1]; - final Object reinval = values[size - 1]; - values[size - 1] = null; - // Keep heap in sync - if(validSize == size) { - size -= 1; - validSize -= 1; - heapifyDown(pos, reinkey, reinval); - } - else { - size -= 1; - validSize = Math.min(pos >>> 1, validSize); - keys[pos] = reinkey; - values[pos] = reinval; - } - modCount++; - } - - /** - * Execute a "Heapify Upwards" aka "SiftUp". Used in insertions. - * - * @param pos insertion position - * @param curkey Current key - * @param curval Current value - */ - protected void heapifyUp(int pos, double curkey, Object curval) { - while(pos > 0) { - final int parent = (pos - 1) >>> 1; - double parkey = keys[parent]; - - if(curkey <= parkey) { // Compare - break; - } - keys[pos] = parkey; - values[pos] = values[parent]; - pos = parent; - } - keys[pos] = curkey; - values[pos] = curval; - } - - /** - * Execute a "Heapify Downwards" aka "SiftDown". Used in deletions. - * - * @param ipos re-insertion position - * @param curkey Current key - * @param curval Current value - * @return true when the order was changed - */ - protected boolean heapifyDown(final int ipos, double curkey, Object curval) { - int pos = ipos; - final int half = size >>> 1; - while(pos < half) { - // Get left child (must exist!) - int cpos = (pos << 1) + 1; - double chikey = keys[cpos]; - Object chival = values[cpos]; - // Test right child, if present - final int rchild = cpos + 1; - if(rchild < size) { - double right = keys[rchild]; - if(chikey < right) { // Compare - cpos = rchild; - chikey = right; - chival = values[rchild]; - } - } - - if(curkey >= chikey) { // Compare - break; - } - keys[pos] = chikey; - values[pos] = chival; - pos = cpos; - } - keys[pos] = curkey; - values[pos] = curval; - return (pos == ipos); - } - - /** - * Query the size - * - * @return Size - */ - public int size() { - return this.size; - } - - /** - * Test whether we need to resize to have the requested capacity. - * - * @param requiredSize required capacity - */ - protected final void resize(int requiredSize) { - // Double until 64, then increase by 50% each time. - int newCapacity = ((keys.length < 64) ? ((keys.length + 1) << 1) : ((keys.length >> 1) * 3)); - // overflow? - if(newCapacity < 0) { - throw new OutOfMemoryError(); - } - if(requiredSize > newCapacity) { - newCapacity = requiredSize; - } - keys = Arrays.copyOf(keys, newCapacity); - values = Arrays.copyOf(values, newCapacity); - } - - /** - * Delete all elements from the heap. - */ - public void clear() { - // clean up references in the array for memory management - Arrays.fill(values, null); - this.size = 0; - this.validSize = -1; - modCount++; - } - - /** - * Test whether the heap is still valid. - * - * Debug method. - * - * @return {@code null} when the heap is correct - */ - protected String checkHeap() { - ensureValid(); - for(int i = 1; i < size; i++) { - final int parent = (i - 1) >>> 1; - if(keys[parent] < keys[i]) { // Compare - return "@" + parent + ": " + keys[parent] + " < @" + i + ": " + keys[i]; - } - } - return null; - } -} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjMinHeap.java deleted file mode 100644 index 244277e8..00000000 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjMinHeap.java +++ /dev/null @@ -1,328 +0,0 @@ -package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; - -/* - This file is part of ELKI: - Environment for Developing KDD-Applications Supported by Index-Structures - - Copyright (C) 2012 - Ludwig-Maximilians-Universität München - Lehr- und Forschungseinheit für Datenbanksysteme - ELKI Development Team - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -import java.util.Arrays; -import java.util.Comparator; - -import de.lmu.ifi.dbs.elki.math.MathUtil; - -/** - * Basic in-memory heap structure. - * - * This heap is built lazily: if you first add many elements, then poll the - * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log - * n). This is implemented via a simple validTo counter. - * - * @author Erich Schubert - * - * @param <V> value type - */ -public class DoubleObjMinHeap<V> { - /** - * Heap storage: keys - */ - protected double[] keys; - - /** - * Heap storage: values - */ - protected Object[] values; - - /** - * Current number of objects - */ - protected int size = 0; - - /** - * Indicate up to where the heap is valid - */ - protected int validSize = 0; - - /** - * (Structural) modification counter. Used to invalidate iterators. - */ - public transient int modCount = 0; - - /** - * Default initial capacity - */ - private static final int DEFAULT_INITIAL_CAPACITY = 11; - - /** - * Default constructor: default capacity, natural ordering. - */ - public DoubleObjMinHeap() { - this(DEFAULT_INITIAL_CAPACITY); - } - - /** - * Constructor with initial capacity and {@link Comparator}. - * - * @param size initial capacity - */ - public DoubleObjMinHeap(int size) { - super(); - this.size = 0; - this.keys = new double[size]; - this.values = new Object[size]; - } - - /** - * Add a key-value pair to the heap - * - * @param key Key - * @param val Value - * @return Success code - */ - public boolean add(double key, V val) { - // resize when needed - if(size + 1 > keys.length) { - resize(size + 1); - } - // final int pos = size; - this.keys[size] = key; - this.values[size] = val; - this.size += 1; - heapifyUp(size - 1, key, val); - validSize += 1; - // We have changed - return true according to {@link Collection#put} - modCount++; - return true; - } - - /** - * Get the current top key - * - * @return Top key - */ - public double peekKey() { - if(size == 0) { - throw new ArrayIndexOutOfBoundsException("Peek() on an empty heap!"); - } - ensureValid(); - return keys[0]; - } - - /** - * Get the current top value - * - * @return Value - */ - @SuppressWarnings("unchecked") - public V peekValue() { - if(size == 0) { - throw new ArrayIndexOutOfBoundsException("Peek() on an empty heap!"); - } - ensureValid(); - return (V) values[0]; - } - - /** - * Remove the first element - */ - public void poll() { - removeAt(0); - } - - /** - * Repair the heap - */ - protected void ensureValid() { - if(validSize != size) { - if(size > 1) { - // Parent of first invalid - int nextmin = validSize > 0 ? ((validSize - 1) >>> 1) : 0; - int curmin = MathUtil.nextAllOnesInt(nextmin); // Next line - int nextmax = curmin - 1; // End of valid line - int pos = (size - 2) >>> 1; // Parent of last element - // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin+", "+nextmin); - while(pos >= nextmin) { - // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin); - while(pos >= curmin) { - if(!heapifyDown(pos, keys[pos], values[pos])) { - final int parent = (pos - 1) >>> 1; - if(parent < curmin) { - nextmin = Math.min(nextmin, parent); - nextmax = Math.max(nextmax, parent); - } - } - pos--; - } - curmin = nextmin; - pos = Math.min(pos, nextmax); - nextmax = -1; - } - } - validSize = size; - } - } - - /** - * Remove the element at the given position. - * - * @param pos Element position. - */ - protected void removeAt(int pos) { - if(pos < 0 || pos >= size) { - return; - } - // Replacement object: - final double reinkey = keys[size - 1]; - final Object reinval = values[size - 1]; - values[size - 1] = null; - // Keep heap in sync - if(validSize == size) { - size -= 1; - validSize -= 1; - heapifyDown(pos, reinkey, reinval); - } - else { - size -= 1; - validSize = Math.min(pos >>> 1, validSize); - keys[pos] = reinkey; - values[pos] = reinval; - } - modCount++; - } - - /** - * Execute a "Heapify Upwards" aka "SiftUp". Used in insertions. - * - * @param pos insertion position - * @param curkey Current key - * @param curval Current value - */ - protected void heapifyUp(int pos, double curkey, Object curval) { - while(pos > 0) { - final int parent = (pos - 1) >>> 1; - double parkey = keys[parent]; - - if(curkey >= parkey) { // Compare - break; - } - keys[pos] = parkey; - values[pos] = values[parent]; - pos = parent; - } - keys[pos] = curkey; - values[pos] = curval; - } - - /** - * Execute a "Heapify Downwards" aka "SiftDown". Used in deletions. - * - * @param ipos re-insertion position - * @param curkey Current key - * @param curval Current value - * @return true when the order was changed - */ - protected boolean heapifyDown(final int ipos, double curkey, Object curval) { - int pos = ipos; - final int half = size >>> 1; - while(pos < half) { - // Get left child (must exist!) - int cpos = (pos << 1) + 1; - double chikey = keys[cpos]; - Object chival = values[cpos]; - // Test right child, if present - final int rchild = cpos + 1; - if(rchild < size) { - double right = keys[rchild]; - if(chikey > right) { // Compare - cpos = rchild; - chikey = right; - chival = values[rchild]; - } - } - - if(curkey <= chikey) { // Compare - break; - } - keys[pos] = chikey; - values[pos] = chival; - pos = cpos; - } - keys[pos] = curkey; - values[pos] = curval; - return (pos == ipos); - } - - /** - * Query the size - * - * @return Size - */ - public int size() { - return this.size; - } - - /** - * Test whether we need to resize to have the requested capacity. - * - * @param requiredSize required capacity - */ - protected final void resize(int requiredSize) { - // Double until 64, then increase by 50% each time. - int newCapacity = ((keys.length < 64) ? ((keys.length + 1) << 1) : ((keys.length >> 1) * 3)); - // overflow? - if(newCapacity < 0) { - throw new OutOfMemoryError(); - } - if(requiredSize > newCapacity) { - newCapacity = requiredSize; - } - keys = Arrays.copyOf(keys, newCapacity); - values = Arrays.copyOf(values, newCapacity); - } - - /** - * Delete all elements from the heap. - */ - public void clear() { - // clean up references in the array for memory management - Arrays.fill(values, null); - this.size = 0; - this.validSize = -1; - modCount++; - } - - /** - * Test whether the heap is still valid. - * - * Debug method. - * - * @return {@code null} when the heap is correct - */ - protected String checkHeap() { - ensureValid(); - for(int i = 1; i < size; i++) { - final int parent = (i - 1) >>> 1; - if(keys[parent] > keys[i]) { // Compare - return "@" + parent + ": " + keys[parent] + " < @" + i + ": " + keys[i]; - } - } - return null; - } -} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectHeap.java new file mode 100644 index 00000000..db65ce81 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectHeap.java @@ -0,0 +1,129 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter; + +/** + * Basic in-memory heap interface, for double keys and V values. + * + * @author Erich Schubert + * + * @apiviz.has UnsortedIter + * @param <V> Value type + */ +public interface DoubleObjectHeap<V> { + /** + * Add a key-value pair to the heap + * + * @param key Key + * @param val Value + */ + void add(double key, V val); + + /** + * Add a key-value pair to the heap if it improves the top. + * + * @param key Key + * @param val Value + * @param k Desired maximum size + */ + void add(double key, V val, int k); + + /** + * Combined operation that removes the top element, and inserts a new element + * instead. + * + * @param key Key of new element + * @param val Value of new element + */ + void replaceTopElement(double key, V val); + + /** + * Get the current top key + * + * @return Top key + */ + double peekKey(); + + /** + * Get the current top value + * + * @return Value + */ + V peekValue(); + + /** + * Remove the first element + */ + void poll(); + + /** + * Clear the heap contents. + */ + void clear(); + + /** + * Query the size + * + * @return Size + */ + public int size(); + + /** + * Is the heap empty? + * + * @return {@code true} when the size is 0. + */ + public boolean isEmpty(); + + /** + * Get an unsorted iterator to inspect the heap. + * + * @return Iterator + */ + UnsortedIter<V> unsortedIter(); + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * @author Erich Schubert + * @param <V> Value type + */ + public static interface UnsortedIter<V> extends Iter { + /** + * Get the current key + * + * @return Current key + */ + double getKey(); + + /** + * Get the current value + * + * @return Current value + */ + V getValue(); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMaxHeap.java new file mode 100644 index 00000000..dd89573c --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMaxHeap.java @@ -0,0 +1,482 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). + * + * This code was automatically instantiated for the types: Double and Object + * + * This combination was found to work quite well in benchmarks, but YMMV. + * + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> + * + * @author Erich Schubert + * + * @apiviz.has UnsortedIter + * @param <V> Value type + */ +public class DoubleObjectMaxHeap<V> implements DoubleObjectHeap<V> { + /** + * Base heap. + */ + protected double[] twoheap; + + /** + * Base heap values. + */ + protected Object[] twovals; + + /** + * Extension heap. + */ + protected double[] fourheap; + + /** + * Extension heapvalues. + */ + protected Object[] fourvals; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + + /** + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. + */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + + /** + * Constructor, with default size. + */ + public DoubleObjectMaxHeap() { + super(); + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + Object[] twovals = new Object[TWO_HEAP_INITIAL_SIZE]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + this.size = 0; + this.modCount = 0; + } + + /** + * Constructor, with given minimum size. + * + * @param minsize Minimum size + */ + public DoubleObjectMaxHeap(int minsize) { + super(); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + double[] twoheap = new double[size]; + Object[] twovals = new Object[size]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + } else { + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + Object[] twovals = new Object[TWO_HEAP_INITIAL_SIZE]; + double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + Object[] fourvals = new Object[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = fourheap; + this.fourvals = fourvals; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + fourvals = null; + Arrays.fill(twoheap, 0.0); + Arrays.fill(twovals, null); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + public void add(double o, V v) { + final double co = o; + final Object cv = (Object)v; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + twovals[twopos] = cv; + ++size; + heapifyUp2(twopos, co, cv); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = new double[FOUR_HEAP_INITIAL_SIZE]; + fourvals = new Object[FOUR_HEAP_INITIAL_SIZE]; + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. + fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1)); + } + fourheap[fourpos] = co; + fourvals[fourpos] = cv; + ++size; + heapifyUp4(fourpos, co, cv); + ++modCount; + } + } + + @Override + public void add(double key, V val, int max) { + if (size < max) { + add(key, val); + } else if (twoheap[0] >= key) { + replaceTopElement(key, val); + } + } + + @Override + public void replaceTopElement(double reinsert, V val) { + heapifyDown(reinsert, (Object)val); + ++modCount; + } + + /** + * Heapify-Up method for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Current value + */ + private void heapifyUp2(int twopos, double cur, Object val) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + double par = twoheap[parent]; + if (cur <= par) { + break; + } + twoheap[twopos] = par; + twovals[twopos] = twovals[parent]; + twopos = parent; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + * @param val Current value + */ + private void heapifyUp4(int fourpos, double cur, Object val) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + double par = fourheap[parent]; + if (cur <= par) { + break; + } + fourheap[fourpos] = par; + fourvals[fourpos] = fourvals[parent]; + fourpos = parent; + } + if (fourpos == 0 && twoheap[0] < cur) { + fourheap[0] = twoheap[0]; + fourvals[0] = twovals[0]; + twoheap[0] = cur; + twovals[0] = val; + } else { + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + } + + @Override + public void poll() { + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final double reinsert = fourheap[last]; + final Object reinsertv = fourvals[last]; + fourheap[last] = 0.0; + fourvals[last] = null; + heapifyDown(reinsert, reinsertv); + } else if (size > 0) { + final double reinsert = twoheap[size]; + final Object reinsertv = twovals[size]; + twoheap[size] = 0.0; + twovals[size] = null; + heapifyDown(reinsert, reinsertv); + } else { + twoheap[0] = 0.0; + twovals[0] = null; + } + ++modCount; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + * @param val Value to reinsert. + */ + private void heapifyDown(double reinsert, Object val) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (twoheap[1] >= twoheap[2]) ? 1 : 2; + if (fourheap[0] > twoheap[best]) { + twoheap[0] = fourheap[0]; + twovals[0] = fourvals[0]; + heapifyDown4(0, reinsert, val); + } else { + twoheap[0] = twoheap[best]; + twovals[0] = twovals[best]; + heapifyDown2(best, reinsert, val); + } + return; + } + heapifyDown2(0, reinsert, val); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Value to reinsert. + */ + private void heapifyDown2(int twopos, double cur, Object val) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + double best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && best < twoheap[right]) { + bestchild = right; + best = twoheap[right]; + } + if (cur >= best) { + break; + } + twoheap[twopos] = best; + twovals[twopos] = twovals[bestchild]; + twopos = bestchild; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Down for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + * @param val Value to reinsert. + */ + private void heapifyDown4(int fourpos, double cur, Object val) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + double best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + double nextchild = fourheap[candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (cur >= best) { + break; + } + fourheap[fourpos] = best; + fourvals[fourpos] = fourvals[bestchild]; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + + @Override + public double peekKey() { + return twoheap[0]; + } + + @Override + @SuppressWarnings("unchecked") + public V peekValue() { + return (V)twovals[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(DoubleObjectMaxHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) { + buf.append(iter.getKey()).append(':').append(iter.getValue()).append(','); + } + buf.append(']'); + return buf.toString(); + } + + @Override + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (DoubleObjectHeap.UnsortedIter<V> iter = heap.unsortedIter(); iter.valid(); iter.next()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements DoubleObjectHeap.UnsortedIter<V> { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. + */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @Override + public double getKey() { + return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } + + @SuppressWarnings("unchecked") + + @Override + public V getValue() { + return (V)((pos < TWO_HEAP_MAX_SIZE) ? twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMinHeap.java new file mode 100644 index 00000000..905cdedb --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMinHeap.java @@ -0,0 +1,482 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). + * + * This code was automatically instantiated for the types: Double and Object + * + * This combination was found to work quite well in benchmarks, but YMMV. + * + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> + * + * @author Erich Schubert + * + * @apiviz.has UnsortedIter + * @param <V> Value type + */ +public class DoubleObjectMinHeap<V> implements DoubleObjectHeap<V> { + /** + * Base heap. + */ + protected double[] twoheap; + + /** + * Base heap values. + */ + protected Object[] twovals; + + /** + * Extension heap. + */ + protected double[] fourheap; + + /** + * Extension heapvalues. + */ + protected Object[] fourvals; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + + /** + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. + */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + + /** + * Constructor, with default size. + */ + public DoubleObjectMinHeap() { + super(); + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + Object[] twovals = new Object[TWO_HEAP_INITIAL_SIZE]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + this.size = 0; + this.modCount = 0; + } + + /** + * Constructor, with given minimum size. + * + * @param minsize Minimum size + */ + public DoubleObjectMinHeap(int minsize) { + super(); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + double[] twoheap = new double[size]; + Object[] twovals = new Object[size]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + } else { + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + Object[] twovals = new Object[TWO_HEAP_INITIAL_SIZE]; + double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + Object[] fourvals = new Object[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = fourheap; + this.fourvals = fourvals; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + fourvals = null; + Arrays.fill(twoheap, 0.0); + Arrays.fill(twovals, null); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + public void add(double o, V v) { + final double co = o; + final Object cv = (Object)v; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + twovals[twopos] = cv; + ++size; + heapifyUp2(twopos, co, cv); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = new double[FOUR_HEAP_INITIAL_SIZE]; + fourvals = new Object[FOUR_HEAP_INITIAL_SIZE]; + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. + fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1)); + } + fourheap[fourpos] = co; + fourvals[fourpos] = cv; + ++size; + heapifyUp4(fourpos, co, cv); + ++modCount; + } + } + + @Override + public void add(double key, V val, int max) { + if (size < max) { + add(key, val); + } else if (twoheap[0] <= key) { + replaceTopElement(key, val); + } + } + + @Override + public void replaceTopElement(double reinsert, V val) { + heapifyDown(reinsert, (Object)val); + ++modCount; + } + + /** + * Heapify-Up method for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Current value + */ + private void heapifyUp2(int twopos, double cur, Object val) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + double par = twoheap[parent]; + if (cur >= par) { + break; + } + twoheap[twopos] = par; + twovals[twopos] = twovals[parent]; + twopos = parent; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + * @param val Current value + */ + private void heapifyUp4(int fourpos, double cur, Object val) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + double par = fourheap[parent]; + if (cur >= par) { + break; + } + fourheap[fourpos] = par; + fourvals[fourpos] = fourvals[parent]; + fourpos = parent; + } + if (fourpos == 0 && twoheap[0] > cur) { + fourheap[0] = twoheap[0]; + fourvals[0] = twovals[0]; + twoheap[0] = cur; + twovals[0] = val; + } else { + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + } + + @Override + public void poll() { + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final double reinsert = fourheap[last]; + final Object reinsertv = fourvals[last]; + fourheap[last] = 0.0; + fourvals[last] = null; + heapifyDown(reinsert, reinsertv); + } else if (size > 0) { + final double reinsert = twoheap[size]; + final Object reinsertv = twovals[size]; + twoheap[size] = 0.0; + twovals[size] = null; + heapifyDown(reinsert, reinsertv); + } else { + twoheap[0] = 0.0; + twovals[0] = null; + } + ++modCount; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + * @param val Value to reinsert. + */ + private void heapifyDown(double reinsert, Object val) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (twoheap[1] <= twoheap[2]) ? 1 : 2; + if (fourheap[0] < twoheap[best]) { + twoheap[0] = fourheap[0]; + twovals[0] = fourvals[0]; + heapifyDown4(0, reinsert, val); + } else { + twoheap[0] = twoheap[best]; + twovals[0] = twovals[best]; + heapifyDown2(best, reinsert, val); + } + return; + } + heapifyDown2(0, reinsert, val); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Value to reinsert. + */ + private void heapifyDown2(int twopos, double cur, Object val) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + double best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && best > twoheap[right]) { + bestchild = right; + best = twoheap[right]; + } + if (cur <= best) { + break; + } + twoheap[twopos] = best; + twovals[twopos] = twovals[bestchild]; + twopos = bestchild; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Down for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + * @param val Value to reinsert. + */ + private void heapifyDown4(int fourpos, double cur, Object val) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + double best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + double nextchild = fourheap[candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (cur <= best) { + break; + } + fourheap[fourpos] = best; + fourvals[fourpos] = fourvals[bestchild]; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + + @Override + public double peekKey() { + return twoheap[0]; + } + + @Override + @SuppressWarnings("unchecked") + public V peekValue() { + return (V)twovals[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(DoubleObjectMinHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) { + buf.append(iter.getKey()).append(':').append(iter.getValue()).append(','); + } + buf.append(']'); + return buf.toString(); + } + + @Override + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (DoubleObjectHeap.UnsortedIter<V> iter = heap.unsortedIter(); iter.valid(); iter.next()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements DoubleObjectHeap.UnsortedIter<V> { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. + */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @Override + public double getKey() { + return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } + + @SuppressWarnings("unchecked") + + @Override + public V getValue() { + return (V)((pos < TWO_HEAP_MAX_SIZE) ? twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoublePriorityObject.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoublePriorityObject.java index 92d548cb..82453885 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoublePriorityObject.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoublePriorityObject.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/Heap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/Heap.java index 86d3ae08..2c278110 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/Heap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/Heap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -25,11 +25,6 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; import java.util.Arrays; import java.util.Comparator; -import java.util.ConcurrentModificationException; -import java.util.Iterator; -import java.util.NoSuchElementException; - -import de.lmu.ifi.dbs.elki.math.MathUtil; /** * Basic in-memory heap structure. Closely related to a @@ -45,11 +40,11 @@ import de.lmu.ifi.dbs.elki.math.MathUtil; * @param <E> Element type. Should be {@link java.lang.Comparable} or a * {@link java.util.Comparator} needs to be given. */ -public class Heap<E> implements Iterable<E> { +public class Heap<E> { /** * Heap storage. */ - protected transient Object[] queue; + protected Object[] queue; /** * Current number of objects. @@ -57,11 +52,6 @@ public class Heap<E> implements Iterable<E> { protected int size = 0; /** - * Indicate up to where the heap is valid. - */ - protected int validSize = 0; - - /** * The comparator or {@code null}. */ protected final Comparator<Object> comparator; @@ -69,7 +59,7 @@ public class Heap<E> implements Iterable<E> { /** * (Structural) modification counter. Used to invalidate iterators. */ - private transient int modCount = 0; + private int modCount = 0; /** * Default initial capacity. @@ -126,10 +116,8 @@ public class Heap<E> implements Iterable<E> { resize(size + 1); } // final int pos = size; - this.queue[size] = e; this.size += 1; heapifyUp(size - 1, e); - validSize += 1; heapModified(); } @@ -142,7 +130,6 @@ public class Heap<E> implements Iterable<E> { */ @SuppressWarnings("unchecked") public E replaceTopElement(E e) { - ensureValid(); E oldroot = (E) queue[0]; heapifyDown(0, e); heapModified(); @@ -159,7 +146,6 @@ public class Heap<E> implements Iterable<E> { if (size == 0) { return null; } - ensureValid(); return (E) queue[0]; } @@ -169,70 +155,10 @@ public class Heap<E> implements Iterable<E> { * @return Top element. */ public E poll() { - ensureValid(); return removeAt(0); } /** - * Perform pending heap repair operations in a single bulk operation. - */ - protected void ensureValid() { - if (validSize != size) { - if (size > 1) { - // Bottom up heap update. - if (comparator != null) { - // Parent of first invalid - int nextmin = validSize > 0 ? ((validSize - 1) >>> 1) : 0; - int curmin = MathUtil.nextAllOnesInt(nextmin); // Next line - int nextmax = curmin - 1; // End of valid line - int pos = (size - 2) >>> 1; // Parent of last element - // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin+", "+nextmin); - while (pos >= nextmin) { - // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin); - while (pos >= curmin) { - if (!heapifyDownComparator(pos, queue[pos])) { - final int parent = (pos - 1) >>> 1; - if (parent < curmin) { - nextmin = Math.min(nextmin, parent); - nextmax = Math.max(nextmax, parent); - } - } - pos--; - } - curmin = nextmin; - pos = Math.min(pos, nextmax); - nextmax = -1; - } - } else { - // Parent of first invalid - int nextmin = validSize > 0 ? ((validSize - 1) >>> 1) : 0; - int curmin = MathUtil.nextAllOnesInt(nextmin); // Next line - int nextmax = curmin - 1; // End of valid line - int pos = (size - 2) >>> 1; // Parent of last element - // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin+", "+nextmin); - while (pos >= nextmin) { - // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin); - while (pos >= curmin) { - if (!heapifyDownComparable(pos, queue[pos])) { - final int parent = (pos - 1) >>> 1; - if (parent < curmin) { - nextmin = Math.min(nextmin, parent); - nextmax = Math.max(nextmax, parent); - } - } - pos--; - } - curmin = nextmin; - pos = Math.min(pos, nextmax); - nextmax = -1; - } - } - } - validSize = size; - } - } - - /** * Remove the element at the given position. * * @param pos Element position. @@ -247,16 +173,8 @@ public class Heap<E> implements Iterable<E> { // Replacement object: final Object reinsert = queue[size - 1]; queue[size - 1] = null; - // Keep heap in sync - if (validSize == size) { - size -= 1; - validSize -= 1; - heapifyDown(pos, reinsert); - } else { - size -= 1; - validSize = Math.min(pos >>> 1, validSize); - queue[pos] = reinsert; - } + size--; + heapifyDown(pos, reinsert); heapModified(); return ret; } @@ -367,7 +285,7 @@ public class Heap<E> implements Iterable<E> { pos = cpos; } queue[pos] = cur; - return (pos == ipos); + return (pos != ipos); } /** @@ -405,7 +323,7 @@ public class Heap<E> implements Iterable<E> { pos = min; } queue[pos] = cur; - return (pos == ipos); + return (pos != ipos); } /** @@ -453,15 +371,9 @@ public class Heap<E> implements Iterable<E> { queue[i] = null; } this.size = 0; - this.validSize = -1; heapModified(); } - @Override - public Iterator<E> iterator() { - return new Itr(); - } - /** * Called at the end of each heap modification. */ @@ -470,52 +382,12 @@ public class Heap<E> implements Iterable<E> { } /** - * Iterator over queue elements. No particular order (i.e. heap order!) + * Get an unordered heap iterator. * - * @author Erich Schubert - * - * @apiviz.exclude + * @return Iterator. */ - protected final class Itr implements Iterator<E> { - /** - * Cursor position. - */ - private int cursor = 0; - - /** - * Modification counter this iterator is valid for. - */ - private int expectedModCount = modCount; - - @Override - public boolean hasNext() { - return cursor < size; - } - - @SuppressWarnings("unchecked") - @Override - public E next() { - if (expectedModCount != modCount) { - throw new ConcurrentModificationException(); - } - if (cursor < size) { - return (E) queue[cursor++]; - } - throw new NoSuchElementException(); - } - - @Override - public void remove() { - if (expectedModCount != modCount) { - throw new ConcurrentModificationException(); - } - if (cursor > 0) { - cursor--; - } else { - throw new IllegalStateException(); - } - expectedModCount = modCount; - } + public UnorderedIter unorderedIter() { + return new UnorderedIter(); } /** @@ -526,7 +398,6 @@ public class Heap<E> implements Iterable<E> { * @return {@code null} when the heap is correct */ protected String checkHeap() { - ensureValid(); if (comparator == null) { for (int i = 1; i < size; i++) { final int parent = (i - 1) >>> 1; @@ -546,4 +417,43 @@ public class Heap<E> implements Iterable<E> { } return null; } + + /** + * Heap iterator. + * + * @author Erich Schubert + */ + public class UnorderedIter implements de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter { + /** + * Current iterator position. + */ + int pos = 0; + + /** + * Constructor. + */ + protected UnorderedIter() { + super(); + } + + @Override + public boolean valid() { + return pos < size(); + } + + @Override + public void advance() { + pos++; + } + + /** + * Get the current queue element. + * + * @return Element + */ + @SuppressWarnings("unchecked") + public E get() { + return (E) queue[pos]; + } + } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerHeap.java index 6203ad96..3235926b 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,53 +23,22 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import java.util.Arrays; - -import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter; /** - * Basic in-memory heap structure. - * - * This heap is built lazily: if you first add many elements, then poll the - * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log - * n). This is implemented via a simple validTo counter. + * Basic in-memory heap for int values. * * @author Erich Schubert + * + * @apiviz.has UnsortedIter */ -public abstract class IntegerHeap extends AbstractHeap { - /** - * Heap storage: queue - */ - protected transient int[] queue; - - /** - * Constructor with initial capacity. - * - * @param size initial capacity - */ - public IntegerHeap(int size) { - super(); - this.size = 0; - this.queue = new int[size]; - } - +public interface IntegerHeap { /** * Add a key-value pair to the heap * * @param key Key */ - public void add(int key) { - // resize when needed - if (size + 1 > queue.length) { - resize(size + 1); - } - // final int pos = size; - this.queue[size] = key; - this.size += 1; - heapifyUp(size - 1, key); - validSize += 1; - heapModified(); - } + void add(int key); /** * Add a key-value pair to the heap, except if the new element is larger than @@ -78,13 +47,7 @@ public abstract class IntegerHeap extends AbstractHeap { * @param key Key * @param max Maximum size of heap */ - public void add(int key, int max) { - if (size < max) { - add(key); - } else if (comp(key, peek())) { - replaceTopElement(key); - } - } + void add(int key, int max); /** * Combined operation that removes the top element, and inserts a new element @@ -93,172 +56,67 @@ public abstract class IntegerHeap extends AbstractHeap { * @param e New element to insert * @return Previous top element of the heap */ - public int replaceTopElement(int e) { - ensureValid(); - int oldroot = queue[0]; - heapifyDown(0, e); - heapModified(); - return oldroot; - } + int replaceTopElement(int e); /** * Get the current top key * * @return Top key */ - public int peek() { - if (size == 0) { - throw new ArrayIndexOutOfBoundsException("Peek() on an empty heap!"); - } - ensureValid(); - return queue[0]; - } + int peek(); /** * Remove the first element * * @return Top element */ - public int poll() { - return removeAt(0); - } + int poll(); /** - * Repair the heap + * Delete all elements from the heap. */ - protected void ensureValid() { - if (validSize != size) { - if (size > 1) { - // Parent of first invalid - int nextmin = validSize > 0 ? ((validSize - 1) >>> 1) : 0; - int curmin = MathUtil.nextAllOnesInt(nextmin); // Next line - int nextmax = curmin - 1; // End of valid line - int pos = (size - 2) >>> 1; // Parent of last element - // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin+", "+nextmin); - while (pos >= nextmin) { - // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin); - while (pos >= curmin) { - if (!heapifyDown(pos, queue[pos])) { - final int parent = (pos - 1) >>> 1; - if (parent < curmin) { - nextmin = Math.min(nextmin, parent); - nextmax = Math.max(nextmax, parent); - } - } - pos--; - } - curmin = nextmin; - pos = Math.min(pos, nextmax); - nextmax = -1; - } - } - validSize = size; - } - } + void clear(); /** - * Remove the element at the given position. + * Query the size * - * @param pos Element position. - * @return Removed element + * @return Size */ - protected int removeAt(int pos) { - if (pos < 0 || pos >= size) { - return 0; - } - final int top = queue[0]; - // Replacement object: - final int reinkey = queue[size - 1]; - // Keep heap in sync - if (validSize == size) { - size -= 1; - validSize -= 1; - heapifyDown(pos, reinkey); - } else { - size -= 1; - validSize = Math.min(pos >>> 1, validSize); - queue[pos] = reinkey; - } - heapModified(); - return top; - } - + public int size(); + /** - * Execute a "Heapify Upwards" aka "SiftUp". Used in insertions. + * Is the heap empty? * - * @param pos insertion position - * @param curkey Current key + * @return {@code true} when the size is 0. */ - protected void heapifyUp(int pos, int curkey) { - while (pos > 0) { - final int parent = (pos - 1) >>> 1; - int parkey = queue[parent]; - - if (comp(curkey, parkey)) { // Compare - break; - } - queue[pos] = parkey; - pos = parent; - } - queue[pos] = curkey; - } + public boolean isEmpty(); /** - * Execute a "Heapify Downwards" aka "SiftDown". Used in deletions. + * Get an unsorted iterator to inspect the heap. * - * @param ipos re-insertion position - * @param curkey Current key - * @return true when the order was changed + * @return Iterator */ - protected boolean heapifyDown(final int ipos, int curkey) { - int pos = ipos; - final int half = size >>> 1; - while (pos < half) { - // Get left child (must exist!) - int cpos = (pos << 1) + 1; - int chikey = queue[cpos]; - // Test right child, if present - final int rchild = cpos + 1; - if (rchild < size) { - int right = queue[rchild]; - if (comp(chikey, right)) { // Compare - cpos = rchild; - chikey = right; - } - } - - if (comp(chikey, curkey)) { // Compare - break; - } - queue[pos] = chikey; - pos = cpos; - } - queue[pos] = curkey; - return (pos == ipos); - } + UnsortedIter unsortedIter(); /** - * Test whether we need to resize to have the requested capacity. + * Unsorted iterator - in heap order. Does not poll the heap. * - * @param requiredSize required capacity - */ - protected final void resize(int requiredSize) { - queue = Arrays.copyOf(queue, desiredSize(requiredSize, queue.length)); - } - - /** - * Delete all elements from the heap. + * <pre> + * {@code + * for (IntegerHeap.UnsortedIter iter = heap.unsortedIter(); iter.valid(); iter.next()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert */ - @Override - public void clear() { - super.clear(); - for (int i = 0; i < size; i++) { - queue[i] = 0; - } + public static interface UnsortedIter extends Iter { + /** + * Get the iterators current object. + * + * @return Current object + */ + int get(); } - - /** - * Compare two objects - */ - abstract protected boolean comp(int o1, int o2); } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMaxHeap.java index 383eb727..60f61d99 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMaxHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMaxHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,40 +23,400 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; along with this program. If not, see <http://www.gnu.org/licenses/>. */ +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + /** - * Basic in-memory heap structure. + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). + * + * This code was automatically instantiated for the type: Integer * - * This heap is built lazily: if you first add many elements, then poll the - * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log - * n). This is implemented via a simple validTo counter. + * This combination was found to work quite well in benchmarks, but YMMV. + * + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> * * @author Erich Schubert + * + * @apiviz.has UnsortedIter */ -public class IntegerMaxHeap extends IntegerHeap { +public class IntegerMaxHeap implements IntegerHeap { + /** + * Base heap. + */ + protected int[] twoheap; + + /** + * Extension heap. + */ + protected int[] fourheap; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + /** - * Constructor with default capacity. + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. + */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + + /** + * Constructor, with default size. */ public IntegerMaxHeap() { - super(DEFAULT_INITIAL_CAPACITY); + super(); + int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE]; + + this.twoheap = twoheap; + this.fourheap = null; + this.size = 0; + this.modCount = 0; } /** - * Constructor with initial capacity. + * Constructor, with given minimum size. * - * @param size initial capacity + * @param minsize Minimum size */ - public IntegerMaxHeap(int size) { - super(size); + public IntegerMaxHeap(int minsize) { + super(); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + int[] twoheap = new int[size]; + + this.twoheap = twoheap; + this.fourheap = null; + } else { + int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE]; + int[] fourheap = new int[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + this.twoheap = twoheap; + this.fourheap = fourheap; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + Arrays.fill(twoheap, 0); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + public void add(int o) { + final int co = o; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + ++size; + heapifyUp2(twopos, co); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = new int[FOUR_HEAP_INITIAL_SIZE]; + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. + fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + } + fourheap[fourpos] = co; + ++size; + heapifyUp4(fourpos, co); + ++modCount; + } + } + + @Override + public void add(int key, int max) { + if (size < max) { + add(key); + } else if (twoheap[0] >= key) { + replaceTopElement(key); + } + } + + @Override + public int replaceTopElement(int reinsert) { + final int ret = twoheap[0]; + heapifyDown( reinsert); + ++modCount; + return ret; + } + + /** + * Heapify-Up method for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + */ + private void heapifyUp2(int twopos, int cur) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + int par = twoheap[parent]; + if (cur <= par) { + break; + } + twoheap[twopos] = par; + twopos = parent; + } + twoheap[twopos] = cur; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + */ + private void heapifyUp4(int fourpos, int cur) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + int par = fourheap[parent]; + if (cur <= par) { + break; + } + fourheap[fourpos] = par; + fourpos = parent; + } + if (fourpos == 0 && twoheap[0] < cur) { + fourheap[0] = twoheap[0]; + twoheap[0] = cur; + } else { + fourheap[fourpos] = cur; + } + } + + @Override + public int poll() { + final int ret = twoheap[0]; + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final int reinsert = fourheap[last]; + fourheap[last] = 0; + heapifyDown(reinsert); + } else if (size > 0) { + final int reinsert = twoheap[size]; + twoheap[size] = 0; + heapifyDown(reinsert); + } else { + twoheap[0] = 0; + } + ++modCount; + return ret; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + */ + private void heapifyDown(int reinsert) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (twoheap[1] >= twoheap[2]) ? 1 : 2; + if (fourheap[0] > twoheap[best]) { + twoheap[0] = fourheap[0]; + heapifyDown4(0, reinsert); + } else { + twoheap[0] = twoheap[best]; + heapifyDown2(best, reinsert); + } + return; + } + heapifyDown2(0, reinsert); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + */ + private void heapifyDown2(int twopos, int cur) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + int best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && best < twoheap[right]) { + bestchild = right; + best = twoheap[right]; + } + if (cur >= best) { + break; + } + twoheap[twopos] = best; + twopos = bestchild; + } + twoheap[twopos] = cur; } /** - * Compare two objects + * Heapify-Down for 4-ary heap. * - * @param o1 First object - * @param o2 Second object + * @param fourpos Position in 4-ary heap. + * @param cur Current object */ + private void heapifyDown4(int fourpos, int cur) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + int best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + int nextchild = fourheap[candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (cur >= best) { + break; + } + fourheap[fourpos] = best; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + } + + @Override + public int peek() { + return twoheap[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(IntegerMaxHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) { + buf.append(iter.get()).append(','); + } + buf.append(']'); + return buf.toString(); + } + @Override - protected boolean comp(int o1, int o2) { - return o1 < o2; + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (IntegerHeap.UnsortedIter iter = heap.unsortedIter(); iter.valid(); iter.next()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements IntegerHeap.UnsortedIter { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. + */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @Override + public int get() { + return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMinHeap.java index f81fe275..c352ece4 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMinHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMinHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,40 +23,400 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; along with this program. If not, see <http://www.gnu.org/licenses/>. */ +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + /** - * Basic in-memory heap structure. + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). + * + * This code was automatically instantiated for the type: Integer * - * This heap is built lazily: if you first add many elements, then poll the - * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log - * n). This is implemented via a simple validTo counter. + * This combination was found to work quite well in benchmarks, but YMMV. + * + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> * * @author Erich Schubert + * + * @apiviz.has UnsortedIter */ -public class IntegerMinHeap extends IntegerHeap { +public class IntegerMinHeap implements IntegerHeap { + /** + * Base heap. + */ + protected int[] twoheap; + + /** + * Extension heap. + */ + protected int[] fourheap; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + /** - * Constructor with default capacity. + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. + */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + + /** + * Constructor, with default size. */ public IntegerMinHeap() { - super(DEFAULT_INITIAL_CAPACITY); + super(); + int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE]; + + this.twoheap = twoheap; + this.fourheap = null; + this.size = 0; + this.modCount = 0; } /** - * Constructor with initial capacity. + * Constructor, with given minimum size. * - * @param size initial capacity + * @param minsize Minimum size */ - public IntegerMinHeap(int size) { - super(size); + public IntegerMinHeap(int minsize) { + super(); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + int[] twoheap = new int[size]; + + this.twoheap = twoheap; + this.fourheap = null; + } else { + int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE]; + int[] fourheap = new int[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + this.twoheap = twoheap; + this.fourheap = fourheap; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + Arrays.fill(twoheap, 0); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + public void add(int o) { + final int co = o; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + ++size; + heapifyUp2(twopos, co); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = new int[FOUR_HEAP_INITIAL_SIZE]; + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. + fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + } + fourheap[fourpos] = co; + ++size; + heapifyUp4(fourpos, co); + ++modCount; + } + } + + @Override + public void add(int key, int max) { + if (size < max) { + add(key); + } else if (twoheap[0] <= key) { + replaceTopElement(key); + } + } + + @Override + public int replaceTopElement(int reinsert) { + final int ret = twoheap[0]; + heapifyDown( reinsert); + ++modCount; + return ret; + } + + /** + * Heapify-Up method for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + */ + private void heapifyUp2(int twopos, int cur) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + int par = twoheap[parent]; + if (cur >= par) { + break; + } + twoheap[twopos] = par; + twopos = parent; + } + twoheap[twopos] = cur; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + */ + private void heapifyUp4(int fourpos, int cur) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + int par = fourheap[parent]; + if (cur >= par) { + break; + } + fourheap[fourpos] = par; + fourpos = parent; + } + if (fourpos == 0 && twoheap[0] > cur) { + fourheap[0] = twoheap[0]; + twoheap[0] = cur; + } else { + fourheap[fourpos] = cur; + } + } + + @Override + public int poll() { + final int ret = twoheap[0]; + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final int reinsert = fourheap[last]; + fourheap[last] = 0; + heapifyDown(reinsert); + } else if (size > 0) { + final int reinsert = twoheap[size]; + twoheap[size] = 0; + heapifyDown(reinsert); + } else { + twoheap[0] = 0; + } + ++modCount; + return ret; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + */ + private void heapifyDown(int reinsert) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (twoheap[1] <= twoheap[2]) ? 1 : 2; + if (fourheap[0] < twoheap[best]) { + twoheap[0] = fourheap[0]; + heapifyDown4(0, reinsert); + } else { + twoheap[0] = twoheap[best]; + heapifyDown2(best, reinsert); + } + return; + } + heapifyDown2(0, reinsert); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + */ + private void heapifyDown2(int twopos, int cur) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + int best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && best > twoheap[right]) { + bestchild = right; + best = twoheap[right]; + } + if (cur <= best) { + break; + } + twoheap[twopos] = best; + twopos = bestchild; + } + twoheap[twopos] = cur; } /** - * Compare two objects + * Heapify-Down for 4-ary heap. * - * @param o1 First object - * @param o2 Second object + * @param fourpos Position in 4-ary heap. + * @param cur Current object */ + private void heapifyDown4(int fourpos, int cur) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + int best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + int nextchild = fourheap[candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (cur <= best) { + break; + } + fourheap[fourpos] = best; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + } + + @Override + public int peek() { + return twoheap[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(IntegerMinHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) { + buf.append(iter.get()).append(','); + } + buf.append(']'); + return buf.toString(); + } + @Override - protected boolean comp(int o1, int o2) { - return o1 > o2; + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (IntegerHeap.UnsortedIter iter = heap.unsortedIter(); iter.valid(); iter.next()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements IntegerHeap.UnsortedIter { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. + */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @Override + public int get() { + return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectHeap.java new file mode 100644 index 00000000..01f7aea0 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectHeap.java @@ -0,0 +1,129 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter; + +/** + * Basic in-memory heap interface, for int keys and V values. + * + * @author Erich Schubert + * + * @apiviz.has UnsortedIter + * @param <V> Value type + */ +public interface IntegerObjectHeap<V> { + /** + * Add a key-value pair to the heap + * + * @param key Key + * @param val Value + */ + void add(int key, V val); + + /** + * Add a key-value pair to the heap if it improves the top. + * + * @param key Key + * @param val Value + * @param k Desired maximum size + */ + void add(int key, V val, int k); + + /** + * Combined operation that removes the top element, and inserts a new element + * instead. + * + * @param key Key of new element + * @param val Value of new element + */ + void replaceTopElement(int key, V val); + + /** + * Get the current top key + * + * @return Top key + */ + int peekKey(); + + /** + * Get the current top value + * + * @return Value + */ + V peekValue(); + + /** + * Remove the first element + */ + void poll(); + + /** + * Clear the heap contents. + */ + void clear(); + + /** + * Query the size + * + * @return Size + */ + public int size(); + + /** + * Is the heap empty? + * + * @return {@code true} when the size is 0. + */ + public boolean isEmpty(); + + /** + * Get an unsorted iterator to inspect the heap. + * + * @return Iterator + */ + UnsortedIter<V> unsortedIter(); + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * @author Erich Schubert + * @param <V> Value type + */ + public static interface UnsortedIter<V> extends Iter { + /** + * Get the current key + * + * @return Current key + */ + int getKey(); + + /** + * Get the current value + * + * @return Current value + */ + V getValue(); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMaxHeap.java new file mode 100644 index 00000000..93a4e75a --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMaxHeap.java @@ -0,0 +1,482 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). + * + * This code was automatically instantiated for the types: Integer and Object + * + * This combination was found to work quite well in benchmarks, but YMMV. + * + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> + * + * @author Erich Schubert + * + * @apiviz.has UnsortedIter + * @param <V> Value type + */ +public class IntegerObjectMaxHeap<V> implements IntegerObjectHeap<V> { + /** + * Base heap. + */ + protected int[] twoheap; + + /** + * Base heap values. + */ + protected Object[] twovals; + + /** + * Extension heap. + */ + protected int[] fourheap; + + /** + * Extension heapvalues. + */ + protected Object[] fourvals; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + + /** + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. + */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + + /** + * Constructor, with default size. + */ + public IntegerObjectMaxHeap() { + super(); + int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE]; + Object[] twovals = new Object[TWO_HEAP_INITIAL_SIZE]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + this.size = 0; + this.modCount = 0; + } + + /** + * Constructor, with given minimum size. + * + * @param minsize Minimum size + */ + public IntegerObjectMaxHeap(int minsize) { + super(); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + int[] twoheap = new int[size]; + Object[] twovals = new Object[size]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + } else { + int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE]; + Object[] twovals = new Object[TWO_HEAP_INITIAL_SIZE]; + int[] fourheap = new int[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + Object[] fourvals = new Object[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = fourheap; + this.fourvals = fourvals; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + fourvals = null; + Arrays.fill(twoheap, 0); + Arrays.fill(twovals, null); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + public void add(int o, V v) { + final int co = o; + final Object cv = (Object)v; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + twovals[twopos] = cv; + ++size; + heapifyUp2(twopos, co, cv); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = new int[FOUR_HEAP_INITIAL_SIZE]; + fourvals = new Object[FOUR_HEAP_INITIAL_SIZE]; + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. + fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1)); + } + fourheap[fourpos] = co; + fourvals[fourpos] = cv; + ++size; + heapifyUp4(fourpos, co, cv); + ++modCount; + } + } + + @Override + public void add(int key, V val, int max) { + if (size < max) { + add(key, val); + } else if (twoheap[0] >= key) { + replaceTopElement(key, val); + } + } + + @Override + public void replaceTopElement(int reinsert, V val) { + heapifyDown(reinsert, (Object)val); + ++modCount; + } + + /** + * Heapify-Up method for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Current value + */ + private void heapifyUp2(int twopos, int cur, Object val) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + int par = twoheap[parent]; + if (cur <= par) { + break; + } + twoheap[twopos] = par; + twovals[twopos] = twovals[parent]; + twopos = parent; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + * @param val Current value + */ + private void heapifyUp4(int fourpos, int cur, Object val) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + int par = fourheap[parent]; + if (cur <= par) { + break; + } + fourheap[fourpos] = par; + fourvals[fourpos] = fourvals[parent]; + fourpos = parent; + } + if (fourpos == 0 && twoheap[0] < cur) { + fourheap[0] = twoheap[0]; + fourvals[0] = twovals[0]; + twoheap[0] = cur; + twovals[0] = val; + } else { + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + } + + @Override + public void poll() { + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final int reinsert = fourheap[last]; + final Object reinsertv = fourvals[last]; + fourheap[last] = 0; + fourvals[last] = null; + heapifyDown(reinsert, reinsertv); + } else if (size > 0) { + final int reinsert = twoheap[size]; + final Object reinsertv = twovals[size]; + twoheap[size] = 0; + twovals[size] = null; + heapifyDown(reinsert, reinsertv); + } else { + twoheap[0] = 0; + twovals[0] = null; + } + ++modCount; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + * @param val Value to reinsert. + */ + private void heapifyDown(int reinsert, Object val) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (twoheap[1] >= twoheap[2]) ? 1 : 2; + if (fourheap[0] > twoheap[best]) { + twoheap[0] = fourheap[0]; + twovals[0] = fourvals[0]; + heapifyDown4(0, reinsert, val); + } else { + twoheap[0] = twoheap[best]; + twovals[0] = twovals[best]; + heapifyDown2(best, reinsert, val); + } + return; + } + heapifyDown2(0, reinsert, val); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Value to reinsert. + */ + private void heapifyDown2(int twopos, int cur, Object val) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + int best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && best < twoheap[right]) { + bestchild = right; + best = twoheap[right]; + } + if (cur >= best) { + break; + } + twoheap[twopos] = best; + twovals[twopos] = twovals[bestchild]; + twopos = bestchild; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Down for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + * @param val Value to reinsert. + */ + private void heapifyDown4(int fourpos, int cur, Object val) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + int best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + int nextchild = fourheap[candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (cur >= best) { + break; + } + fourheap[fourpos] = best; + fourvals[fourpos] = fourvals[bestchild]; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + + @Override + public int peekKey() { + return twoheap[0]; + } + + @Override + @SuppressWarnings("unchecked") + public V peekValue() { + return (V)twovals[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(IntegerObjectMaxHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) { + buf.append(iter.getKey()).append(':').append(iter.getValue()).append(','); + } + buf.append(']'); + return buf.toString(); + } + + @Override + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (IntegerObjectHeap.UnsortedIter<V> iter = heap.unsortedIter(); iter.valid(); iter.next()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements IntegerObjectHeap.UnsortedIter<V> { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. + */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @Override + public int getKey() { + return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } + + @SuppressWarnings("unchecked") + + @Override + public V getValue() { + return (V)((pos < TWO_HEAP_MAX_SIZE) ? twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMinHeap.java new file mode 100644 index 00000000..e54c7d28 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMinHeap.java @@ -0,0 +1,482 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). + * + * This code was automatically instantiated for the types: Integer and Object + * + * This combination was found to work quite well in benchmarks, but YMMV. + * + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> + * + * @author Erich Schubert + * + * @apiviz.has UnsortedIter + * @param <V> Value type + */ +public class IntegerObjectMinHeap<V> implements IntegerObjectHeap<V> { + /** + * Base heap. + */ + protected int[] twoheap; + + /** + * Base heap values. + */ + protected Object[] twovals; + + /** + * Extension heap. + */ + protected int[] fourheap; + + /** + * Extension heapvalues. + */ + protected Object[] fourvals; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + + /** + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. + */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + + /** + * Constructor, with default size. + */ + public IntegerObjectMinHeap() { + super(); + int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE]; + Object[] twovals = new Object[TWO_HEAP_INITIAL_SIZE]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + this.size = 0; + this.modCount = 0; + } + + /** + * Constructor, with given minimum size. + * + * @param minsize Minimum size + */ + public IntegerObjectMinHeap(int minsize) { + super(); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + int[] twoheap = new int[size]; + Object[] twovals = new Object[size]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + } else { + int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE]; + Object[] twovals = new Object[TWO_HEAP_INITIAL_SIZE]; + int[] fourheap = new int[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + Object[] fourvals = new Object[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = fourheap; + this.fourvals = fourvals; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + fourvals = null; + Arrays.fill(twoheap, 0); + Arrays.fill(twovals, null); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + public void add(int o, V v) { + final int co = o; + final Object cv = (Object)v; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + twovals[twopos] = cv; + ++size; + heapifyUp2(twopos, co, cv); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = new int[FOUR_HEAP_INITIAL_SIZE]; + fourvals = new Object[FOUR_HEAP_INITIAL_SIZE]; + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. + fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1)); + } + fourheap[fourpos] = co; + fourvals[fourpos] = cv; + ++size; + heapifyUp4(fourpos, co, cv); + ++modCount; + } + } + + @Override + public void add(int key, V val, int max) { + if (size < max) { + add(key, val); + } else if (twoheap[0] <= key) { + replaceTopElement(key, val); + } + } + + @Override + public void replaceTopElement(int reinsert, V val) { + heapifyDown(reinsert, (Object)val); + ++modCount; + } + + /** + * Heapify-Up method for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Current value + */ + private void heapifyUp2(int twopos, int cur, Object val) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + int par = twoheap[parent]; + if (cur >= par) { + break; + } + twoheap[twopos] = par; + twovals[twopos] = twovals[parent]; + twopos = parent; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + * @param val Current value + */ + private void heapifyUp4(int fourpos, int cur, Object val) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + int par = fourheap[parent]; + if (cur >= par) { + break; + } + fourheap[fourpos] = par; + fourvals[fourpos] = fourvals[parent]; + fourpos = parent; + } + if (fourpos == 0 && twoheap[0] > cur) { + fourheap[0] = twoheap[0]; + fourvals[0] = twovals[0]; + twoheap[0] = cur; + twovals[0] = val; + } else { + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + } + + @Override + public void poll() { + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final int reinsert = fourheap[last]; + final Object reinsertv = fourvals[last]; + fourheap[last] = 0; + fourvals[last] = null; + heapifyDown(reinsert, reinsertv); + } else if (size > 0) { + final int reinsert = twoheap[size]; + final Object reinsertv = twovals[size]; + twoheap[size] = 0; + twovals[size] = null; + heapifyDown(reinsert, reinsertv); + } else { + twoheap[0] = 0; + twovals[0] = null; + } + ++modCount; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + * @param val Value to reinsert. + */ + private void heapifyDown(int reinsert, Object val) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (twoheap[1] <= twoheap[2]) ? 1 : 2; + if (fourheap[0] < twoheap[best]) { + twoheap[0] = fourheap[0]; + twovals[0] = fourvals[0]; + heapifyDown4(0, reinsert, val); + } else { + twoheap[0] = twoheap[best]; + twovals[0] = twovals[best]; + heapifyDown2(best, reinsert, val); + } + return; + } + heapifyDown2(0, reinsert, val); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Value to reinsert. + */ + private void heapifyDown2(int twopos, int cur, Object val) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + int best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && best > twoheap[right]) { + bestchild = right; + best = twoheap[right]; + } + if (cur <= best) { + break; + } + twoheap[twopos] = best; + twovals[twopos] = twovals[bestchild]; + twopos = bestchild; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Down for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + * @param val Value to reinsert. + */ + private void heapifyDown4(int fourpos, int cur, Object val) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + int best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + int nextchild = fourheap[candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (cur <= best) { + break; + } + fourheap[fourpos] = best; + fourvals[fourpos] = fourvals[bestchild]; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + + @Override + public int peekKey() { + return twoheap[0]; + } + + @Override + @SuppressWarnings("unchecked") + public V peekValue() { + return (V)twovals[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(IntegerObjectMinHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) { + buf.append(iter.getKey()).append(':').append(iter.getValue()).append(','); + } + buf.append(']'); + return buf.toString(); + } + + @Override + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (IntegerObjectHeap.UnsortedIter<V> iter = heap.unsortedIter(); iter.valid(); iter.next()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements IntegerObjectHeap.UnsortedIter<V> { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. + */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @Override + public int getKey() { + return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } + + @SuppressWarnings("unchecked") + + @Override + public V getValue() { + return (V)((pos < TWO_HEAP_MAX_SIZE) ? twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerPriorityObject.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerPriorityObject.java index 2014de65..f007b9fc 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerPriorityObject.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerPriorityObject.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ObjectHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ObjectHeap.java index 2e20ed56..b5dbbb0e 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ObjectHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ObjectHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,53 +23,24 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import java.util.Arrays; - -import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter; /** - * Basic in-memory heap structure. - * - * This heap is built lazily: if you first add many elements, then poll the - * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log - * n). This is implemented via a simple validTo counter. + * Basic in-memory heap for K values. * * @author Erich Schubert + * + * @apiviz.has UnsortedIter + * + * @param <K> Key type */ -public abstract class ObjectHeap<K> extends AbstractHeap { - /** - * Heap storage: queue - */ - protected transient Object[] queue; - - /** - * Constructor with initial capacity. - * - * @param size initial capacity - */ - public ObjectHeap(int size) { - super(); - this.size = 0; - this.queue = new Object[size]; - } - +public interface ObjectHeap<K> { /** * Add a key-value pair to the heap * * @param key Key */ - public void add(Object key) { - // resize when needed - if (size + 1 > queue.length) { - resize(size + 1); - } - // final int pos = size; - this.queue[size] = key; - this.size += 1; - heapifyUp(size - 1, key); - validSize += 1; - heapModified(); - } + void add(K key); /** * Add a key-value pair to the heap, except if the new element is larger than @@ -78,13 +49,7 @@ public abstract class ObjectHeap<K> extends AbstractHeap { * @param key Key * @param max Maximum size of heap */ - public void add(Object key, int max) { - if (size < max) { - add(key); - } else if (comp(key, peek())) { - replaceTopElement(key); - } - } + void add(K key, int max); /** * Combined operation that removes the top element, and inserts a new element @@ -93,175 +58,69 @@ public abstract class ObjectHeap<K> extends AbstractHeap { * @param e New element to insert * @return Previous top element of the heap */ - @SuppressWarnings("unchecked") - public Object replaceTopElement(Object e) { - ensureValid(); - Object oldroot = (K) queue[0]; - heapifyDown(0, e); - heapModified(); - return oldroot; - } + K replaceTopElement(K e); /** * Get the current top key * * @return Top key */ - @SuppressWarnings("unchecked") - public Object peek() { - if (size == 0) { - throw new ArrayIndexOutOfBoundsException("Peek() on an empty heap!"); - } - ensureValid(); - return (K) queue[0]; - } + K peek(); /** * Remove the first element * * @return Top element */ - public Object poll() { - return removeAt(0); - } + K poll(); /** - * Repair the heap + * Delete all elements from the heap. */ - protected void ensureValid() { - if (validSize != size) { - if (size > 1) { - // Parent of first invalid - int nextmin = validSize > 0 ? ((validSize - 1) >>> 1) : 0; - int curmin = MathUtil.nextAllOnesInt(nextmin); // Next line - int nextmax = curmin - 1; // End of valid line - int pos = (size - 2) >>> 1; // Parent of last element - // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin+", "+nextmin); - while (pos >= nextmin) { - // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin); - while (pos >= curmin) { - if (!heapifyDown(pos, queue[pos])) { - final int parent = (pos - 1) >>> 1; - if (parent < curmin) { - nextmin = Math.min(nextmin, parent); - nextmax = Math.max(nextmax, parent); - } - } - pos--; - } - curmin = nextmin; - pos = Math.min(pos, nextmax); - nextmax = -1; - } - } - validSize = size; - } - } + void clear(); /** - * Remove the element at the given position. + * Query the size * - * @param pos Element position. - * @return Removed element + * @return Size */ - @SuppressWarnings("unchecked") - protected Object removeAt(int pos) { - if (pos < 0 || pos >= size) { - return null; - } - final Object top = (K) queue[0]; - // Replacement object: - final Object reinkey = queue[size - 1]; - // Keep heap in sync - if (validSize == size) { - size -= 1; - validSize -= 1; - heapifyDown(pos, reinkey); - } else { - size -= 1; - validSize = Math.min(pos >>> 1, validSize); - queue[pos] = reinkey; - } - heapModified(); - return top; - } + public int size(); /** - * Execute a "Heapify Upwards" aka "SiftUp". Used in insertions. + * Is the heap empty? * - * @param pos insertion position - * @param curkey Current key + * @return {@code true} when the size is 0. */ - protected void heapifyUp(int pos, Object curkey) { - while (pos > 0) { - final int parent = (pos - 1) >>> 1; - Object parkey = queue[parent]; - - if (comp(curkey, parkey)) { // Compare - break; - } - queue[pos] = parkey; - pos = parent; - } - queue[pos] = curkey; - } + public boolean isEmpty(); /** - * Execute a "Heapify Downwards" aka "SiftDown". Used in deletions. + * Get an unsorted iterator to inspect the heap. * - * @param ipos re-insertion position - * @param curkey Current key - * @return true when the order was changed + * @return Iterator */ - protected boolean heapifyDown(final int ipos, Object curkey) { - int pos = ipos; - final int half = size >>> 1; - while (pos < half) { - // Get left child (must exist!) - int cpos = (pos << 1) + 1; - Object chikey = queue[cpos]; - // Test right child, if present - final int rchild = cpos + 1; - if (rchild < size) { - Object right = queue[rchild]; - if (comp(chikey, right)) { // Compare - cpos = rchild; - chikey = right; - } - } - - if (comp(chikey, curkey)) { // Compare - break; - } - queue[pos] = chikey; - pos = cpos; - } - queue[pos] = curkey; - return (pos == ipos); - } + UnsortedIter<K> unsortedIter(); /** - * Test whether we need to resize to have the requested capacity. + * Unsorted iterator - in heap order. Does not poll the heap. * - * @param requiredSize required capacity - */ - protected final void resize(int requiredSize) { - queue = Arrays.copyOf(queue, desiredSize(requiredSize, queue.length)); - } - - /** - * Delete all elements from the heap. + * <pre> + * {@code + * for (ObjectHeap.UnsortedIter<K> iter = heap.unsortedIter(); iter.valid(); iter.next()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert + * + * @param <K> Key type */ - @Override - public void clear() { - super.clear(); - for (int i = 0; i < size; i++) { - queue[i] = null; - } + public static interface UnsortedIter<K> extends Iter { + /** + * Get the iterators current object. + * + * @return Current object + */ + K get(); } - - /** - * Compare two objects - */ - abstract protected boolean comp(Object o1, Object o2); } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedHeap.java index 2daaafa4..32f57999 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -25,11 +25,8 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; import java.util.ArrayList; import java.util.Comparator; -import java.util.Iterator; import java.util.List; -import de.lmu.ifi.dbs.elki.utilities.iterator.MergedIterator; - /** * A size-limited heap similar to {@link TopBoundedHeap}, discarding elements * with the highest value. However, this variation keeps a list of tied @@ -43,7 +40,7 @@ public class TiedTopBoundedHeap<E> extends TopBoundedHeap<E> { /** * List to keep ties in. */ - private List<E> ties = new ArrayList<E>(); + private List<E> ties = new ArrayList<>(); /** * Constructor with comparator. @@ -75,12 +72,6 @@ public class TiedTopBoundedHeap<E> extends TopBoundedHeap<E> { ties.clear(); } - @SuppressWarnings("unchecked") - @Override - public Iterator<E> iterator() { - return new MergedIterator<E>(ties.iterator(), super.iterator()); - } - @Override public E peek() { if (ties.isEmpty()) { @@ -131,4 +122,44 @@ public class TiedTopBoundedHeap<E> extends TopBoundedHeap<E> { ties.clear(); } } + + /** + * Get an unordered heap iterator. + * + * @return Iterator. + */ + @Override + public UnorderedIter unorderedIter() { + return new UnorderedIter(); + } + + /** + * Unordered heap iterator class. + * + * @author Erich Schubert + * + */ + public class UnorderedIter extends Heap<E>.UnorderedIter { + /** + * Constructor. + */ + protected UnorderedIter() { + super(); + } + + @Override + public boolean valid() { + return pos < size(); + } + + @Override + public E get() { + final int ssize = TiedTopBoundedHeap.super.size(); + if (pos < ssize) { + return super.get(); + } else { + return ties.get(pos - ssize); + } + } + } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedUpdatableHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedUpdatableHeap.java index 8e39af1d..3905030f 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedUpdatableHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedUpdatableHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -29,7 +29,6 @@ import java.util.Iterator; import java.util.List; import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; -import de.lmu.ifi.dbs.elki.utilities.iterator.MergedIterator; /** * A size-limited heap similar to {@link TopBoundedHeap}, discarding elements @@ -44,7 +43,7 @@ public class TiedTopBoundedUpdatableHeap<E> extends TopBoundedUpdatableHeap<E> { /** * List to keep ties in. */ - private List<E> ties = new ArrayList<E>(); + private List<E> ties = new ArrayList<>(); /** * Constructor with comparator. @@ -76,12 +75,6 @@ public class TiedTopBoundedUpdatableHeap<E> extends TopBoundedUpdatableHeap<E> { ties.clear(); } - @SuppressWarnings("unchecked") - @Override - public Iterator<E> iterator() { - return new MergedIterator<E>(ties.iterator(), super.iterator()); - } - @Override public void offerAt(int pos, E e) { if(pos == IN_TIES) { diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedHeap.java index 07b595f6..9adda9f3 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -69,7 +69,6 @@ public class TopBoundedHeap<E> extends Heap<E> { return; } // Peek at the top element, return if we are worse. - ensureValid(); final int comp; if (comparator == null) { @SuppressWarnings("unchecked") diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedUpdatableHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedUpdatableHeap.java index 75f2abcf..4a591d4c 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedUpdatableHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedUpdatableHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -68,7 +68,6 @@ public class TopBoundedUpdatableHeap<E> extends UpdatableHeap<E> { super.offerAt(pos, e); return; } - ensureValid(); if (compare(e, queue[0]) < 0) { // while we did not change, this still was "successful". return; diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/UpdatableHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/UpdatableHeap.java index 1ab5f4df..a585d94d 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/UpdatableHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/UpdatableHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -49,7 +49,7 @@ public class UpdatableHeap<O> extends Heap<O> { /** * Holds the indices in the heap of each element. */ - protected final TObjectIntMap<Object> index = new TObjectIntHashMap<Object>(100, 0.5f, NO_VALUE); + protected final TObjectIntMap<Object> index = new TObjectIntHashMap<>(100, 0.5f, NO_VALUE); /** * Simple constructor with default size. @@ -105,43 +105,32 @@ public class UpdatableHeap<O> extends Heap<O> { * @param e Element */ protected void offerAt(final int pos, O e) { - if(pos == NO_VALUE) { + if (pos == NO_VALUE) { // resize when needed - if(size + 1 > queue.length) { + if (size + 1 > queue.length) { resize(size + 1); } - // final int pos = size; - this.queue[size] = e; index.put(e, size); - size += 1; - // We do NOT YET update the heap. This is done lazily. + size++; + heapifyUp(size - 1, e); heapModified(); return; - } - else { + } else { assert (pos >= 0) : "Unexpected negative position."; assert (queue[pos].equals(e)); // Did the value improve? - if(comparator == null) { + if (comparator == null) { @SuppressWarnings("unchecked") Comparable<Object> c = (Comparable<Object>) e; - if(c.compareTo(queue[pos]) >= 0) { + if (c.compareTo(queue[pos]) >= 0) { return; } - } - else { - if(comparator.compare(e, queue[pos]) >= 0) { + } else { + if (comparator.compare(e, queue[pos]) >= 0) { return; } } - if(pos >= validSize) { - queue[pos] = e; - // validSize = Math.min(pos, validSize); - } - else { - // ensureValid(); - heapifyUp(pos, e); - } + heapifyUp(pos, e); heapModified(); return; } @@ -149,7 +138,7 @@ public class UpdatableHeap<O> extends Heap<O> { @Override protected O removeAt(int pos) { - if(pos < 0 || pos >= size) { + if (pos < 0 || pos >= size) { return null; } @SuppressWarnings("unchecked") @@ -158,34 +147,22 @@ public class UpdatableHeap<O> extends Heap<O> { final Object reinsert = queue[size - 1]; queue[size - 1] = null; // Keep heap in sync? - if(validSize == size) { - size -= 1; - validSize -= 1; - if(comparator != null) { - if(comparator.compare(ret, reinsert) > 0) { - heapifyUpComparator(pos, reinsert); - } - else { - heapifyDownComparator(pos, reinsert); - } + size--; + if (comparator != null) { + if (comparator.compare(ret, reinsert) > 0) { + heapifyUpComparator(pos, reinsert); + } else { + heapifyDownComparator(pos, reinsert); } - else { - @SuppressWarnings("unchecked") - Comparable<Object> comp = (Comparable<Object>) ret; - if(comp.compareTo(reinsert) > 0) { - heapifyUpComparable(pos, reinsert); - } - else { - heapifyDownComparable(pos, reinsert); - } + } else { + @SuppressWarnings("unchecked") + Comparable<Object> comp = (Comparable<Object>) ret; + if (comp.compareTo(reinsert) > 0) { + heapifyUpComparable(pos, reinsert); + } else { + heapifyDownComparable(pos, reinsert); } } - else { - size -= 1; - validSize = Math.min(pos >>> 1, validSize); - queue[pos] = reinsert; - index.put(reinsert, pos); - } heapModified(); // Keep index up to date index.remove(ret); @@ -200,10 +177,9 @@ public class UpdatableHeap<O> extends Heap<O> { */ public O removeObject(O e) { int pos = index.get(e); - if(pos >= 0) { + if (pos >= 0) { return removeAt(pos); - } - else { + } else { return null; } } @@ -214,7 +190,7 @@ public class UpdatableHeap<O> extends Heap<O> { index.remove(node); return node; } - + @Override public O replaceTopElement(O e) { O node = super.replaceTopElement(e); @@ -232,11 +208,11 @@ public class UpdatableHeap<O> extends Heap<O> { @SuppressWarnings("unchecked") protected void heapifyUpComparable(int pos, Object elem) { final Comparable<Object> cur = (Comparable<Object>) elem; // queue[pos]; - while(pos > 0) { + while (pos > 0) { final int parent = (pos - 1) >>> 1; Object par = queue[parent]; - if(cur.compareTo(par) >= 0) { + if (cur.compareTo(par) >= 0) { break; } queue[pos] = par; @@ -255,11 +231,11 @@ public class UpdatableHeap<O> extends Heap<O> { */ @Override protected void heapifyUpComparator(int pos, Object cur) { - while(pos > 0) { + while (pos > 0) { final int parent = (pos - 1) >>> 1; Object par = queue[parent]; - if(comparator.compare(cur, par) >= 0) { + if (comparator.compare(cur, par) >= 0) { break; } queue[pos] = par; @@ -276,21 +252,21 @@ public class UpdatableHeap<O> extends Heap<O> { Comparable<Object> cur = (Comparable<Object>) reinsert; int pos = ipos; final int half = size >>> 1; - while(pos < half) { + while (pos < half) { // Get left child (must exist!) int cpos = (pos << 1) + 1; Object child = queue[cpos]; // Test right child, if present final int rchild = cpos + 1; - if(rchild < size) { + if (rchild < size) { Object right = queue[rchild]; - if(((Comparable<Object>) child).compareTo(right) > 0) { + if (((Comparable<Object>) child).compareTo(right) > 0) { cpos = rchild; child = right; } } - if(cur.compareTo(child) <= 0) { + if (cur.compareTo(child) <= 0) { break; } queue[pos] = child; @@ -299,32 +275,32 @@ public class UpdatableHeap<O> extends Heap<O> { } queue[pos] = cur; index.put(cur, pos); - return (pos == ipos); + return (pos != ipos); } @Override protected boolean heapifyDownComparator(final int ipos, Object cur) { int pos = ipos; final int half = size >>> 1; - while(pos < half) { + while (pos < half) { int min = pos; Object best = cur; final int lchild = (pos << 1) + 1; Object left = queue[lchild]; - if(comparator.compare(best, left) > 0) { + if (comparator.compare(best, left) > 0) { min = lchild; best = left; } final int rchild = lchild + 1; - if(rchild < size) { + if (rchild < size) { Object right = queue[rchild]; - if(comparator.compare(best, right) > 0) { + if (comparator.compare(best, right) > 0) { min = rchild; best = right; } } - if(min == pos) { + if (min == pos) { break; } queue[pos] = best; @@ -333,6 +309,6 @@ public class UpdatableHeap<O> extends Heap<O> { } queue[pos] = cur; index.put(cur, pos); - return (pos == ipos); + return (pos != ipos); } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/package-info.java index 3f193171..83be37f4 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team |