diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/utilities/datastructures')
26 files changed, 427 insertions, 3507 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/QuickSelect.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/QuickSelect.java index 3746ff87..d0a2cd20 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/QuickSelect.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/QuickSelect.java @@ -1347,7 +1347,7 @@ public class QuickSelect { pivot.seek(end - 1); // Begin partitioning - int i = start, j = end - 3; + int i = start, j = end - 2; refi.seek(i); refj.seek(j); // This is classic quicksort stuff diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayLikeUtil.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayLikeUtil.java index 8fab6f2b..f03d39e9 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayLikeUtil.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayLikeUtil.java @@ -66,6 +66,11 @@ public final class ArrayLikeUtil { public static final NumberVectorAdapter<?> NUMBERVECTORADAPTER = new NumberVectorAdapter<Double>(); /** + * Adapter for matrixes, reinterpreted as flat arrays. + */ + public static final FlatMatrixAdapter FLATMATRIXADAPTER = new FlatMatrixAdapter(); + + /** * Use a double array in the array API. */ public static final NumberArrayAdapter<Double, double[]> DOUBLEARRAYADAPTER = new DoubleArrayAdapter(); diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FlatMatrixAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FlatMatrixAdapter.java new file mode 100644 index 00000000..18fbae5d --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FlatMatrixAdapter.java @@ -0,0 +1,85 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix; + +/** + * Use a matrix as array, by flattening it into a sequence. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ +class FlatMatrixAdapter implements NumberArrayAdapter<Double, Matrix> { + /** + * Constructor. + * + * Use the static instance from {@link ArrayLikeUtil}! + */ + protected FlatMatrixAdapter() { + super(); + } + + @Override + public int size(Matrix array) { + return array.getColumnDimensionality() * array.getRowDimensionality(); + } + + @Override + @Deprecated + public Double get(Matrix array, int off) throws IndexOutOfBoundsException { + return Double.valueOf(getDouble(array, off)); + } + + @Override + public double getDouble(Matrix array, int off) throws IndexOutOfBoundsException { + return array.get(off / array.getColumnDimensionality(), off % array.getColumnDimensionality()); + } + + @Override + public float getFloat(Matrix array, int off) throws IndexOutOfBoundsException { + return (float) getDouble(array, off); + } + + @Override + public int getInteger(Matrix array, int off) throws IndexOutOfBoundsException { + return (int) getDouble(array, off); + } + + @Override + public short getShort(Matrix array, int off) throws IndexOutOfBoundsException { + return (short) getDouble(array, off); + } + + @Override + public long getLong(Matrix array, int off) throws IndexOutOfBoundsException { + return (long) getDouble(array, off); + } + + @Override + public byte getByte(Matrix array, int off) throws IndexOutOfBoundsException { + return (byte) getDouble(array, off); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMaxHeap.java index 222fe83a..d6937b4d 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMaxHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMaxHeap.java @@ -24,32 +24,11 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; */ import java.util.Arrays; -import java.util.ConcurrentModificationException; import de.lmu.ifi.dbs.elki.math.MathUtil; /** - * Advanced priority queue class, based on a binary heap (for small sizes), - * which will for larger heaps be accompanied by a 4-ary heap (attached below - * the root of the two-ary heap, making the root actually 3-ary). - * - * This code was automatically instantiated for the type: Comparable - * - * This combination was found to work quite well in benchmarks, but YMMV. - * - * Some other observations from benchmarking: - * <ul> - * <li>Bulk loading did not improve things</li> - * <li>Primitive heaps are substantially faster.</li> - * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and - * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> - * <li>Workload makes a huge difference. A load-once, poll-until-empty priority - * queue is something different than e.g. a top-k heap, which will see a lot of - * top element replacements.</li> - * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for - * top-k make a difference.</li> - * <li>Different day, different benchmark results ...</li> - * </ul> + * Binary heap for primitive types. * * @author Erich Schubert * @@ -63,44 +42,16 @@ public class ComparableMaxHeap<K extends Comparable<? super K>> implements Objec protected Comparable<Object>[] twoheap; /** - * Extension heap. - */ - protected Comparable<Object>[] fourheap; - - /** * Current size of heap. */ protected int size; /** - * (Structural) modification counter. Used to invalidate iterators. - */ - protected int modCount = 0; - - /** - * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. - */ - private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; - - /** * Initial size of the 2-ary heap. */ private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; /** - * Initial size of 4-ary heap when initialized. - * - * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 - * - * 85 = 4-ary heap of height 3: 21 + 4*4*4 - * - * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 - * - * Since we last grew by 255 (to 511), let's use 341. - */ - private final static int FOUR_HEAP_INITIAL_SIZE = 341; - - /** * Constructor, with default size. */ @SuppressWarnings("unchecked") @@ -109,9 +60,6 @@ public class ComparableMaxHeap<K extends Comparable<? super K>> implements Objec Comparable<Object>[] twoheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, TWO_HEAP_INITIAL_SIZE); this.twoheap = twoheap; - this.fourheap = null; - this.size = 0; - this.modCount = 0; } /** @@ -122,27 +70,15 @@ public class ComparableMaxHeap<K extends Comparable<? super K>> implements Objec @SuppressWarnings("unchecked") public ComparableMaxHeap(int minsize) { super(); - if (minsize < TWO_HEAP_MAX_SIZE) { - final int size = MathUtil.nextPow2Int(minsize + 1) - 1; - Comparable<Object>[] twoheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, size); + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + Comparable<Object>[] twoheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, size); - this.twoheap = twoheap; - this.fourheap = null; - } else { - Comparable<Object>[] twoheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, TWO_HEAP_INITIAL_SIZE); - Comparable<Object>[] fourheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, minsize - TWO_HEAP_MAX_SIZE); - this.twoheap = twoheap; - this.fourheap = fourheap; - } - this.size = 0; - this.modCount = 0; + this.twoheap = twoheap; } @Override public void clear() { size = 0; - ++modCount; - fourheap = null; Arrays.fill(twoheap, null); } @@ -161,29 +97,14 @@ public class ComparableMaxHeap<K extends Comparable<? super K>> implements Objec public void add(K o) { final Comparable<Object> co = (Comparable<Object>)o; // System.err.println("Add: " + o); - if (size < TWO_HEAP_MAX_SIZE) { - if (size >= twoheap.length) { - // Grow by one layer. - twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); - } - final int twopos = size; - twoheap[twopos] = co; - ++size; - heapifyUp2(twopos, co); - ++modCount; - } else { - final int fourpos = size - TWO_HEAP_MAX_SIZE; - if (fourheap == null) { - fourheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, FOUR_HEAP_INITIAL_SIZE); - } else if (fourpos >= fourheap.length) { - // Grow extension heap by half. - fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); - } - fourheap[fourpos] = co; - ++size; - heapifyUp4(fourpos, co); - ++modCount; + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); } + final int twopos = size; + twoheap[twopos] = co; + ++size; + heapifyUp(twopos, co); } @Override @@ -200,7 +121,6 @@ public class ComparableMaxHeap<K extends Comparable<? super K>> implements Objec public K replaceTopElement(K reinsert) { final Comparable<Object> ret = twoheap[0]; heapifyDown((Comparable<Object>) reinsert); - ++modCount; return (K)ret; } @@ -210,7 +130,7 @@ public class ComparableMaxHeap<K extends Comparable<? super K>> implements Objec * @param twopos Position in 2-ary heap. * @param cur Current object */ - private void heapifyUp2(int twopos, Comparable<Object> cur) { + private void heapifyUp(int twopos, Comparable<Object> cur) { while (twopos > 0) { final int parent = (twopos - 1) >>> 1; Comparable<Object> par = twoheap[parent]; @@ -223,81 +143,30 @@ public class ComparableMaxHeap<K extends Comparable<? super K>> implements Objec twoheap[twopos] = cur; } - /** - * Heapify-Up method for 4-ary heap. - * - * @param fourpos Position in 4-ary heap. - * @param cur Current object - */ - private void heapifyUp4(int fourpos, Comparable<Object> cur) { - while (fourpos > 0) { - final int parent = (fourpos - 1) >> 2; - Comparable<Object> par = fourheap[parent]; - if (cur.compareTo(par) <= 0) { - break; - } - fourheap[fourpos] = par; - fourpos = parent; - } - if (fourpos == 0 && twoheap[0].compareTo(cur) < 0) { - fourheap[0] = twoheap[0]; - twoheap[0] = cur; - } else { - fourheap[fourpos] = cur; - } - } - @Override @SuppressWarnings("unchecked") public K poll() { final Comparable<Object> ret = twoheap[0]; --size; // Replacement object: - if (size >= TWO_HEAP_MAX_SIZE) { - final int last = size - TWO_HEAP_MAX_SIZE; - final Comparable<Object> reinsert = fourheap[last]; - fourheap[last] = null; - heapifyDown(reinsert); - } else if (size > 0) { + if (size > 0) { final Comparable<Object> reinsert = twoheap[size]; twoheap[size] = null; heapifyDown(reinsert); } else { twoheap[0] = null; } - ++modCount; return (K)ret; } /** * Invoke heapify-down for the root object. * - * @param reinsert Object to insert. - */ - private void heapifyDown(Comparable<Object> reinsert) { - if (size > TWO_HEAP_MAX_SIZE) { - // Special case: 3-ary situation. - final int best = (twoheap[1].compareTo(twoheap[2]) >= 0) ? 1 : 2; - if (fourheap[0].compareTo(twoheap[best]) > 0) { - twoheap[0] = fourheap[0]; - heapifyDown4(0, reinsert); - } else { - twoheap[0] = twoheap[best]; - heapifyDown2(best, reinsert); - } - return; - } - heapifyDown2(0, reinsert); - } - - /** - * Heapify-Down for 2-ary heap. - * - * @param twopos Position in 2-ary heap. - * @param cur Current object + * @param cur Object to insert. */ - private void heapifyDown2(int twopos, Comparable<Object> cur) { - final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + private void heapifyDown(Comparable<Object> cur) { + final int stop = size >>> 1; + int twopos = 0; while (twopos < stop) { int bestchild = (twopos << 1) + 1; Comparable<Object> best = twoheap[bestchild]; @@ -315,51 +184,6 @@ public class ComparableMaxHeap<K extends Comparable<? super K>> implements Objec twoheap[twopos] = cur; } - /** - * Heapify-Down for 4-ary heap. - * - * @param fourpos Position in 4-ary heap. - * @param cur Current object - */ - private void heapifyDown4(int fourpos, Comparable<Object> cur) { - final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; - while (fourpos < stop) { - final int child = (fourpos << 2) + 1; - Comparable<Object> best = fourheap[child]; - int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; - if (size > minsize) { - Comparable<Object> nextchild = fourheap[candidate]; - if (best.compareTo(nextchild) < 0) { - bestchild = candidate; - best = nextchild; - } - - minsize += 2; - if (size >= minsize) { - nextchild = fourheap[++candidate]; - if (best.compareTo(nextchild) < 0) { - bestchild = candidate; - best = nextchild; - } - - if (size > minsize) { - nextchild = fourheap[++candidate]; - if (best.compareTo(nextchild) < 0) { - bestchild = candidate; - best = nextchild; - } - } - } - } - if (cur.compareTo(best) >= 0) { - break; - } - fourheap[fourpos] = best; - fourpos = bestchild; - } - fourheap[fourpos] = cur; - } - @Override @SuppressWarnings("unchecked") public K peek() { @@ -403,16 +227,8 @@ public class ComparableMaxHeap<K extends Comparable<? super K>> implements Objec */ protected int pos = 0; - /** - * Modification counter we were initialized at. - */ - protected final int myModCount = modCount; - @Override public boolean valid() { - if (modCount != myModCount) { - throw new ConcurrentModificationException(); - } return pos < size; } @@ -425,7 +241,7 @@ public class ComparableMaxHeap<K extends Comparable<? super K>> implements Objec @Override public K get() { - return (K)((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + return (K)twoheap[pos]; } } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMinHeap.java index 3cc5a02f..167c6bc7 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMinHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMinHeap.java @@ -24,32 +24,11 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; */ import java.util.Arrays; -import java.util.ConcurrentModificationException; import de.lmu.ifi.dbs.elki.math.MathUtil; /** - * Advanced priority queue class, based on a binary heap (for small sizes), - * which will for larger heaps be accompanied by a 4-ary heap (attached below - * the root of the two-ary heap, making the root actually 3-ary). - * - * This code was automatically instantiated for the type: Comparable - * - * This combination was found to work quite well in benchmarks, but YMMV. - * - * Some other observations from benchmarking: - * <ul> - * <li>Bulk loading did not improve things</li> - * <li>Primitive heaps are substantially faster.</li> - * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and - * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> - * <li>Workload makes a huge difference. A load-once, poll-until-empty priority - * queue is something different than e.g. a top-k heap, which will see a lot of - * top element replacements.</li> - * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for - * top-k make a difference.</li> - * <li>Different day, different benchmark results ...</li> - * </ul> + * Binary heap for primitive types. * * @author Erich Schubert * @@ -63,44 +42,16 @@ public class ComparableMinHeap<K extends Comparable<? super K>> implements Objec protected Comparable<Object>[] twoheap; /** - * Extension heap. - */ - protected Comparable<Object>[] fourheap; - - /** * Current size of heap. */ protected int size; /** - * (Structural) modification counter. Used to invalidate iterators. - */ - protected int modCount = 0; - - /** - * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. - */ - private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; - - /** * Initial size of the 2-ary heap. */ private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; /** - * Initial size of 4-ary heap when initialized. - * - * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 - * - * 85 = 4-ary heap of height 3: 21 + 4*4*4 - * - * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 - * - * Since we last grew by 255 (to 511), let's use 341. - */ - private final static int FOUR_HEAP_INITIAL_SIZE = 341; - - /** * Constructor, with default size. */ @SuppressWarnings("unchecked") @@ -109,9 +60,6 @@ public class ComparableMinHeap<K extends Comparable<? super K>> implements Objec Comparable<Object>[] twoheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, TWO_HEAP_INITIAL_SIZE); this.twoheap = twoheap; - this.fourheap = null; - this.size = 0; - this.modCount = 0; } /** @@ -122,27 +70,15 @@ public class ComparableMinHeap<K extends Comparable<? super K>> implements Objec @SuppressWarnings("unchecked") public ComparableMinHeap(int minsize) { super(); - if (minsize < TWO_HEAP_MAX_SIZE) { - final int size = MathUtil.nextPow2Int(minsize + 1) - 1; - Comparable<Object>[] twoheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, size); + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + Comparable<Object>[] twoheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, size); - this.twoheap = twoheap; - this.fourheap = null; - } else { - Comparable<Object>[] twoheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, TWO_HEAP_INITIAL_SIZE); - Comparable<Object>[] fourheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, minsize - TWO_HEAP_MAX_SIZE); - this.twoheap = twoheap; - this.fourheap = fourheap; - } - this.size = 0; - this.modCount = 0; + this.twoheap = twoheap; } @Override public void clear() { size = 0; - ++modCount; - fourheap = null; Arrays.fill(twoheap, null); } @@ -161,29 +97,14 @@ public class ComparableMinHeap<K extends Comparable<? super K>> implements Objec public void add(K o) { final Comparable<Object> co = (Comparable<Object>)o; // System.err.println("Add: " + o); - if (size < TWO_HEAP_MAX_SIZE) { - if (size >= twoheap.length) { - // Grow by one layer. - twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); - } - final int twopos = size; - twoheap[twopos] = co; - ++size; - heapifyUp2(twopos, co); - ++modCount; - } else { - final int fourpos = size - TWO_HEAP_MAX_SIZE; - if (fourheap == null) { - fourheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, FOUR_HEAP_INITIAL_SIZE); - } else if (fourpos >= fourheap.length) { - // Grow extension heap by half. - fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); - } - fourheap[fourpos] = co; - ++size; - heapifyUp4(fourpos, co); - ++modCount; + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); } + final int twopos = size; + twoheap[twopos] = co; + ++size; + heapifyUp(twopos, co); } @Override @@ -200,7 +121,6 @@ public class ComparableMinHeap<K extends Comparable<? super K>> implements Objec public K replaceTopElement(K reinsert) { final Comparable<Object> ret = twoheap[0]; heapifyDown((Comparable<Object>) reinsert); - ++modCount; return (K)ret; } @@ -210,7 +130,7 @@ public class ComparableMinHeap<K extends Comparable<? super K>> implements Objec * @param twopos Position in 2-ary heap. * @param cur Current object */ - private void heapifyUp2(int twopos, Comparable<Object> cur) { + private void heapifyUp(int twopos, Comparable<Object> cur) { while (twopos > 0) { final int parent = (twopos - 1) >>> 1; Comparable<Object> par = twoheap[parent]; @@ -223,81 +143,30 @@ public class ComparableMinHeap<K extends Comparable<? super K>> implements Objec twoheap[twopos] = cur; } - /** - * Heapify-Up method for 4-ary heap. - * - * @param fourpos Position in 4-ary heap. - * @param cur Current object - */ - private void heapifyUp4(int fourpos, Comparable<Object> cur) { - while (fourpos > 0) { - final int parent = (fourpos - 1) >> 2; - Comparable<Object> par = fourheap[parent]; - if (cur.compareTo(par) >= 0) { - break; - } - fourheap[fourpos] = par; - fourpos = parent; - } - if (fourpos == 0 && twoheap[0].compareTo(cur) > 0) { - fourheap[0] = twoheap[0]; - twoheap[0] = cur; - } else { - fourheap[fourpos] = cur; - } - } - @Override @SuppressWarnings("unchecked") public K poll() { final Comparable<Object> ret = twoheap[0]; --size; // Replacement object: - if (size >= TWO_HEAP_MAX_SIZE) { - final int last = size - TWO_HEAP_MAX_SIZE; - final Comparable<Object> reinsert = fourheap[last]; - fourheap[last] = null; - heapifyDown(reinsert); - } else if (size > 0) { + if (size > 0) { final Comparable<Object> reinsert = twoheap[size]; twoheap[size] = null; heapifyDown(reinsert); } else { twoheap[0] = null; } - ++modCount; return (K)ret; } /** * Invoke heapify-down for the root object. * - * @param reinsert Object to insert. - */ - private void heapifyDown(Comparable<Object> reinsert) { - if (size > TWO_HEAP_MAX_SIZE) { - // Special case: 3-ary situation. - final int best = (twoheap[1].compareTo(twoheap[2]) <= 0) ? 1 : 2; - if (fourheap[0].compareTo(twoheap[best]) < 0) { - twoheap[0] = fourheap[0]; - heapifyDown4(0, reinsert); - } else { - twoheap[0] = twoheap[best]; - heapifyDown2(best, reinsert); - } - return; - } - heapifyDown2(0, reinsert); - } - - /** - * Heapify-Down for 2-ary heap. - * - * @param twopos Position in 2-ary heap. - * @param cur Current object + * @param cur Object to insert. */ - private void heapifyDown2(int twopos, Comparable<Object> cur) { - final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + private void heapifyDown(Comparable<Object> cur) { + final int stop = size >>> 1; + int twopos = 0; while (twopos < stop) { int bestchild = (twopos << 1) + 1; Comparable<Object> best = twoheap[bestchild]; @@ -315,51 +184,6 @@ public class ComparableMinHeap<K extends Comparable<? super K>> implements Objec twoheap[twopos] = cur; } - /** - * Heapify-Down for 4-ary heap. - * - * @param fourpos Position in 4-ary heap. - * @param cur Current object - */ - private void heapifyDown4(int fourpos, Comparable<Object> cur) { - final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; - while (fourpos < stop) { - final int child = (fourpos << 2) + 1; - Comparable<Object> best = fourheap[child]; - int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; - if (size > minsize) { - Comparable<Object> nextchild = fourheap[candidate]; - if (best.compareTo(nextchild) > 0) { - bestchild = candidate; - best = nextchild; - } - - minsize += 2; - if (size >= minsize) { - nextchild = fourheap[++candidate]; - if (best.compareTo(nextchild) > 0) { - bestchild = candidate; - best = nextchild; - } - - if (size > minsize) { - nextchild = fourheap[++candidate]; - if (best.compareTo(nextchild) > 0) { - bestchild = candidate; - best = nextchild; - } - } - } - } - if (cur.compareTo(best) <= 0) { - break; - } - fourheap[fourpos] = best; - fourpos = bestchild; - } - fourheap[fourpos] = cur; - } - @Override @SuppressWarnings("unchecked") public K peek() { @@ -403,16 +227,8 @@ public class ComparableMinHeap<K extends Comparable<? super K>> implements Objec */ protected int pos = 0; - /** - * Modification counter we were initialized at. - */ - protected final int myModCount = modCount; - @Override public boolean valid() { - if (modCount != myModCount) { - throw new ConcurrentModificationException(); - } return pos < size; } @@ -425,7 +241,7 @@ public class ComparableMinHeap<K extends Comparable<? super K>> implements Objec @Override public K get() { - return (K)((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + return (K)twoheap[pos]; } } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMaxHeap.java index 7b660d31..e5887c73 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMaxHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMaxHeap.java @@ -24,32 +24,11 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; */ import java.util.Arrays; -import java.util.ConcurrentModificationException; import de.lmu.ifi.dbs.elki.math.MathUtil; /** - * Advanced priority queue class, based on a binary heap (for small sizes), - * which will for larger heaps be accompanied by a 4-ary heap (attached below - * the root of the two-ary heap, making the root actually 3-ary). - * - * This code was automatically instantiated for the type: Comparator - * - * This combination was found to work quite well in benchmarks, but YMMV. - * - * Some other observations from benchmarking: - * <ul> - * <li>Bulk loading did not improve things</li> - * <li>Primitive heaps are substantially faster.</li> - * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and - * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> - * <li>Workload makes a huge difference. A load-once, poll-until-empty priority - * queue is something different than e.g. a top-k heap, which will see a lot of - * top element replacements.</li> - * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for - * top-k make a difference.</li> - * <li>Different day, different benchmark results ...</li> - * </ul> + * Binary heap for primitive types. * * @author Erich Schubert * @@ -63,43 +42,15 @@ public class ComparatorMaxHeap<K> implements ObjectHeap<K> { protected Object[] twoheap; /** - * Extension heap. - */ - protected Object[] fourheap; - - /** * Current size of heap. */ protected int size; /** - * (Structural) modification counter. Used to invalidate iterators. - */ - protected int modCount = 0; - - /** - * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. - */ - private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; - - /** * Initial size of the 2-ary heap. */ private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; - /** - * Initial size of 4-ary heap when initialized. - * - * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 - * - * 85 = 4-ary heap of height 3: 21 + 4*4*4 - * - * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 - * - * Since we last grew by 255 (to 511), let's use 341. - */ - private final static int FOUR_HEAP_INITIAL_SIZE = 341; - /** * Comparator @@ -117,9 +68,6 @@ public class ComparatorMaxHeap<K> implements ObjectHeap<K> { Object[] twoheap = new Object[TWO_HEAP_INITIAL_SIZE]; this.twoheap = twoheap; - this.fourheap = null; - this.size = 0; - this.modCount = 0; } /** @@ -132,27 +80,15 @@ public class ComparatorMaxHeap<K> implements ObjectHeap<K> { public ComparatorMaxHeap(int minsize, java.util.Comparator<? super K> comparator) { super(); this.comparator = (java.util.Comparator<Object>) java.util.Comparator.class.cast(comparator); - if (minsize < TWO_HEAP_MAX_SIZE) { - final int size = MathUtil.nextPow2Int(minsize + 1) - 1; - Object[] twoheap = new Object[size]; + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + Object[] twoheap = new Object[size]; - this.twoheap = twoheap; - this.fourheap = null; - } else { - Object[] twoheap = new Object[TWO_HEAP_INITIAL_SIZE]; - Object[] fourheap = new Object[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; - this.twoheap = twoheap; - this.fourheap = fourheap; - } - this.size = 0; - this.modCount = 0; + this.twoheap = twoheap; } @Override public void clear() { size = 0; - ++modCount; - fourheap = null; Arrays.fill(twoheap, null); } @@ -170,29 +106,14 @@ public class ComparatorMaxHeap<K> implements ObjectHeap<K> { public void add(K o) { final Object co = o; // System.err.println("Add: " + o); - if (size < TWO_HEAP_MAX_SIZE) { - if (size >= twoheap.length) { - // Grow by one layer. - twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); - } - final int twopos = size; - twoheap[twopos] = co; - ++size; - heapifyUp2(twopos, co); - ++modCount; - } else { - final int fourpos = size - TWO_HEAP_MAX_SIZE; - if (fourheap == null) { - fourheap = new Object[FOUR_HEAP_INITIAL_SIZE]; - } else if (fourpos >= fourheap.length) { - // Grow extension heap by half. - fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); - } - fourheap[fourpos] = co; - ++size; - heapifyUp4(fourpos, co); - ++modCount; + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); } + final int twopos = size; + twoheap[twopos] = co; + ++size; + heapifyUp(twopos, co); } @Override @@ -209,7 +130,6 @@ public class ComparatorMaxHeap<K> implements ObjectHeap<K> { public K replaceTopElement(K reinsert) { final Object ret = twoheap[0]; heapifyDown( reinsert); - ++modCount; return (K)ret; } @@ -219,7 +139,7 @@ public class ComparatorMaxHeap<K> implements ObjectHeap<K> { * @param twopos Position in 2-ary heap. * @param cur Current object */ - private void heapifyUp2(int twopos, Object cur) { + private void heapifyUp(int twopos, Object cur) { while (twopos > 0) { final int parent = (twopos - 1) >>> 1; Object par = twoheap[parent]; @@ -232,81 +152,30 @@ public class ComparatorMaxHeap<K> implements ObjectHeap<K> { twoheap[twopos] = cur; } - /** - * Heapify-Up method for 4-ary heap. - * - * @param fourpos Position in 4-ary heap. - * @param cur Current object - */ - private void heapifyUp4(int fourpos, Object cur) { - while (fourpos > 0) { - final int parent = (fourpos - 1) >> 2; - Object par = fourheap[parent]; - if (comparator.compare(cur, par) <= 0) { - break; - } - fourheap[fourpos] = par; - fourpos = parent; - } - if (fourpos == 0 && comparator.compare(twoheap[0], cur) < 0) { - fourheap[0] = twoheap[0]; - twoheap[0] = cur; - } else { - fourheap[fourpos] = cur; - } - } - @Override @SuppressWarnings("unchecked") public K poll() { final Object ret = twoheap[0]; --size; // Replacement object: - if (size >= TWO_HEAP_MAX_SIZE) { - final int last = size - TWO_HEAP_MAX_SIZE; - final Object reinsert = fourheap[last]; - fourheap[last] = null; - heapifyDown(reinsert); - } else if (size > 0) { + if (size > 0) { final Object reinsert = twoheap[size]; twoheap[size] = null; heapifyDown(reinsert); } else { twoheap[0] = null; } - ++modCount; return (K)ret; } /** * Invoke heapify-down for the root object. * - * @param reinsert Object to insert. - */ - private void heapifyDown(Object reinsert) { - if (size > TWO_HEAP_MAX_SIZE) { - // Special case: 3-ary situation. - final int best = (comparator.compare(twoheap[1], twoheap[2]) >= 0) ? 1 : 2; - if (comparator.compare(fourheap[0], twoheap[best]) > 0) { - twoheap[0] = fourheap[0]; - heapifyDown4(0, reinsert); - } else { - twoheap[0] = twoheap[best]; - heapifyDown2(best, reinsert); - } - return; - } - heapifyDown2(0, reinsert); - } - - /** - * Heapify-Down for 2-ary heap. - * - * @param twopos Position in 2-ary heap. - * @param cur Current object + * @param cur Object to insert. */ - private void heapifyDown2(int twopos, Object cur) { - final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + private void heapifyDown(Object cur) { + final int stop = size >>> 1; + int twopos = 0; while (twopos < stop) { int bestchild = (twopos << 1) + 1; Object best = twoheap[bestchild]; @@ -324,51 +193,6 @@ public class ComparatorMaxHeap<K> implements ObjectHeap<K> { twoheap[twopos] = cur; } - /** - * Heapify-Down for 4-ary heap. - * - * @param fourpos Position in 4-ary heap. - * @param cur Current object - */ - private void heapifyDown4(int fourpos, Object cur) { - final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; - while (fourpos < stop) { - final int child = (fourpos << 2) + 1; - Object best = fourheap[child]; - int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; - if (size > minsize) { - Object nextchild = fourheap[candidate]; - if (comparator.compare(best, nextchild) < 0) { - bestchild = candidate; - best = nextchild; - } - - minsize += 2; - if (size >= minsize) { - nextchild = fourheap[++candidate]; - if (comparator.compare(best, nextchild) < 0) { - bestchild = candidate; - best = nextchild; - } - - if (size > minsize) { - nextchild = fourheap[++candidate]; - if (comparator.compare(best, nextchild) < 0) { - bestchild = candidate; - best = nextchild; - } - } - } - } - if (comparator.compare(cur, best) >= 0) { - break; - } - fourheap[fourpos] = best; - fourpos = bestchild; - } - fourheap[fourpos] = cur; - } - @Override @SuppressWarnings("unchecked") public K peek() { @@ -412,16 +236,8 @@ public class ComparatorMaxHeap<K> implements ObjectHeap<K> { */ protected int pos = 0; - /** - * Modification counter we were initialized at. - */ - protected final int myModCount = modCount; - @Override public boolean valid() { - if (modCount != myModCount) { - throw new ConcurrentModificationException(); - } return pos < size; } @@ -434,7 +250,7 @@ public class ComparatorMaxHeap<K> implements ObjectHeap<K> { @Override public K get() { - return (K)((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + return (K)twoheap[pos]; } } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMinHeap.java index e12c5f64..215a78b6 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMinHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMinHeap.java @@ -24,32 +24,11 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; */ import java.util.Arrays; -import java.util.ConcurrentModificationException; import de.lmu.ifi.dbs.elki.math.MathUtil; /** - * Advanced priority queue class, based on a binary heap (for small sizes), - * which will for larger heaps be accompanied by a 4-ary heap (attached below - * the root of the two-ary heap, making the root actually 3-ary). - * - * This code was automatically instantiated for the type: Comparator - * - * This combination was found to work quite well in benchmarks, but YMMV. - * - * Some other observations from benchmarking: - * <ul> - * <li>Bulk loading did not improve things</li> - * <li>Primitive heaps are substantially faster.</li> - * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and - * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> - * <li>Workload makes a huge difference. A load-once, poll-until-empty priority - * queue is something different than e.g. a top-k heap, which will see a lot of - * top element replacements.</li> - * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for - * top-k make a difference.</li> - * <li>Different day, different benchmark results ...</li> - * </ul> + * Binary heap for primitive types. * * @author Erich Schubert * @@ -63,43 +42,15 @@ public class ComparatorMinHeap<K> implements ObjectHeap<K> { protected Object[] twoheap; /** - * Extension heap. - */ - protected Object[] fourheap; - - /** * Current size of heap. */ protected int size; /** - * (Structural) modification counter. Used to invalidate iterators. - */ - protected int modCount = 0; - - /** - * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. - */ - private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; - - /** * Initial size of the 2-ary heap. */ private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; - /** - * Initial size of 4-ary heap when initialized. - * - * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 - * - * 85 = 4-ary heap of height 3: 21 + 4*4*4 - * - * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 - * - * Since we last grew by 255 (to 511), let's use 341. - */ - private final static int FOUR_HEAP_INITIAL_SIZE = 341; - /** * Comparator @@ -117,9 +68,6 @@ public class ComparatorMinHeap<K> implements ObjectHeap<K> { Object[] twoheap = new Object[TWO_HEAP_INITIAL_SIZE]; this.twoheap = twoheap; - this.fourheap = null; - this.size = 0; - this.modCount = 0; } /** @@ -132,27 +80,15 @@ public class ComparatorMinHeap<K> implements ObjectHeap<K> { public ComparatorMinHeap(int minsize, java.util.Comparator<? super K> comparator) { super(); this.comparator = (java.util.Comparator<Object>) java.util.Comparator.class.cast(comparator); - if (minsize < TWO_HEAP_MAX_SIZE) { - final int size = MathUtil.nextPow2Int(minsize + 1) - 1; - Object[] twoheap = new Object[size]; + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + Object[] twoheap = new Object[size]; - this.twoheap = twoheap; - this.fourheap = null; - } else { - Object[] twoheap = new Object[TWO_HEAP_INITIAL_SIZE]; - Object[] fourheap = new Object[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; - this.twoheap = twoheap; - this.fourheap = fourheap; - } - this.size = 0; - this.modCount = 0; + this.twoheap = twoheap; } @Override public void clear() { size = 0; - ++modCount; - fourheap = null; Arrays.fill(twoheap, null); } @@ -170,29 +106,14 @@ public class ComparatorMinHeap<K> implements ObjectHeap<K> { public void add(K o) { final Object co = o; // System.err.println("Add: " + o); - if (size < TWO_HEAP_MAX_SIZE) { - if (size >= twoheap.length) { - // Grow by one layer. - twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); - } - final int twopos = size; - twoheap[twopos] = co; - ++size; - heapifyUp2(twopos, co); - ++modCount; - } else { - final int fourpos = size - TWO_HEAP_MAX_SIZE; - if (fourheap == null) { - fourheap = new Object[FOUR_HEAP_INITIAL_SIZE]; - } else if (fourpos >= fourheap.length) { - // Grow extension heap by half. - fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); - } - fourheap[fourpos] = co; - ++size; - heapifyUp4(fourpos, co); - ++modCount; + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); } + final int twopos = size; + twoheap[twopos] = co; + ++size; + heapifyUp(twopos, co); } @Override @@ -209,7 +130,6 @@ public class ComparatorMinHeap<K> implements ObjectHeap<K> { public K replaceTopElement(K reinsert) { final Object ret = twoheap[0]; heapifyDown( reinsert); - ++modCount; return (K)ret; } @@ -219,7 +139,7 @@ public class ComparatorMinHeap<K> implements ObjectHeap<K> { * @param twopos Position in 2-ary heap. * @param cur Current object */ - private void heapifyUp2(int twopos, Object cur) { + private void heapifyUp(int twopos, Object cur) { while (twopos > 0) { final int parent = (twopos - 1) >>> 1; Object par = twoheap[parent]; @@ -232,81 +152,30 @@ public class ComparatorMinHeap<K> implements ObjectHeap<K> { twoheap[twopos] = cur; } - /** - * Heapify-Up method for 4-ary heap. - * - * @param fourpos Position in 4-ary heap. - * @param cur Current object - */ - private void heapifyUp4(int fourpos, Object cur) { - while (fourpos > 0) { - final int parent = (fourpos - 1) >> 2; - Object par = fourheap[parent]; - if (comparator.compare(cur, par) >= 0) { - break; - } - fourheap[fourpos] = par; - fourpos = parent; - } - if (fourpos == 0 && comparator.compare(twoheap[0], cur) > 0) { - fourheap[0] = twoheap[0]; - twoheap[0] = cur; - } else { - fourheap[fourpos] = cur; - } - } - @Override @SuppressWarnings("unchecked") public K poll() { final Object ret = twoheap[0]; --size; // Replacement object: - if (size >= TWO_HEAP_MAX_SIZE) { - final int last = size - TWO_HEAP_MAX_SIZE; - final Object reinsert = fourheap[last]; - fourheap[last] = null; - heapifyDown(reinsert); - } else if (size > 0) { + if (size > 0) { final Object reinsert = twoheap[size]; twoheap[size] = null; heapifyDown(reinsert); } else { twoheap[0] = null; } - ++modCount; return (K)ret; } /** * Invoke heapify-down for the root object. * - * @param reinsert Object to insert. - */ - private void heapifyDown(Object reinsert) { - if (size > TWO_HEAP_MAX_SIZE) { - // Special case: 3-ary situation. - final int best = (comparator.compare(twoheap[1], twoheap[2]) <= 0) ? 1 : 2; - if (comparator.compare(fourheap[0], twoheap[best]) < 0) { - twoheap[0] = fourheap[0]; - heapifyDown4(0, reinsert); - } else { - twoheap[0] = twoheap[best]; - heapifyDown2(best, reinsert); - } - return; - } - heapifyDown2(0, reinsert); - } - - /** - * Heapify-Down for 2-ary heap. - * - * @param twopos Position in 2-ary heap. - * @param cur Current object + * @param cur Object to insert. */ - private void heapifyDown2(int twopos, Object cur) { - final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + private void heapifyDown(Object cur) { + final int stop = size >>> 1; + int twopos = 0; while (twopos < stop) { int bestchild = (twopos << 1) + 1; Object best = twoheap[bestchild]; @@ -324,51 +193,6 @@ public class ComparatorMinHeap<K> implements ObjectHeap<K> { twoheap[twopos] = cur; } - /** - * Heapify-Down for 4-ary heap. - * - * @param fourpos Position in 4-ary heap. - * @param cur Current object - */ - private void heapifyDown4(int fourpos, Object cur) { - final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; - while (fourpos < stop) { - final int child = (fourpos << 2) + 1; - Object best = fourheap[child]; - int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; - if (size > minsize) { - Object nextchild = fourheap[candidate]; - if (comparator.compare(best, nextchild) > 0) { - bestchild = candidate; - best = nextchild; - } - - minsize += 2; - if (size >= minsize) { - nextchild = fourheap[++candidate]; - if (comparator.compare(best, nextchild) > 0) { - bestchild = candidate; - best = nextchild; - } - - if (size > minsize) { - nextchild = fourheap[++candidate]; - if (comparator.compare(best, nextchild) > 0) { - bestchild = candidate; - best = nextchild; - } - } - } - } - if (comparator.compare(cur, best) <= 0) { - break; - } - fourheap[fourpos] = best; - fourpos = bestchild; - } - fourheap[fourpos] = cur; - } - @Override @SuppressWarnings("unchecked") public K peek() { @@ -412,16 +236,8 @@ public class ComparatorMinHeap<K> implements ObjectHeap<K> { */ protected int pos = 0; - /** - * Modification counter we were initialized at. - */ - protected final int myModCount = modCount; - @Override public boolean valid() { - if (modCount != myModCount) { - throw new ConcurrentModificationException(); - } return pos < size; } @@ -434,7 +250,7 @@ public class ComparatorMinHeap<K> implements ObjectHeap<K> { @Override public K get() { - return (K)((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + return (K)twoheap[pos]; } } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleHeap.java index acf77d86..c82f2a4a 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerHeap.java index c3bf85f4..5d8d31f7 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMaxHeap.java index 34f1e889..e903c23d 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMaxHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMaxHeap.java @@ -24,32 +24,11 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; */ import java.util.Arrays; -import java.util.ConcurrentModificationException; import de.lmu.ifi.dbs.elki.math.MathUtil; /** - * Advanced priority queue class, based on a binary heap (for small sizes), - * which will for larger heaps be accompanied by a 4-ary heap (attached below - * the root of the two-ary heap, making the root actually 3-ary). - * - * This code was automatically instantiated for the types: Double and Integer - * - * This combination was found to work quite well in benchmarks, but YMMV. - * - * Some other observations from benchmarking: - * <ul> - * <li>Bulk loading did not improve things</li> - * <li>Primitive heaps are substantially faster.</li> - * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and - * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> - * <li>Workload makes a huge difference. A load-once, poll-until-empty priority - * queue is something different than e.g. a top-k heap, which will see a lot of - * top element replacements.</li> - * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for - * top-k make a difference.</li> - * <li>Different day, different benchmark results ...</li> - * </ul> + * Binary heap for primitive types. * * @author Erich Schubert * @@ -67,49 +46,16 @@ public class DoubleIntegerMaxHeap implements DoubleIntegerHeap { protected int[] twovals; /** - * Extension heap. - */ - protected double[] fourheap; - - /** - * Extension heapvalues. - */ - protected int[] fourvals; - - /** * Current size of heap. */ protected int size; /** - * (Structural) modification counter. Used to invalidate iterators. - */ - protected int modCount = 0; - - /** - * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. - */ - private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; - - /** * Initial size of the 2-ary heap. */ private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; /** - * Initial size of 4-ary heap when initialized. - * - * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 - * - * 85 = 4-ary heap of height 3: 21 + 4*4*4 - * - * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 - * - * Since we last grew by 255 (to 511), let's use 341. - */ - private final static int FOUR_HEAP_INITIAL_SIZE = 341; - - /** * Constructor, with default size. */ public DoubleIntegerMaxHeap() { @@ -119,10 +65,6 @@ public class DoubleIntegerMaxHeap implements DoubleIntegerHeap { this.twoheap = twoheap; this.twovals = twovals; - this.fourheap = null; - this.fourvals = null; - this.size = 0; - this.modCount = 0; } /** @@ -132,35 +74,17 @@ public class DoubleIntegerMaxHeap implements DoubleIntegerHeap { */ public DoubleIntegerMaxHeap(int minsize) { super(); - if (minsize < TWO_HEAP_MAX_SIZE) { - final int size = MathUtil.nextPow2Int(minsize + 1) - 1; - double[] twoheap = new double[size]; - int[] twovals = new int[size]; + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + double[] twoheap = new double[size]; + int[] twovals = new int[size]; - this.twoheap = twoheap; - this.twovals = twovals; - this.fourheap = null; - this.fourvals = null; - } else { - double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; - int[] twovals = new int[TWO_HEAP_INITIAL_SIZE]; - double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; - int[] fourvals = new int[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; - this.twoheap = twoheap; - this.twovals = twovals; - this.fourheap = fourheap; - this.fourvals = fourvals; - } - this.size = 0; - this.modCount = 0; + this.twoheap = twoheap; + this.twovals = twovals; } @Override public void clear() { size = 0; - ++modCount; - fourheap = null; - fourvals = null; Arrays.fill(twoheap, 0.0); Arrays.fill(twovals, 0); } @@ -180,34 +104,16 @@ public class DoubleIntegerMaxHeap implements DoubleIntegerHeap { final double co = o; final int cv = v; // System.err.println("Add: " + o); - if (size < TWO_HEAP_MAX_SIZE) { - if (size >= twoheap.length) { - // Grow by one layer. - twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); - twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1); - } - final int twopos = size; - twoheap[twopos] = co; - twovals[twopos] = cv; - ++size; - heapifyUp2(twopos, co, cv); - ++modCount; - } else { - final int fourpos = size - TWO_HEAP_MAX_SIZE; - if (fourheap == null) { - fourheap = new double[FOUR_HEAP_INITIAL_SIZE]; - fourvals = new int[FOUR_HEAP_INITIAL_SIZE]; - } else if (fourpos >= fourheap.length) { - // Grow extension heap by half. - fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); - fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1)); - } - fourheap[fourpos] = co; - fourvals[fourpos] = cv; - ++size; - heapifyUp4(fourpos, co, cv); - ++modCount; + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1); } + final int twopos = size; + twoheap[twopos] = co; + twovals[twopos] = cv; + ++size; + heapifyUp(twopos, co, cv); } @Override @@ -222,7 +128,6 @@ public class DoubleIntegerMaxHeap implements DoubleIntegerHeap { @Override public void replaceTopElement(double reinsert, int val) { heapifyDown(reinsert, val); - ++modCount; } /** @@ -232,7 +137,7 @@ public class DoubleIntegerMaxHeap implements DoubleIntegerHeap { * @param cur Current object * @param val Current value */ - private void heapifyUp2(int twopos, double cur, int val) { + private void heapifyUp(int twopos, double cur, int val) { while (twopos > 0) { final int parent = (twopos - 1) >>> 1; double par = twoheap[parent]; @@ -247,47 +152,11 @@ public class DoubleIntegerMaxHeap implements DoubleIntegerHeap { twovals[twopos] = val; } - /** - * Heapify-Up method for 4-ary heap. - * - * @param fourpos Position in 4-ary heap. - * @param cur Current object - * @param val Current value - */ - private void heapifyUp4(int fourpos, double cur, int val) { - while (fourpos > 0) { - final int parent = (fourpos - 1) >> 2; - double par = fourheap[parent]; - if (cur <= par) { - break; - } - fourheap[fourpos] = par; - fourvals[fourpos] = fourvals[parent]; - fourpos = parent; - } - if (fourpos == 0 && twoheap[0] < cur) { - fourheap[0] = twoheap[0]; - fourvals[0] = twovals[0]; - twoheap[0] = cur; - twovals[0] = val; - } else { - fourheap[fourpos] = cur; - fourvals[fourpos] = val; - } - } - @Override public void poll() { --size; // Replacement object: - if (size >= TWO_HEAP_MAX_SIZE) { - final int last = size - TWO_HEAP_MAX_SIZE; - final double reinsert = fourheap[last]; - final int reinsertv = fourvals[last]; - fourheap[last] = 0.0; - fourvals[last] = 0; - heapifyDown(reinsert, reinsertv); - } else if (size > 0) { + if (size > 0) { final double reinsert = twoheap[size]; final int reinsertv = twovals[size]; twoheap[size] = 0.0; @@ -297,42 +166,17 @@ public class DoubleIntegerMaxHeap implements DoubleIntegerHeap { twoheap[0] = 0.0; twovals[0] = 0; } - ++modCount; } /** * Invoke heapify-down for the root object. * - * @param reinsert Object to insert. - * @param val Value to reinsert. - */ - private void heapifyDown(double reinsert, int val) { - if (size > TWO_HEAP_MAX_SIZE) { - // Special case: 3-ary situation. - final int best = (twoheap[1] >= twoheap[2]) ? 1 : 2; - if (fourheap[0] > twoheap[best]) { - twoheap[0] = fourheap[0]; - twovals[0] = fourvals[0]; - heapifyDown4(0, reinsert, val); - } else { - twoheap[0] = twoheap[best]; - twovals[0] = twovals[best]; - heapifyDown2(best, reinsert, val); - } - return; - } - heapifyDown2(0, reinsert, val); - } - - /** - * Heapify-Down for 2-ary heap. - * - * @param twopos Position in 2-ary heap. - * @param cur Current object + * @param cur Object to insert. * @param val Value to reinsert. */ - private void heapifyDown2(int twopos, double cur, int val) { - final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + private void heapifyDown(double cur, int val) { + final int stop = size >>> 1; + int twopos = 0; while (twopos < stop) { int bestchild = (twopos << 1) + 1; double best = twoheap[bestchild]; @@ -352,54 +196,6 @@ public class DoubleIntegerMaxHeap implements DoubleIntegerHeap { twovals[twopos] = val; } - /** - * Heapify-Down for 4-ary heap. - * - * @param fourpos Position in 4-ary heap. - * @param cur Current object - * @param val Value to reinsert. - */ - private void heapifyDown4(int fourpos, double cur, int val) { - final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; - while (fourpos < stop) { - final int child = (fourpos << 2) + 1; - double best = fourheap[child]; - int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; - if (size > minsize) { - double nextchild = fourheap[candidate]; - if (best < nextchild) { - bestchild = candidate; - best = nextchild; - } - - minsize += 2; - if (size >= minsize) { - nextchild = fourheap[++candidate]; - if (best < nextchild) { - bestchild = candidate; - best = nextchild; - } - - if (size > minsize) { - nextchild = fourheap[++candidate]; - if (best < nextchild) { - bestchild = candidate; - best = nextchild; - } - } - } - } - if (cur >= best) { - break; - } - fourheap[fourpos] = best; - fourvals[fourpos] = fourvals[bestchild]; - fourpos = bestchild; - } - fourheap[fourpos] = cur; - fourvals[fourpos] = val; - } - @Override public double peekKey() { return twoheap[0]; @@ -447,16 +243,8 @@ public class DoubleIntegerMaxHeap implements DoubleIntegerHeap { */ protected int pos = 0; - /** - * Modification counter we were initialized at. - */ - protected final int myModCount = modCount; - @Override public boolean valid() { - if (modCount != myModCount) { - throw new ConcurrentModificationException(); - } return pos < size; } @@ -467,12 +255,12 @@ public class DoubleIntegerMaxHeap implements DoubleIntegerHeap { @Override public double getKey() { - return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + return twoheap[pos]; } @Override public int getValue() { - return ((pos < TWO_HEAP_MAX_SIZE) ? twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]); + return twovals[pos]; } } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMinHeap.java index ca6192ad..0e4e2204 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMinHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMinHeap.java @@ -24,32 +24,11 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; */ import java.util.Arrays; -import java.util.ConcurrentModificationException; import de.lmu.ifi.dbs.elki.math.MathUtil; /** - * Advanced priority queue class, based on a binary heap (for small sizes), - * which will for larger heaps be accompanied by a 4-ary heap (attached below - * the root of the two-ary heap, making the root actually 3-ary). - * - * This code was automatically instantiated for the types: Double and Integer - * - * This combination was found to work quite well in benchmarks, but YMMV. - * - * Some other observations from benchmarking: - * <ul> - * <li>Bulk loading did not improve things</li> - * <li>Primitive heaps are substantially faster.</li> - * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and - * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> - * <li>Workload makes a huge difference. A load-once, poll-until-empty priority - * queue is something different than e.g. a top-k heap, which will see a lot of - * top element replacements.</li> - * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for - * top-k make a difference.</li> - * <li>Different day, different benchmark results ...</li> - * </ul> + * Binary heap for primitive types. * * @author Erich Schubert * @@ -67,49 +46,16 @@ public class DoubleIntegerMinHeap implements DoubleIntegerHeap { protected int[] twovals; /** - * Extension heap. - */ - protected double[] fourheap; - - /** - * Extension heapvalues. - */ - protected int[] fourvals; - - /** * Current size of heap. */ protected int size; /** - * (Structural) modification counter. Used to invalidate iterators. - */ - protected int modCount = 0; - - /** - * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. - */ - private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; - - /** * Initial size of the 2-ary heap. */ private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; /** - * Initial size of 4-ary heap when initialized. - * - * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 - * - * 85 = 4-ary heap of height 3: 21 + 4*4*4 - * - * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 - * - * Since we last grew by 255 (to 511), let's use 341. - */ - private final static int FOUR_HEAP_INITIAL_SIZE = 341; - - /** * Constructor, with default size. */ public DoubleIntegerMinHeap() { @@ -119,10 +65,6 @@ public class DoubleIntegerMinHeap implements DoubleIntegerHeap { this.twoheap = twoheap; this.twovals = twovals; - this.fourheap = null; - this.fourvals = null; - this.size = 0; - this.modCount = 0; } /** @@ -132,35 +74,17 @@ public class DoubleIntegerMinHeap implements DoubleIntegerHeap { */ public DoubleIntegerMinHeap(int minsize) { super(); - if (minsize < TWO_HEAP_MAX_SIZE) { - final int size = MathUtil.nextPow2Int(minsize + 1) - 1; - double[] twoheap = new double[size]; - int[] twovals = new int[size]; + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + double[] twoheap = new double[size]; + int[] twovals = new int[size]; - this.twoheap = twoheap; - this.twovals = twovals; - this.fourheap = null; - this.fourvals = null; - } else { - double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; - int[] twovals = new int[TWO_HEAP_INITIAL_SIZE]; - double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; - int[] fourvals = new int[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; - this.twoheap = twoheap; - this.twovals = twovals; - this.fourheap = fourheap; - this.fourvals = fourvals; - } - this.size = 0; - this.modCount = 0; + this.twoheap = twoheap; + this.twovals = twovals; } @Override public void clear() { size = 0; - ++modCount; - fourheap = null; - fourvals = null; Arrays.fill(twoheap, 0.0); Arrays.fill(twovals, 0); } @@ -180,34 +104,16 @@ public class DoubleIntegerMinHeap implements DoubleIntegerHeap { final double co = o; final int cv = v; // System.err.println("Add: " + o); - if (size < TWO_HEAP_MAX_SIZE) { - if (size >= twoheap.length) { - // Grow by one layer. - twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); - twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1); - } - final int twopos = size; - twoheap[twopos] = co; - twovals[twopos] = cv; - ++size; - heapifyUp2(twopos, co, cv); - ++modCount; - } else { - final int fourpos = size - TWO_HEAP_MAX_SIZE; - if (fourheap == null) { - fourheap = new double[FOUR_HEAP_INITIAL_SIZE]; - fourvals = new int[FOUR_HEAP_INITIAL_SIZE]; - } else if (fourpos >= fourheap.length) { - // Grow extension heap by half. - fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); - fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1)); - } - fourheap[fourpos] = co; - fourvals[fourpos] = cv; - ++size; - heapifyUp4(fourpos, co, cv); - ++modCount; + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1); } + final int twopos = size; + twoheap[twopos] = co; + twovals[twopos] = cv; + ++size; + heapifyUp(twopos, co, cv); } @Override @@ -222,7 +128,6 @@ public class DoubleIntegerMinHeap implements DoubleIntegerHeap { @Override public void replaceTopElement(double reinsert, int val) { heapifyDown(reinsert, val); - ++modCount; } /** @@ -232,7 +137,7 @@ public class DoubleIntegerMinHeap implements DoubleIntegerHeap { * @param cur Current object * @param val Current value */ - private void heapifyUp2(int twopos, double cur, int val) { + private void heapifyUp(int twopos, double cur, int val) { while (twopos > 0) { final int parent = (twopos - 1) >>> 1; double par = twoheap[parent]; @@ -247,47 +152,11 @@ public class DoubleIntegerMinHeap implements DoubleIntegerHeap { twovals[twopos] = val; } - /** - * Heapify-Up method for 4-ary heap. - * - * @param fourpos Position in 4-ary heap. - * @param cur Current object - * @param val Current value - */ - private void heapifyUp4(int fourpos, double cur, int val) { - while (fourpos > 0) { - final int parent = (fourpos - 1) >> 2; - double par = fourheap[parent]; - if (cur >= par) { - break; - } - fourheap[fourpos] = par; - fourvals[fourpos] = fourvals[parent]; - fourpos = parent; - } - if (fourpos == 0 && twoheap[0] > cur) { - fourheap[0] = twoheap[0]; - fourvals[0] = twovals[0]; - twoheap[0] = cur; - twovals[0] = val; - } else { - fourheap[fourpos] = cur; - fourvals[fourpos] = val; - } - } - @Override public void poll() { --size; // Replacement object: - if (size >= TWO_HEAP_MAX_SIZE) { - final int last = size - TWO_HEAP_MAX_SIZE; - final double reinsert = fourheap[last]; - final int reinsertv = fourvals[last]; - fourheap[last] = 0.0; - fourvals[last] = 0; - heapifyDown(reinsert, reinsertv); - } else if (size > 0) { + if (size > 0) { final double reinsert = twoheap[size]; final int reinsertv = twovals[size]; twoheap[size] = 0.0; @@ -297,42 +166,17 @@ public class DoubleIntegerMinHeap implements DoubleIntegerHeap { twoheap[0] = 0.0; twovals[0] = 0; } - ++modCount; } /** * Invoke heapify-down for the root object. * - * @param reinsert Object to insert. - * @param val Value to reinsert. - */ - private void heapifyDown(double reinsert, int val) { - if (size > TWO_HEAP_MAX_SIZE) { - // Special case: 3-ary situation. - final int best = (twoheap[1] <= twoheap[2]) ? 1 : 2; - if (fourheap[0] < twoheap[best]) { - twoheap[0] = fourheap[0]; - twovals[0] = fourvals[0]; - heapifyDown4(0, reinsert, val); - } else { - twoheap[0] = twoheap[best]; - twovals[0] = twovals[best]; - heapifyDown2(best, reinsert, val); - } - return; - } - heapifyDown2(0, reinsert, val); - } - - /** - * Heapify-Down for 2-ary heap. - * - * @param twopos Position in 2-ary heap. - * @param cur Current object + * @param cur Object to insert. * @param val Value to reinsert. */ - private void heapifyDown2(int twopos, double cur, int val) { - final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + private void heapifyDown(double cur, int val) { + final int stop = size >>> 1; + int twopos = 0; while (twopos < stop) { int bestchild = (twopos << 1) + 1; double best = twoheap[bestchild]; @@ -352,54 +196,6 @@ public class DoubleIntegerMinHeap implements DoubleIntegerHeap { twovals[twopos] = val; } - /** - * Heapify-Down for 4-ary heap. - * - * @param fourpos Position in 4-ary heap. - * @param cur Current object - * @param val Value to reinsert. - */ - private void heapifyDown4(int fourpos, double cur, int val) { - final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; - while (fourpos < stop) { - final int child = (fourpos << 2) + 1; - double best = fourheap[child]; - int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; - if (size > minsize) { - double nextchild = fourheap[candidate]; - if (best > nextchild) { - bestchild = candidate; - best = nextchild; - } - - minsize += 2; - if (size >= minsize) { - nextchild = fourheap[++candidate]; - if (best > nextchild) { - bestchild = candidate; - best = nextchild; - } - - if (size > minsize) { - nextchild = fourheap[++candidate]; - if (best > nextchild) { - bestchild = candidate; - best = nextchild; - } - } - } - } - if (cur <= best) { - break; - } - fourheap[fourpos] = best; - fourvals[fourpos] = fourvals[bestchild]; - fourpos = bestchild; - } - fourheap[fourpos] = cur; - fourvals[fourpos] = val; - } - @Override public double peekKey() { return twoheap[0]; @@ -447,16 +243,8 @@ public class DoubleIntegerMinHeap implements DoubleIntegerHeap { */ protected int pos = 0; - /** - * Modification counter we were initialized at. - */ - protected final int myModCount = modCount; - @Override public boolean valid() { - if (modCount != myModCount) { - throw new ConcurrentModificationException(); - } return pos < size; } @@ -467,12 +255,12 @@ public class DoubleIntegerMinHeap implements DoubleIntegerHeap { @Override public double getKey() { - return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + return twoheap[pos]; } @Override public int getValue() { - return ((pos < TWO_HEAP_MAX_SIZE) ? twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]); + return twovals[pos]; } } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMaxHeap.java index 6d15656c..b7508a61 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMaxHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMaxHeap.java @@ -24,32 +24,11 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; */ import java.util.Arrays; -import java.util.ConcurrentModificationException; import de.lmu.ifi.dbs.elki.math.MathUtil; /** - * Advanced priority queue class, based on a binary heap (for small sizes), - * which will for larger heaps be accompanied by a 4-ary heap (attached below - * the root of the two-ary heap, making the root actually 3-ary). - * - * This code was automatically instantiated for the types: Double and Long - * - * This combination was found to work quite well in benchmarks, but YMMV. - * - * Some other observations from benchmarking: - * <ul> - * <li>Bulk loading did not improve things</li> - * <li>Primitive heaps are substantially faster.</li> - * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and - * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> - * <li>Workload makes a huge difference. A load-once, poll-until-empty priority - * queue is something different than e.g. a top-k heap, which will see a lot of - * top element replacements.</li> - * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for - * top-k make a difference.</li> - * <li>Different day, different benchmark results ...</li> - * </ul> + * Binary heap for primitive types. * * @author Erich Schubert * @@ -67,49 +46,16 @@ public class DoubleLongMaxHeap implements DoubleLongHeap { protected long[] twovals; /** - * Extension heap. - */ - protected double[] fourheap; - - /** - * Extension heapvalues. - */ - protected long[] fourvals; - - /** * Current size of heap. */ protected int size; /** - * (Structural) modification counter. Used to invalidate iterators. - */ - protected int modCount = 0; - - /** - * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. - */ - private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; - - /** * Initial size of the 2-ary heap. */ private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; /** - * Initial size of 4-ary heap when initialized. - * - * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 - * - * 85 = 4-ary heap of height 3: 21 + 4*4*4 - * - * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 - * - * Since we last grew by 255 (to 511), let's use 341. - */ - private final static int FOUR_HEAP_INITIAL_SIZE = 341; - - /** * Constructor, with default size. */ public DoubleLongMaxHeap() { @@ -119,10 +65,6 @@ public class DoubleLongMaxHeap implements DoubleLongHeap { this.twoheap = twoheap; this.twovals = twovals; - this.fourheap = null; - this.fourvals = null; - this.size = 0; - this.modCount = 0; } /** @@ -132,35 +74,17 @@ public class DoubleLongMaxHeap implements DoubleLongHeap { */ public DoubleLongMaxHeap(int minsize) { super(); - if (minsize < TWO_HEAP_MAX_SIZE) { - final int size = MathUtil.nextPow2Int(minsize + 1) - 1; - double[] twoheap = new double[size]; - long[] twovals = new long[size]; + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + double[] twoheap = new double[size]; + long[] twovals = new long[size]; - this.twoheap = twoheap; - this.twovals = twovals; - this.fourheap = null; - this.fourvals = null; - } else { - double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; - long[] twovals = new long[TWO_HEAP_INITIAL_SIZE]; - double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; - long[] fourvals = new long[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; - this.twoheap = twoheap; - this.twovals = twovals; - this.fourheap = fourheap; - this.fourvals = fourvals; - } - this.size = 0; - this.modCount = 0; + this.twoheap = twoheap; + this.twovals = twovals; } @Override public void clear() { size = 0; - ++modCount; - fourheap = null; - fourvals = null; Arrays.fill(twoheap, 0.0); Arrays.fill(twovals, 0); } @@ -180,34 +104,16 @@ public class DoubleLongMaxHeap implements DoubleLongHeap { final double co = o; final long cv = v; // System.err.println("Add: " + o); - if (size < TWO_HEAP_MAX_SIZE) { - if (size >= twoheap.length) { - // Grow by one layer. - twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); - twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1); - } - final int twopos = size; - twoheap[twopos] = co; - twovals[twopos] = cv; - ++size; - heapifyUp2(twopos, co, cv); - ++modCount; - } else { - final int fourpos = size - TWO_HEAP_MAX_SIZE; - if (fourheap == null) { - fourheap = new double[FOUR_HEAP_INITIAL_SIZE]; - fourvals = new long[FOUR_HEAP_INITIAL_SIZE]; - } else if (fourpos >= fourheap.length) { - // Grow extension heap by half. - fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); - fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1)); - } - fourheap[fourpos] = co; - fourvals[fourpos] = cv; - ++size; - heapifyUp4(fourpos, co, cv); - ++modCount; + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1); } + final int twopos = size; + twoheap[twopos] = co; + twovals[twopos] = cv; + ++size; + heapifyUp(twopos, co, cv); } @Override @@ -222,7 +128,6 @@ public class DoubleLongMaxHeap implements DoubleLongHeap { @Override public void replaceTopElement(double reinsert, long val) { heapifyDown(reinsert, val); - ++modCount; } /** @@ -232,7 +137,7 @@ public class DoubleLongMaxHeap implements DoubleLongHeap { * @param cur Current object * @param val Current value */ - private void heapifyUp2(int twopos, double cur, long val) { + private void heapifyUp(int twopos, double cur, long val) { while (twopos > 0) { final int parent = (twopos - 1) >>> 1; double par = twoheap[parent]; @@ -247,47 +152,11 @@ public class DoubleLongMaxHeap implements DoubleLongHeap { twovals[twopos] = val; } - /** - * Heapify-Up method for 4-ary heap. - * - * @param fourpos Position in 4-ary heap. - * @param cur Current object - * @param val Current value - */ - private void heapifyUp4(int fourpos, double cur, long val) { - while (fourpos > 0) { - final int parent = (fourpos - 1) >> 2; - double par = fourheap[parent]; - if (cur <= par) { - break; - } - fourheap[fourpos] = par; - fourvals[fourpos] = fourvals[parent]; - fourpos = parent; - } - if (fourpos == 0 && twoheap[0] < cur) { - fourheap[0] = twoheap[0]; - fourvals[0] = twovals[0]; - twoheap[0] = cur; - twovals[0] = val; - } else { - fourheap[fourpos] = cur; - fourvals[fourpos] = val; - } - } - @Override public void poll() { --size; // Replacement object: - if (size >= TWO_HEAP_MAX_SIZE) { - final int last = size - TWO_HEAP_MAX_SIZE; - final double reinsert = fourheap[last]; - final long reinsertv = fourvals[last]; - fourheap[last] = 0.0; - fourvals[last] = 0; - heapifyDown(reinsert, reinsertv); - } else if (size > 0) { + if (size > 0) { final double reinsert = twoheap[size]; final long reinsertv = twovals[size]; twoheap[size] = 0.0; @@ -297,42 +166,17 @@ public class DoubleLongMaxHeap implements DoubleLongHeap { twoheap[0] = 0.0; twovals[0] = 0; } - ++modCount; } /** * Invoke heapify-down for the root object. * - * @param reinsert Object to insert. - * @param val Value to reinsert. - */ - private void heapifyDown(double reinsert, long val) { - if (size > TWO_HEAP_MAX_SIZE) { - // Special case: 3-ary situation. - final int best = (twoheap[1] >= twoheap[2]) ? 1 : 2; - if (fourheap[0] > twoheap[best]) { - twoheap[0] = fourheap[0]; - twovals[0] = fourvals[0]; - heapifyDown4(0, reinsert, val); - } else { - twoheap[0] = twoheap[best]; - twovals[0] = twovals[best]; - heapifyDown2(best, reinsert, val); - } - return; - } - heapifyDown2(0, reinsert, val); - } - - /** - * Heapify-Down for 2-ary heap. - * - * @param twopos Position in 2-ary heap. - * @param cur Current object + * @param cur Object to insert. * @param val Value to reinsert. */ - private void heapifyDown2(int twopos, double cur, long val) { - final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + private void heapifyDown(double cur, long val) { + final int stop = size >>> 1; + int twopos = 0; while (twopos < stop) { int bestchild = (twopos << 1) + 1; double best = twoheap[bestchild]; @@ -352,54 +196,6 @@ public class DoubleLongMaxHeap implements DoubleLongHeap { twovals[twopos] = val; } - /** - * Heapify-Down for 4-ary heap. - * - * @param fourpos Position in 4-ary heap. - * @param cur Current object - * @param val Value to reinsert. - */ - private void heapifyDown4(int fourpos, double cur, long val) { - final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; - while (fourpos < stop) { - final int child = (fourpos << 2) + 1; - double best = fourheap[child]; - int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; - if (size > minsize) { - double nextchild = fourheap[candidate]; - if (best < nextchild) { - bestchild = candidate; - best = nextchild; - } - - minsize += 2; - if (size >= minsize) { - nextchild = fourheap[++candidate]; - if (best < nextchild) { - bestchild = candidate; - best = nextchild; - } - - if (size > minsize) { - nextchild = fourheap[++candidate]; - if (best < nextchild) { - bestchild = candidate; - best = nextchild; - } - } - } - } - if (cur >= best) { - break; - } - fourheap[fourpos] = best; - fourvals[fourpos] = fourvals[bestchild]; - fourpos = bestchild; - } - fourheap[fourpos] = cur; - fourvals[fourpos] = val; - } - @Override public double peekKey() { return twoheap[0]; @@ -447,16 +243,8 @@ public class DoubleLongMaxHeap implements DoubleLongHeap { */ protected int pos = 0; - /** - * Modification counter we were initialized at. - */ - protected final int myModCount = modCount; - @Override public boolean valid() { - if (modCount != myModCount) { - throw new ConcurrentModificationException(); - } return pos < size; } @@ -467,12 +255,12 @@ public class DoubleLongMaxHeap implements DoubleLongHeap { @Override public double getKey() { - return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + return twoheap[pos]; } @Override public long getValue() { - return ((pos < TWO_HEAP_MAX_SIZE) ? twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]); + return twovals[pos]; } } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMinHeap.java index d38eb6e3..9fbe0300 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMinHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMinHeap.java @@ -24,32 +24,11 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; */ import java.util.Arrays; -import java.util.ConcurrentModificationException; import de.lmu.ifi.dbs.elki.math.MathUtil; /** - * Advanced priority queue class, based on a binary heap (for small sizes), - * which will for larger heaps be accompanied by a 4-ary heap (attached below - * the root of the two-ary heap, making the root actually 3-ary). - * - * This code was automatically instantiated for the types: Double and Long - * - * This combination was found to work quite well in benchmarks, but YMMV. - * - * Some other observations from benchmarking: - * <ul> - * <li>Bulk loading did not improve things</li> - * <li>Primitive heaps are substantially faster.</li> - * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and - * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> - * <li>Workload makes a huge difference. A load-once, poll-until-empty priority - * queue is something different than e.g. a top-k heap, which will see a lot of - * top element replacements.</li> - * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for - * top-k make a difference.</li> - * <li>Different day, different benchmark results ...</li> - * </ul> + * Binary heap for primitive types. * * @author Erich Schubert * @@ -67,49 +46,16 @@ public class DoubleLongMinHeap implements DoubleLongHeap { protected long[] twovals; /** - * Extension heap. - */ - protected double[] fourheap; - - /** - * Extension heapvalues. - */ - protected long[] fourvals; - - /** * Current size of heap. */ protected int size; /** - * (Structural) modification counter. Used to invalidate iterators. - */ - protected int modCount = 0; - - /** - * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. - */ - private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; - - /** * Initial size of the 2-ary heap. */ private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; /** - * Initial size of 4-ary heap when initialized. - * - * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 - * - * 85 = 4-ary heap of height 3: 21 + 4*4*4 - * - * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 - * - * Since we last grew by 255 (to 511), let's use 341. - */ - private final static int FOUR_HEAP_INITIAL_SIZE = 341; - - /** * Constructor, with default size. */ public DoubleLongMinHeap() { @@ -119,10 +65,6 @@ public class DoubleLongMinHeap implements DoubleLongHeap { this.twoheap = twoheap; this.twovals = twovals; - this.fourheap = null; - this.fourvals = null; - this.size = 0; - this.modCount = 0; } /** @@ -132,35 +74,17 @@ public class DoubleLongMinHeap implements DoubleLongHeap { */ public DoubleLongMinHeap(int minsize) { super(); - if (minsize < TWO_HEAP_MAX_SIZE) { - final int size = MathUtil.nextPow2Int(minsize + 1) - 1; - double[] twoheap = new double[size]; - long[] twovals = new long[size]; + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + double[] twoheap = new double[size]; + long[] twovals = new long[size]; - this.twoheap = twoheap; - this.twovals = twovals; - this.fourheap = null; - this.fourvals = null; - } else { - double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; - long[] twovals = new long[TWO_HEAP_INITIAL_SIZE]; - double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; - long[] fourvals = new long[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; - this.twoheap = twoheap; - this.twovals = twovals; - this.fourheap = fourheap; - this.fourvals = fourvals; - } - this.size = 0; - this.modCount = 0; + this.twoheap = twoheap; + this.twovals = twovals; } @Override public void clear() { size = 0; - ++modCount; - fourheap = null; - fourvals = null; Arrays.fill(twoheap, 0.0); Arrays.fill(twovals, 0); } @@ -180,34 +104,16 @@ public class DoubleLongMinHeap implements DoubleLongHeap { final double co = o; final long cv = v; // System.err.println("Add: " + o); - if (size < TWO_HEAP_MAX_SIZE) { - if (size >= twoheap.length) { - // Grow by one layer. - twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); - twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1); - } - final int twopos = size; - twoheap[twopos] = co; - twovals[twopos] = cv; - ++size; - heapifyUp2(twopos, co, cv); - ++modCount; - } else { - final int fourpos = size - TWO_HEAP_MAX_SIZE; - if (fourheap == null) { - fourheap = new double[FOUR_HEAP_INITIAL_SIZE]; - fourvals = new long[FOUR_HEAP_INITIAL_SIZE]; - } else if (fourpos >= fourheap.length) { - // Grow extension heap by half. - fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); - fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1)); - } - fourheap[fourpos] = co; - fourvals[fourpos] = cv; - ++size; - heapifyUp4(fourpos, co, cv); - ++modCount; + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1); } + final int twopos = size; + twoheap[twopos] = co; + twovals[twopos] = cv; + ++size; + heapifyUp(twopos, co, cv); } @Override @@ -222,7 +128,6 @@ public class DoubleLongMinHeap implements DoubleLongHeap { @Override public void replaceTopElement(double reinsert, long val) { heapifyDown(reinsert, val); - ++modCount; } /** @@ -232,7 +137,7 @@ public class DoubleLongMinHeap implements DoubleLongHeap { * @param cur Current object * @param val Current value */ - private void heapifyUp2(int twopos, double cur, long val) { + private void heapifyUp(int twopos, double cur, long val) { while (twopos > 0) { final int parent = (twopos - 1) >>> 1; double par = twoheap[parent]; @@ -247,47 +152,11 @@ public class DoubleLongMinHeap implements DoubleLongHeap { twovals[twopos] = val; } - /** - * Heapify-Up method for 4-ary heap. - * - * @param fourpos Position in 4-ary heap. - * @param cur Current object - * @param val Current value - */ - private void heapifyUp4(int fourpos, double cur, long val) { - while (fourpos > 0) { - final int parent = (fourpos - 1) >> 2; - double par = fourheap[parent]; - if (cur >= par) { - break; - } - fourheap[fourpos] = par; - fourvals[fourpos] = fourvals[parent]; - fourpos = parent; - } - if (fourpos == 0 && twoheap[0] > cur) { - fourheap[0] = twoheap[0]; - fourvals[0] = twovals[0]; - twoheap[0] = cur; - twovals[0] = val; - } else { - fourheap[fourpos] = cur; - fourvals[fourpos] = val; - } - } - @Override public void poll() { --size; // Replacement object: - if (size >= TWO_HEAP_MAX_SIZE) { - final int last = size - TWO_HEAP_MAX_SIZE; - final double reinsert = fourheap[last]; - final long reinsertv = fourvals[last]; - fourheap[last] = 0.0; - fourvals[last] = 0; - heapifyDown(reinsert, reinsertv); - } else if (size > 0) { + if (size > 0) { final double reinsert = twoheap[size]; final long reinsertv = twovals[size]; twoheap[size] = 0.0; @@ -297,42 +166,17 @@ public class DoubleLongMinHeap implements DoubleLongHeap { twoheap[0] = 0.0; twovals[0] = 0; } - ++modCount; } /** * Invoke heapify-down for the root object. * - * @param reinsert Object to insert. - * @param val Value to reinsert. - */ - private void heapifyDown(double reinsert, long val) { - if (size > TWO_HEAP_MAX_SIZE) { - // Special case: 3-ary situation. - final int best = (twoheap[1] <= twoheap[2]) ? 1 : 2; - if (fourheap[0] < twoheap[best]) { - twoheap[0] = fourheap[0]; - twovals[0] = fourvals[0]; - heapifyDown4(0, reinsert, val); - } else { - twoheap[0] = twoheap[best]; - twovals[0] = twovals[best]; - heapifyDown2(best, reinsert, val); - } - return; - } - heapifyDown2(0, reinsert, val); - } - - /** - * Heapify-Down for 2-ary heap. - * - * @param twopos Position in 2-ary heap. - * @param cur Current object + * @param cur Object to insert. * @param val Value to reinsert. */ - private void heapifyDown2(int twopos, double cur, long val) { - final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + private void heapifyDown(double cur, long val) { + final int stop = size >>> 1; + int twopos = 0; while (twopos < stop) { int bestchild = (twopos << 1) + 1; double best = twoheap[bestchild]; @@ -352,54 +196,6 @@ public class DoubleLongMinHeap implements DoubleLongHeap { twovals[twopos] = val; } - /** - * Heapify-Down for 4-ary heap. - * - * @param fourpos Position in 4-ary heap. - * @param cur Current object - * @param val Value to reinsert. - */ - private void heapifyDown4(int fourpos, double cur, long val) { - final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; - while (fourpos < stop) { - final int child = (fourpos << 2) + 1; - double best = fourheap[child]; - int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; - if (size > minsize) { - double nextchild = fourheap[candidate]; - if (best > nextchild) { - bestchild = candidate; - best = nextchild; - } - - minsize += 2; - if (size >= minsize) { - nextchild = fourheap[++candidate]; - if (best > nextchild) { - bestchild = candidate; - best = nextchild; - } - - if (size > minsize) { - nextchild = fourheap[++candidate]; - if (best > nextchild) { - bestchild = candidate; - best = nextchild; - } - } - } - } - if (cur <= best) { - break; - } - fourheap[fourpos] = best; - fourvals[fourpos] = fourvals[bestchild]; - fourpos = bestchild; - } - fourheap[fourpos] = cur; - fourvals[fourpos] = val; - } - @Override public double peekKey() { return twoheap[0]; @@ -447,16 +243,8 @@ public class DoubleLongMinHeap implements DoubleLongHeap { */ protected int pos = 0; - /** - * Modification counter we were initialized at. - */ - protected final int myModCount = modCount; - @Override public boolean valid() { - if (modCount != myModCount) { - throw new ConcurrentModificationException(); - } return pos < size; } @@ -467,12 +255,12 @@ public class DoubleLongMinHeap implements DoubleLongHeap { @Override public double getKey() { - return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + return twoheap[pos]; } @Override public long getValue() { - return ((pos < TWO_HEAP_MAX_SIZE) ? twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]); + return twovals[pos]; } } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMaxHeap.java index 7ea28f14..2c74b34b 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMaxHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMaxHeap.java @@ -24,32 +24,11 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; */ import java.util.Arrays; -import java.util.ConcurrentModificationException; import de.lmu.ifi.dbs.elki.math.MathUtil; /** - * Advanced priority queue class, based on a binary heap (for small sizes), - * which will for larger heaps be accompanied by a 4-ary heap (attached below - * the root of the two-ary heap, making the root actually 3-ary). - * - * This code was automatically instantiated for the type: Double - * - * This combination was found to work quite well in benchmarks, but YMMV. - * - * Some other observations from benchmarking: - * <ul> - * <li>Bulk loading did not improve things</li> - * <li>Primitive heaps are substantially faster.</li> - * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and - * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> - * <li>Workload makes a huge difference. A load-once, poll-until-empty priority - * queue is something different than e.g. a top-k heap, which will see a lot of - * top element replacements.</li> - * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for - * top-k make a difference.</li> - * <li>Different day, different benchmark results ...</li> - * </ul> + * Binary heap for primitive types. * * @author Erich Schubert * @@ -62,44 +41,16 @@ public class DoubleMaxHeap implements DoubleHeap { protected double[] twoheap; /** - * Extension heap. - */ - protected double[] fourheap; - - /** * Current size of heap. */ protected int size; /** - * (Structural) modification counter. Used to invalidate iterators. - */ - protected int modCount = 0; - - /** - * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. - */ - private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; - - /** * Initial size of the 2-ary heap. */ private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; /** - * Initial size of 4-ary heap when initialized. - * - * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 - * - * 85 = 4-ary heap of height 3: 21 + 4*4*4 - * - * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 - * - * Since we last grew by 255 (to 511), let's use 341. - */ - private final static int FOUR_HEAP_INITIAL_SIZE = 341; - - /** * Constructor, with default size. */ public DoubleMaxHeap() { @@ -107,9 +58,6 @@ public class DoubleMaxHeap implements DoubleHeap { double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; this.twoheap = twoheap; - this.fourheap = null; - this.size = 0; - this.modCount = 0; } /** @@ -119,27 +67,15 @@ public class DoubleMaxHeap implements DoubleHeap { */ public DoubleMaxHeap(int minsize) { super(); - if (minsize < TWO_HEAP_MAX_SIZE) { - final int size = MathUtil.nextPow2Int(minsize + 1) - 1; - double[] twoheap = new double[size]; + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + double[] twoheap = new double[size]; - this.twoheap = twoheap; - this.fourheap = null; - } else { - double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; - double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; - this.twoheap = twoheap; - this.fourheap = fourheap; - } - this.size = 0; - this.modCount = 0; + this.twoheap = twoheap; } @Override public void clear() { size = 0; - ++modCount; - fourheap = null; Arrays.fill(twoheap, 0.0); } @@ -157,29 +93,14 @@ public class DoubleMaxHeap implements DoubleHeap { public void add(double o) { final double co = o; // System.err.println("Add: " + o); - if (size < TWO_HEAP_MAX_SIZE) { - if (size >= twoheap.length) { - // Grow by one layer. - twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); - } - final int twopos = size; - twoheap[twopos] = co; - ++size; - heapifyUp2(twopos, co); - ++modCount; - } else { - final int fourpos = size - TWO_HEAP_MAX_SIZE; - if (fourheap == null) { - fourheap = new double[FOUR_HEAP_INITIAL_SIZE]; - } else if (fourpos >= fourheap.length) { - // Grow extension heap by half. - fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); - } - fourheap[fourpos] = co; - ++size; - heapifyUp4(fourpos, co); - ++modCount; + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); } + final int twopos = size; + twoheap[twopos] = co; + ++size; + heapifyUp(twopos, co); } @Override @@ -195,7 +116,6 @@ public class DoubleMaxHeap implements DoubleHeap { public double replaceTopElement(double reinsert) { final double ret = twoheap[0]; heapifyDown( reinsert); - ++modCount; return ret; } @@ -205,7 +125,7 @@ public class DoubleMaxHeap implements DoubleHeap { * @param twopos Position in 2-ary heap. * @param cur Current object */ - private void heapifyUp2(int twopos, double cur) { + private void heapifyUp(int twopos, double cur) { while (twopos > 0) { final int parent = (twopos - 1) >>> 1; double par = twoheap[parent]; @@ -218,80 +138,29 @@ public class DoubleMaxHeap implements DoubleHeap { twoheap[twopos] = cur; } - /** - * Heapify-Up method for 4-ary heap. - * - * @param fourpos Position in 4-ary heap. - * @param cur Current object - */ - private void heapifyUp4(int fourpos, double cur) { - while (fourpos > 0) { - final int parent = (fourpos - 1) >> 2; - double par = fourheap[parent]; - if (cur <= par) { - break; - } - fourheap[fourpos] = par; - fourpos = parent; - } - if (fourpos == 0 && twoheap[0] < cur) { - fourheap[0] = twoheap[0]; - twoheap[0] = cur; - } else { - fourheap[fourpos] = cur; - } - } - @Override public double poll() { final double ret = twoheap[0]; --size; // Replacement object: - if (size >= TWO_HEAP_MAX_SIZE) { - final int last = size - TWO_HEAP_MAX_SIZE; - final double reinsert = fourheap[last]; - fourheap[last] = 0.0; - heapifyDown(reinsert); - } else if (size > 0) { + if (size > 0) { final double reinsert = twoheap[size]; twoheap[size] = 0.0; heapifyDown(reinsert); } else { twoheap[0] = 0.0; } - ++modCount; return ret; } /** * Invoke heapify-down for the root object. * - * @param reinsert Object to insert. - */ - private void heapifyDown(double reinsert) { - if (size > TWO_HEAP_MAX_SIZE) { - // Special case: 3-ary situation. - final int best = (twoheap[1] >= twoheap[2]) ? 1 : 2; - if (fourheap[0] > twoheap[best]) { - twoheap[0] = fourheap[0]; - heapifyDown4(0, reinsert); - } else { - twoheap[0] = twoheap[best]; - heapifyDown2(best, reinsert); - } - return; - } - heapifyDown2(0, reinsert); - } - - /** - * Heapify-Down for 2-ary heap. - * - * @param twopos Position in 2-ary heap. - * @param cur Current object + * @param cur Object to insert. */ - private void heapifyDown2(int twopos, double cur) { - final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + private void heapifyDown(double cur) { + final int stop = size >>> 1; + int twopos = 0; while (twopos < stop) { int bestchild = (twopos << 1) + 1; double best = twoheap[bestchild]; @@ -309,51 +178,6 @@ public class DoubleMaxHeap implements DoubleHeap { twoheap[twopos] = cur; } - /** - * Heapify-Down for 4-ary heap. - * - * @param fourpos Position in 4-ary heap. - * @param cur Current object - */ - private void heapifyDown4(int fourpos, double cur) { - final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; - while (fourpos < stop) { - final int child = (fourpos << 2) + 1; - double best = fourheap[child]; - int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; - if (size > minsize) { - double nextchild = fourheap[candidate]; - if (best < nextchild) { - bestchild = candidate; - best = nextchild; - } - - minsize += 2; - if (size >= minsize) { - nextchild = fourheap[++candidate]; - if (best < nextchild) { - bestchild = candidate; - best = nextchild; - } - - if (size > minsize) { - nextchild = fourheap[++candidate]; - if (best < nextchild) { - bestchild = candidate; - best = nextchild; - } - } - } - } - if (cur >= best) { - break; - } - fourheap[fourpos] = best; - fourpos = bestchild; - } - fourheap[fourpos] = cur; - } - @Override public double peek() { return twoheap[0]; @@ -396,16 +220,8 @@ public class DoubleMaxHeap implements DoubleHeap { */ protected int pos = 0; - /** - * Modification counter we were initialized at. - */ - protected final int myModCount = modCount; - @Override public boolean valid() { - if (modCount != myModCount) { - throw new ConcurrentModificationException(); - } return pos < size; } @@ -416,7 +232,7 @@ public class DoubleMaxHeap implements DoubleHeap { @Override public double get() { - return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + return twoheap[pos]; } } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMinHeap.java index e9334153..afc50296 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMinHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMinHeap.java @@ -24,32 +24,11 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; */ import java.util.Arrays; -import java.util.ConcurrentModificationException; import de.lmu.ifi.dbs.elki.math.MathUtil; /** - * Advanced priority queue class, based on a binary heap (for small sizes), - * which will for larger heaps be accompanied by a 4-ary heap (attached below - * the root of the two-ary heap, making the root actually 3-ary). - * - * This code was automatically instantiated for the type: Double - * - * This combination was found to work quite well in benchmarks, but YMMV. - * - * Some other observations from benchmarking: - * <ul> - * <li>Bulk loading did not improve things</li> - * <li>Primitive heaps are substantially faster.</li> - * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and - * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> - * <li>Workload makes a huge difference. A load-once, poll-until-empty priority - * queue is something different than e.g. a top-k heap, which will see a lot of - * top element replacements.</li> - * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for - * top-k make a difference.</li> - * <li>Different day, different benchmark results ...</li> - * </ul> + * Binary heap for primitive types. * * @author Erich Schubert * @@ -62,44 +41,16 @@ public class DoubleMinHeap implements DoubleHeap { protected double[] twoheap; /** - * Extension heap. - */ - protected double[] fourheap; - - /** * Current size of heap. */ protected int size; /** - * (Structural) modification counter. Used to invalidate iterators. - */ - protected int modCount = 0; - - /** - * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. - */ - private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; - - /** * Initial size of the 2-ary heap. */ private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; /** - * Initial size of 4-ary heap when initialized. - * - * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 - * - * 85 = 4-ary heap of height 3: 21 + 4*4*4 - * - * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 - * - * Since we last grew by 255 (to 511), let's use 341. - */ - private final static int FOUR_HEAP_INITIAL_SIZE = 341; - - /** * Constructor, with default size. */ public DoubleMinHeap() { @@ -107,9 +58,6 @@ public class DoubleMinHeap implements DoubleHeap { double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; this.twoheap = twoheap; - this.fourheap = null; - this.size = 0; - this.modCount = 0; } /** @@ -119,27 +67,15 @@ public class DoubleMinHeap implements DoubleHeap { */ public DoubleMinHeap(int minsize) { super(); - if (minsize < TWO_HEAP_MAX_SIZE) { - final int size = MathUtil.nextPow2Int(minsize + 1) - 1; - double[] twoheap = new double[size]; + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + double[] twoheap = new double[size]; - this.twoheap = twoheap; - this.fourheap = null; - } else { - double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; - double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; - this.twoheap = twoheap; - this.fourheap = fourheap; - } - this.size = 0; - this.modCount = 0; + this.twoheap = twoheap; } @Override public void clear() { size = 0; - ++modCount; - fourheap = null; Arrays.fill(twoheap, 0.0); } @@ -157,29 +93,14 @@ public class DoubleMinHeap implements DoubleHeap { public void add(double o) { final double co = o; // System.err.println("Add: " + o); - if (size < TWO_HEAP_MAX_SIZE) { - if (size >= twoheap.length) { - // Grow by one layer. - twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); - } - final int twopos = size; - twoheap[twopos] = co; - ++size; - heapifyUp2(twopos, co); - ++modCount; - } else { - final int fourpos = size - TWO_HEAP_MAX_SIZE; - if (fourheap == null) { - fourheap = new double[FOUR_HEAP_INITIAL_SIZE]; - } else if (fourpos >= fourheap.length) { - // Grow extension heap by half. - fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); - } - fourheap[fourpos] = co; - ++size; - heapifyUp4(fourpos, co); - ++modCount; + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); } + final int twopos = size; + twoheap[twopos] = co; + ++size; + heapifyUp(twopos, co); } @Override @@ -195,7 +116,6 @@ public class DoubleMinHeap implements DoubleHeap { public double replaceTopElement(double reinsert) { final double ret = twoheap[0]; heapifyDown( reinsert); - ++modCount; return ret; } @@ -205,7 +125,7 @@ public class DoubleMinHeap implements DoubleHeap { * @param twopos Position in 2-ary heap. * @param cur Current object */ - private void heapifyUp2(int twopos, double cur) { + private void heapifyUp(int twopos, double cur) { while (twopos > 0) { final int parent = (twopos - 1) >>> 1; double par = twoheap[parent]; @@ -218,80 +138,29 @@ public class DoubleMinHeap implements DoubleHeap { twoheap[twopos] = cur; } - /** - * Heapify-Up method for 4-ary heap. - * - * @param fourpos Position in 4-ary heap. - * @param cur Current object - */ - private void heapifyUp4(int fourpos, double cur) { - while (fourpos > 0) { - final int parent = (fourpos - 1) >> 2; - double par = fourheap[parent]; - if (cur >= par) { - break; - } - fourheap[fourpos] = par; - fourpos = parent; - } - if (fourpos == 0 && twoheap[0] > cur) { - fourheap[0] = twoheap[0]; - twoheap[0] = cur; - } else { - fourheap[fourpos] = cur; - } - } - @Override public double poll() { final double ret = twoheap[0]; --size; // Replacement object: - if (size >= TWO_HEAP_MAX_SIZE) { - final int last = size - TWO_HEAP_MAX_SIZE; - final double reinsert = fourheap[last]; - fourheap[last] = 0.0; - heapifyDown(reinsert); - } else if (size > 0) { + if (size > 0) { final double reinsert = twoheap[size]; twoheap[size] = 0.0; heapifyDown(reinsert); } else { twoheap[0] = 0.0; } - ++modCount; return ret; } /** * Invoke heapify-down for the root object. * - * @param reinsert Object to insert. - */ - private void heapifyDown(double reinsert) { - if (size > TWO_HEAP_MAX_SIZE) { - // Special case: 3-ary situation. - final int best = (twoheap[1] <= twoheap[2]) ? 1 : 2; - if (fourheap[0] < twoheap[best]) { - twoheap[0] = fourheap[0]; - heapifyDown4(0, reinsert); - } else { - twoheap[0] = twoheap[best]; - heapifyDown2(best, reinsert); - } - return; - } - heapifyDown2(0, reinsert); - } - - /** - * Heapify-Down for 2-ary heap. - * - * @param twopos Position in 2-ary heap. - * @param cur Current object + * @param cur Object to insert. */ - private void heapifyDown2(int twopos, double cur) { - final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + private void heapifyDown(double cur) { + final int stop = size >>> 1; + int twopos = 0; while (twopos < stop) { int bestchild = (twopos << 1) + 1; double best = twoheap[bestchild]; @@ -309,51 +178,6 @@ public class DoubleMinHeap implements DoubleHeap { twoheap[twopos] = cur; } - /** - * Heapify-Down for 4-ary heap. - * - * @param fourpos Position in 4-ary heap. - * @param cur Current object - */ - private void heapifyDown4(int fourpos, double cur) { - final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; - while (fourpos < stop) { - final int child = (fourpos << 2) + 1; - double best = fourheap[child]; - int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; - if (size > minsize) { - double nextchild = fourheap[candidate]; - if (best > nextchild) { - bestchild = candidate; - best = nextchild; - } - - minsize += 2; - if (size >= minsize) { - nextchild = fourheap[++candidate]; - if (best > nextchild) { - bestchild = candidate; - best = nextchild; - } - - if (size > minsize) { - nextchild = fourheap[++candidate]; - if (best > nextchild) { - bestchild = candidate; - best = nextchild; - } - } - } - } - if (cur <= best) { - break; - } - fourheap[fourpos] = best; - fourpos = bestchild; - } - fourheap[fourpos] = cur; - } - @Override public double peek() { return twoheap[0]; @@ -396,16 +220,8 @@ public class DoubleMinHeap implements DoubleHeap { */ protected int pos = 0; - /** - * Modification counter we were initialized at. - */ - protected final int myModCount = modCount; - @Override public boolean valid() { - if (modCount != myModCount) { - throw new ConcurrentModificationException(); - } return pos < size; } @@ -416,7 +232,7 @@ public class DoubleMinHeap implements DoubleHeap { @Override public double get() { - return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + return twoheap[pos]; } } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectHeap.java index db65ce81..7323cd8d 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMaxHeap.java index dd89573c..939c4d7e 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMaxHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMaxHeap.java @@ -24,32 +24,11 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; */ import java.util.Arrays; -import java.util.ConcurrentModificationException; import de.lmu.ifi.dbs.elki.math.MathUtil; /** - * Advanced priority queue class, based on a binary heap (for small sizes), - * which will for larger heaps be accompanied by a 4-ary heap (attached below - * the root of the two-ary heap, making the root actually 3-ary). - * - * This code was automatically instantiated for the types: Double and Object - * - * This combination was found to work quite well in benchmarks, but YMMV. - * - * Some other observations from benchmarking: - * <ul> - * <li>Bulk loading did not improve things</li> - * <li>Primitive heaps are substantially faster.</li> - * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and - * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> - * <li>Workload makes a huge difference. A load-once, poll-until-empty priority - * queue is something different than e.g. a top-k heap, which will see a lot of - * top element replacements.</li> - * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for - * top-k make a difference.</li> - * <li>Different day, different benchmark results ...</li> - * </ul> + * Binary heap for primitive types. * * @author Erich Schubert * @@ -68,49 +47,16 @@ public class DoubleObjectMaxHeap<V> implements DoubleObjectHeap<V> { protected Object[] twovals; /** - * Extension heap. - */ - protected double[] fourheap; - - /** - * Extension heapvalues. - */ - protected Object[] fourvals; - - /** * Current size of heap. */ protected int size; /** - * (Structural) modification counter. Used to invalidate iterators. - */ - protected int modCount = 0; - - /** - * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. - */ - private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; - - /** * Initial size of the 2-ary heap. */ private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; /** - * Initial size of 4-ary heap when initialized. - * - * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 - * - * 85 = 4-ary heap of height 3: 21 + 4*4*4 - * - * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 - * - * Since we last grew by 255 (to 511), let's use 341. - */ - private final static int FOUR_HEAP_INITIAL_SIZE = 341; - - /** * Constructor, with default size. */ public DoubleObjectMaxHeap() { @@ -120,10 +66,6 @@ public class DoubleObjectMaxHeap<V> implements DoubleObjectHeap<V> { this.twoheap = twoheap; this.twovals = twovals; - this.fourheap = null; - this.fourvals = null; - this.size = 0; - this.modCount = 0; } /** @@ -133,35 +75,17 @@ public class DoubleObjectMaxHeap<V> implements DoubleObjectHeap<V> { */ public DoubleObjectMaxHeap(int minsize) { super(); - if (minsize < TWO_HEAP_MAX_SIZE) { - final int size = MathUtil.nextPow2Int(minsize + 1) - 1; - double[] twoheap = new double[size]; - Object[] twovals = new Object[size]; + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + double[] twoheap = new double[size]; + Object[] twovals = new Object[size]; - this.twoheap = twoheap; - this.twovals = twovals; - this.fourheap = null; - this.fourvals = null; - } else { - double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; - Object[] twovals = new Object[TWO_HEAP_INITIAL_SIZE]; - double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; - Object[] fourvals = new Object[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; - this.twoheap = twoheap; - this.twovals = twovals; - this.fourheap = fourheap; - this.fourvals = fourvals; - } - this.size = 0; - this.modCount = 0; + this.twoheap = twoheap; + this.twovals = twovals; } @Override public void clear() { size = 0; - ++modCount; - fourheap = null; - fourvals = null; Arrays.fill(twoheap, 0.0); Arrays.fill(twovals, null); } @@ -181,34 +105,16 @@ public class DoubleObjectMaxHeap<V> implements DoubleObjectHeap<V> { final double co = o; final Object cv = (Object)v; // System.err.println("Add: " + o); - if (size < TWO_HEAP_MAX_SIZE) { - if (size >= twoheap.length) { - // Grow by one layer. - twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); - twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1); - } - final int twopos = size; - twoheap[twopos] = co; - twovals[twopos] = cv; - ++size; - heapifyUp2(twopos, co, cv); - ++modCount; - } else { - final int fourpos = size - TWO_HEAP_MAX_SIZE; - if (fourheap == null) { - fourheap = new double[FOUR_HEAP_INITIAL_SIZE]; - fourvals = new Object[FOUR_HEAP_INITIAL_SIZE]; - } else if (fourpos >= fourheap.length) { - // Grow extension heap by half. - fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); - fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1)); - } - fourheap[fourpos] = co; - fourvals[fourpos] = cv; - ++size; - heapifyUp4(fourpos, co, cv); - ++modCount; + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1); } + final int twopos = size; + twoheap[twopos] = co; + twovals[twopos] = cv; + ++size; + heapifyUp(twopos, co, cv); } @Override @@ -223,7 +129,6 @@ public class DoubleObjectMaxHeap<V> implements DoubleObjectHeap<V> { @Override public void replaceTopElement(double reinsert, V val) { heapifyDown(reinsert, (Object)val); - ++modCount; } /** @@ -233,7 +138,7 @@ public class DoubleObjectMaxHeap<V> implements DoubleObjectHeap<V> { * @param cur Current object * @param val Current value */ - private void heapifyUp2(int twopos, double cur, Object val) { + private void heapifyUp(int twopos, double cur, Object val) { while (twopos > 0) { final int parent = (twopos - 1) >>> 1; double par = twoheap[parent]; @@ -248,47 +153,11 @@ public class DoubleObjectMaxHeap<V> implements DoubleObjectHeap<V> { twovals[twopos] = val; } - /** - * Heapify-Up method for 4-ary heap. - * - * @param fourpos Position in 4-ary heap. - * @param cur Current object - * @param val Current value - */ - private void heapifyUp4(int fourpos, double cur, Object val) { - while (fourpos > 0) { - final int parent = (fourpos - 1) >> 2; - double par = fourheap[parent]; - if (cur <= par) { - break; - } - fourheap[fourpos] = par; - fourvals[fourpos] = fourvals[parent]; - fourpos = parent; - } - if (fourpos == 0 && twoheap[0] < cur) { - fourheap[0] = twoheap[0]; - fourvals[0] = twovals[0]; - twoheap[0] = cur; - twovals[0] = val; - } else { - fourheap[fourpos] = cur; - fourvals[fourpos] = val; - } - } - @Override public void poll() { --size; // Replacement object: - if (size >= TWO_HEAP_MAX_SIZE) { - final int last = size - TWO_HEAP_MAX_SIZE; - final double reinsert = fourheap[last]; - final Object reinsertv = fourvals[last]; - fourheap[last] = 0.0; - fourvals[last] = null; - heapifyDown(reinsert, reinsertv); - } else if (size > 0) { + if (size > 0) { final double reinsert = twoheap[size]; final Object reinsertv = twovals[size]; twoheap[size] = 0.0; @@ -298,42 +167,17 @@ public class DoubleObjectMaxHeap<V> implements DoubleObjectHeap<V> { twoheap[0] = 0.0; twovals[0] = null; } - ++modCount; } /** * Invoke heapify-down for the root object. * - * @param reinsert Object to insert. - * @param val Value to reinsert. - */ - private void heapifyDown(double reinsert, Object val) { - if (size > TWO_HEAP_MAX_SIZE) { - // Special case: 3-ary situation. - final int best = (twoheap[1] >= twoheap[2]) ? 1 : 2; - if (fourheap[0] > twoheap[best]) { - twoheap[0] = fourheap[0]; - twovals[0] = fourvals[0]; - heapifyDown4(0, reinsert, val); - } else { - twoheap[0] = twoheap[best]; - twovals[0] = twovals[best]; - heapifyDown2(best, reinsert, val); - } - return; - } - heapifyDown2(0, reinsert, val); - } - - /** - * Heapify-Down for 2-ary heap. - * - * @param twopos Position in 2-ary heap. - * @param cur Current object + * @param cur Object to insert. * @param val Value to reinsert. */ - private void heapifyDown2(int twopos, double cur, Object val) { - final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + private void heapifyDown(double cur, Object val) { + final int stop = size >>> 1; + int twopos = 0; while (twopos < stop) { int bestchild = (twopos << 1) + 1; double best = twoheap[bestchild]; @@ -353,54 +197,6 @@ public class DoubleObjectMaxHeap<V> implements DoubleObjectHeap<V> { twovals[twopos] = val; } - /** - * Heapify-Down for 4-ary heap. - * - * @param fourpos Position in 4-ary heap. - * @param cur Current object - * @param val Value to reinsert. - */ - private void heapifyDown4(int fourpos, double cur, Object val) { - final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; - while (fourpos < stop) { - final int child = (fourpos << 2) + 1; - double best = fourheap[child]; - int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; - if (size > minsize) { - double nextchild = fourheap[candidate]; - if (best < nextchild) { - bestchild = candidate; - best = nextchild; - } - - minsize += 2; - if (size >= minsize) { - nextchild = fourheap[++candidate]; - if (best < nextchild) { - bestchild = candidate; - best = nextchild; - } - - if (size > minsize) { - nextchild = fourheap[++candidate]; - if (best < nextchild) { - bestchild = candidate; - best = nextchild; - } - } - } - } - if (cur >= best) { - break; - } - fourheap[fourpos] = best; - fourvals[fourpos] = fourvals[bestchild]; - fourpos = bestchild; - } - fourheap[fourpos] = cur; - fourvals[fourpos] = val; - } - @Override public double peekKey() { return twoheap[0]; @@ -449,16 +245,8 @@ public class DoubleObjectMaxHeap<V> implements DoubleObjectHeap<V> { */ protected int pos = 0; - /** - * Modification counter we were initialized at. - */ - protected final int myModCount = modCount; - @Override public boolean valid() { - if (modCount != myModCount) { - throw new ConcurrentModificationException(); - } return pos < size; } @@ -469,14 +257,14 @@ public class DoubleObjectMaxHeap<V> implements DoubleObjectHeap<V> { @Override public double getKey() { - return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + return twoheap[pos]; } @SuppressWarnings("unchecked") @Override public V getValue() { - return (V)((pos < TWO_HEAP_MAX_SIZE) ? twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]); + return (V)twovals[pos]; } } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMinHeap.java index 905cdedb..01b8e58d 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMinHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMinHeap.java @@ -24,32 +24,11 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; */ import java.util.Arrays; -import java.util.ConcurrentModificationException; import de.lmu.ifi.dbs.elki.math.MathUtil; /** - * Advanced priority queue class, based on a binary heap (for small sizes), - * which will for larger heaps be accompanied by a 4-ary heap (attached below - * the root of the two-ary heap, making the root actually 3-ary). - * - * This code was automatically instantiated for the types: Double and Object - * - * This combination was found to work quite well in benchmarks, but YMMV. - * - * Some other observations from benchmarking: - * <ul> - * <li>Bulk loading did not improve things</li> - * <li>Primitive heaps are substantially faster.</li> - * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and - * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> - * <li>Workload makes a huge difference. A load-once, poll-until-empty priority - * queue is something different than e.g. a top-k heap, which will see a lot of - * top element replacements.</li> - * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for - * top-k make a difference.</li> - * <li>Different day, different benchmark results ...</li> - * </ul> + * Binary heap for primitive types. * * @author Erich Schubert * @@ -68,49 +47,16 @@ public class DoubleObjectMinHeap<V> implements DoubleObjectHeap<V> { protected Object[] twovals; /** - * Extension heap. - */ - protected double[] fourheap; - - /** - * Extension heapvalues. - */ - protected Object[] fourvals; - - /** * Current size of heap. */ protected int size; /** - * (Structural) modification counter. Used to invalidate iterators. - */ - protected int modCount = 0; - - /** - * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. - */ - private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; - - /** * Initial size of the 2-ary heap. */ private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; /** - * Initial size of 4-ary heap when initialized. - * - * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 - * - * 85 = 4-ary heap of height 3: 21 + 4*4*4 - * - * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 - * - * Since we last grew by 255 (to 511), let's use 341. - */ - private final static int FOUR_HEAP_INITIAL_SIZE = 341; - - /** * Constructor, with default size. */ public DoubleObjectMinHeap() { @@ -120,10 +66,6 @@ public class DoubleObjectMinHeap<V> implements DoubleObjectHeap<V> { this.twoheap = twoheap; this.twovals = twovals; - this.fourheap = null; - this.fourvals = null; - this.size = 0; - this.modCount = 0; } /** @@ -133,35 +75,17 @@ public class DoubleObjectMinHeap<V> implements DoubleObjectHeap<V> { */ public DoubleObjectMinHeap(int minsize) { super(); - if (minsize < TWO_HEAP_MAX_SIZE) { - final int size = MathUtil.nextPow2Int(minsize + 1) - 1; - double[] twoheap = new double[size]; - Object[] twovals = new Object[size]; + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + double[] twoheap = new double[size]; + Object[] twovals = new Object[size]; - this.twoheap = twoheap; - this.twovals = twovals; - this.fourheap = null; - this.fourvals = null; - } else { - double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; - Object[] twovals = new Object[TWO_HEAP_INITIAL_SIZE]; - double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; - Object[] fourvals = new Object[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; - this.twoheap = twoheap; - this.twovals = twovals; - this.fourheap = fourheap; - this.fourvals = fourvals; - } - this.size = 0; - this.modCount = 0; + this.twoheap = twoheap; + this.twovals = twovals; } @Override public void clear() { size = 0; - ++modCount; - fourheap = null; - fourvals = null; Arrays.fill(twoheap, 0.0); Arrays.fill(twovals, null); } @@ -181,34 +105,16 @@ public class DoubleObjectMinHeap<V> implements DoubleObjectHeap<V> { final double co = o; final Object cv = (Object)v; // System.err.println("Add: " + o); - if (size < TWO_HEAP_MAX_SIZE) { - if (size >= twoheap.length) { - // Grow by one layer. - twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); - twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1); - } - final int twopos = size; - twoheap[twopos] = co; - twovals[twopos] = cv; - ++size; - heapifyUp2(twopos, co, cv); - ++modCount; - } else { - final int fourpos = size - TWO_HEAP_MAX_SIZE; - if (fourheap == null) { - fourheap = new double[FOUR_HEAP_INITIAL_SIZE]; - fourvals = new Object[FOUR_HEAP_INITIAL_SIZE]; - } else if (fourpos >= fourheap.length) { - // Grow extension heap by half. - fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); - fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1)); - } - fourheap[fourpos] = co; - fourvals[fourpos] = cv; - ++size; - heapifyUp4(fourpos, co, cv); - ++modCount; + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1); } + final int twopos = size; + twoheap[twopos] = co; + twovals[twopos] = cv; + ++size; + heapifyUp(twopos, co, cv); } @Override @@ -223,7 +129,6 @@ public class DoubleObjectMinHeap<V> implements DoubleObjectHeap<V> { @Override public void replaceTopElement(double reinsert, V val) { heapifyDown(reinsert, (Object)val); - ++modCount; } /** @@ -233,7 +138,7 @@ public class DoubleObjectMinHeap<V> implements DoubleObjectHeap<V> { * @param cur Current object * @param val Current value */ - private void heapifyUp2(int twopos, double cur, Object val) { + private void heapifyUp(int twopos, double cur, Object val) { while (twopos > 0) { final int parent = (twopos - 1) >>> 1; double par = twoheap[parent]; @@ -248,47 +153,11 @@ public class DoubleObjectMinHeap<V> implements DoubleObjectHeap<V> { twovals[twopos] = val; } - /** - * Heapify-Up method for 4-ary heap. - * - * @param fourpos Position in 4-ary heap. - * @param cur Current object - * @param val Current value - */ - private void heapifyUp4(int fourpos, double cur, Object val) { - while (fourpos > 0) { - final int parent = (fourpos - 1) >> 2; - double par = fourheap[parent]; - if (cur >= par) { - break; - } - fourheap[fourpos] = par; - fourvals[fourpos] = fourvals[parent]; - fourpos = parent; - } - if (fourpos == 0 && twoheap[0] > cur) { - fourheap[0] = twoheap[0]; - fourvals[0] = twovals[0]; - twoheap[0] = cur; - twovals[0] = val; - } else { - fourheap[fourpos] = cur; - fourvals[fourpos] = val; - } - } - @Override public void poll() { --size; // Replacement object: - if (size >= TWO_HEAP_MAX_SIZE) { - final int last = size - TWO_HEAP_MAX_SIZE; - final double reinsert = fourheap[last]; - final Object reinsertv = fourvals[last]; - fourheap[last] = 0.0; - fourvals[last] = null; - heapifyDown(reinsert, reinsertv); - } else if (size > 0) { + if (size > 0) { final double reinsert = twoheap[size]; final Object reinsertv = twovals[size]; twoheap[size] = 0.0; @@ -298,42 +167,17 @@ public class DoubleObjectMinHeap<V> implements DoubleObjectHeap<V> { twoheap[0] = 0.0; twovals[0] = null; } - ++modCount; } /** * Invoke heapify-down for the root object. * - * @param reinsert Object to insert. - * @param val Value to reinsert. - */ - private void heapifyDown(double reinsert, Object val) { - if (size > TWO_HEAP_MAX_SIZE) { - // Special case: 3-ary situation. - final int best = (twoheap[1] <= twoheap[2]) ? 1 : 2; - if (fourheap[0] < twoheap[best]) { - twoheap[0] = fourheap[0]; - twovals[0] = fourvals[0]; - heapifyDown4(0, reinsert, val); - } else { - twoheap[0] = twoheap[best]; - twovals[0] = twovals[best]; - heapifyDown2(best, reinsert, val); - } - return; - } - heapifyDown2(0, reinsert, val); - } - - /** - * Heapify-Down for 2-ary heap. - * - * @param twopos Position in 2-ary heap. - * @param cur Current object + * @param cur Object to insert. * @param val Value to reinsert. */ - private void heapifyDown2(int twopos, double cur, Object val) { - final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + private void heapifyDown(double cur, Object val) { + final int stop = size >>> 1; + int twopos = 0; while (twopos < stop) { int bestchild = (twopos << 1) + 1; double best = twoheap[bestchild]; @@ -353,54 +197,6 @@ public class DoubleObjectMinHeap<V> implements DoubleObjectHeap<V> { twovals[twopos] = val; } - /** - * Heapify-Down for 4-ary heap. - * - * @param fourpos Position in 4-ary heap. - * @param cur Current object - * @param val Value to reinsert. - */ - private void heapifyDown4(int fourpos, double cur, Object val) { - final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; - while (fourpos < stop) { - final int child = (fourpos << 2) + 1; - double best = fourheap[child]; - int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; - if (size > minsize) { - double nextchild = fourheap[candidate]; - if (best > nextchild) { - bestchild = candidate; - best = nextchild; - } - - minsize += 2; - if (size >= minsize) { - nextchild = fourheap[++candidate]; - if (best > nextchild) { - bestchild = candidate; - best = nextchild; - } - - if (size > minsize) { - nextchild = fourheap[++candidate]; - if (best > nextchild) { - bestchild = candidate; - best = nextchild; - } - } - } - } - if (cur <= best) { - break; - } - fourheap[fourpos] = best; - fourvals[fourpos] = fourvals[bestchild]; - fourpos = bestchild; - } - fourheap[fourpos] = cur; - fourvals[fourpos] = val; - } - @Override public double peekKey() { return twoheap[0]; @@ -449,16 +245,8 @@ public class DoubleObjectMinHeap<V> implements DoubleObjectHeap<V> { */ protected int pos = 0; - /** - * Modification counter we were initialized at. - */ - protected final int myModCount = modCount; - @Override public boolean valid() { - if (modCount != myModCount) { - throw new ConcurrentModificationException(); - } return pos < size; } @@ -469,14 +257,14 @@ public class DoubleObjectMinHeap<V> implements DoubleObjectHeap<V> { @Override public double getKey() { - return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + return twoheap[pos]; } @SuppressWarnings("unchecked") @Override public V getValue() { - return (V)((pos < TWO_HEAP_MAX_SIZE) ? twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]); + return (V)twovals[pos]; } } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerHeap.java index 3235926b..77e5f3e5 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMaxHeap.java index 60f61d99..4f3b1495 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMaxHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMaxHeap.java @@ -24,32 +24,11 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; */ import java.util.Arrays; -import java.util.ConcurrentModificationException; import de.lmu.ifi.dbs.elki.math.MathUtil; /** - * Advanced priority queue class, based on a binary heap (for small sizes), - * which will for larger heaps be accompanied by a 4-ary heap (attached below - * the root of the two-ary heap, making the root actually 3-ary). - * - * This code was automatically instantiated for the type: Integer - * - * This combination was found to work quite well in benchmarks, but YMMV. - * - * Some other observations from benchmarking: - * <ul> - * <li>Bulk loading did not improve things</li> - * <li>Primitive heaps are substantially faster.</li> - * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and - * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> - * <li>Workload makes a huge difference. A load-once, poll-until-empty priority - * queue is something different than e.g. a top-k heap, which will see a lot of - * top element replacements.</li> - * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for - * top-k make a difference.</li> - * <li>Different day, different benchmark results ...</li> - * </ul> + * Binary heap for primitive types. * * @author Erich Schubert * @@ -62,44 +41,16 @@ public class IntegerMaxHeap implements IntegerHeap { protected int[] twoheap; /** - * Extension heap. - */ - protected int[] fourheap; - - /** * Current size of heap. */ protected int size; /** - * (Structural) modification counter. Used to invalidate iterators. - */ - protected int modCount = 0; - - /** - * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. - */ - private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; - - /** * Initial size of the 2-ary heap. */ private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; /** - * Initial size of 4-ary heap when initialized. - * - * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 - * - * 85 = 4-ary heap of height 3: 21 + 4*4*4 - * - * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 - * - * Since we last grew by 255 (to 511), let's use 341. - */ - private final static int FOUR_HEAP_INITIAL_SIZE = 341; - - /** * Constructor, with default size. */ public IntegerMaxHeap() { @@ -107,9 +58,6 @@ public class IntegerMaxHeap implements IntegerHeap { int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE]; this.twoheap = twoheap; - this.fourheap = null; - this.size = 0; - this.modCount = 0; } /** @@ -119,27 +67,15 @@ public class IntegerMaxHeap implements IntegerHeap { */ public IntegerMaxHeap(int minsize) { super(); - if (minsize < TWO_HEAP_MAX_SIZE) { - final int size = MathUtil.nextPow2Int(minsize + 1) - 1; - int[] twoheap = new int[size]; + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + int[] twoheap = new int[size]; - this.twoheap = twoheap; - this.fourheap = null; - } else { - int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE]; - int[] fourheap = new int[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; - this.twoheap = twoheap; - this.fourheap = fourheap; - } - this.size = 0; - this.modCount = 0; + this.twoheap = twoheap; } @Override public void clear() { size = 0; - ++modCount; - fourheap = null; Arrays.fill(twoheap, 0); } @@ -157,29 +93,14 @@ public class IntegerMaxHeap implements IntegerHeap { public void add(int o) { final int co = o; // System.err.println("Add: " + o); - if (size < TWO_HEAP_MAX_SIZE) { - if (size >= twoheap.length) { - // Grow by one layer. - twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); - } - final int twopos = size; - twoheap[twopos] = co; - ++size; - heapifyUp2(twopos, co); - ++modCount; - } else { - final int fourpos = size - TWO_HEAP_MAX_SIZE; - if (fourheap == null) { - fourheap = new int[FOUR_HEAP_INITIAL_SIZE]; - } else if (fourpos >= fourheap.length) { - // Grow extension heap by half. - fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); - } - fourheap[fourpos] = co; - ++size; - heapifyUp4(fourpos, co); - ++modCount; + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); } + final int twopos = size; + twoheap[twopos] = co; + ++size; + heapifyUp(twopos, co); } @Override @@ -195,7 +116,6 @@ public class IntegerMaxHeap implements IntegerHeap { public int replaceTopElement(int reinsert) { final int ret = twoheap[0]; heapifyDown( reinsert); - ++modCount; return ret; } @@ -205,7 +125,7 @@ public class IntegerMaxHeap implements IntegerHeap { * @param twopos Position in 2-ary heap. * @param cur Current object */ - private void heapifyUp2(int twopos, int cur) { + private void heapifyUp(int twopos, int cur) { while (twopos > 0) { final int parent = (twopos - 1) >>> 1; int par = twoheap[parent]; @@ -218,80 +138,29 @@ public class IntegerMaxHeap implements IntegerHeap { twoheap[twopos] = cur; } - /** - * Heapify-Up method for 4-ary heap. - * - * @param fourpos Position in 4-ary heap. - * @param cur Current object - */ - private void heapifyUp4(int fourpos, int cur) { - while (fourpos > 0) { - final int parent = (fourpos - 1) >> 2; - int par = fourheap[parent]; - if (cur <= par) { - break; - } - fourheap[fourpos] = par; - fourpos = parent; - } - if (fourpos == 0 && twoheap[0] < cur) { - fourheap[0] = twoheap[0]; - twoheap[0] = cur; - } else { - fourheap[fourpos] = cur; - } - } - @Override public int poll() { final int ret = twoheap[0]; --size; // Replacement object: - if (size >= TWO_HEAP_MAX_SIZE) { - final int last = size - TWO_HEAP_MAX_SIZE; - final int reinsert = fourheap[last]; - fourheap[last] = 0; - heapifyDown(reinsert); - } else if (size > 0) { + if (size > 0) { final int reinsert = twoheap[size]; twoheap[size] = 0; heapifyDown(reinsert); } else { twoheap[0] = 0; } - ++modCount; return ret; } /** * Invoke heapify-down for the root object. * - * @param reinsert Object to insert. - */ - private void heapifyDown(int reinsert) { - if (size > TWO_HEAP_MAX_SIZE) { - // Special case: 3-ary situation. - final int best = (twoheap[1] >= twoheap[2]) ? 1 : 2; - if (fourheap[0] > twoheap[best]) { - twoheap[0] = fourheap[0]; - heapifyDown4(0, reinsert); - } else { - twoheap[0] = twoheap[best]; - heapifyDown2(best, reinsert); - } - return; - } - heapifyDown2(0, reinsert); - } - - /** - * Heapify-Down for 2-ary heap. - * - * @param twopos Position in 2-ary heap. - * @param cur Current object + * @param cur Object to insert. */ - private void heapifyDown2(int twopos, int cur) { - final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + private void heapifyDown(int cur) { + final int stop = size >>> 1; + int twopos = 0; while (twopos < stop) { int bestchild = (twopos << 1) + 1; int best = twoheap[bestchild]; @@ -309,51 +178,6 @@ public class IntegerMaxHeap implements IntegerHeap { twoheap[twopos] = cur; } - /** - * Heapify-Down for 4-ary heap. - * - * @param fourpos Position in 4-ary heap. - * @param cur Current object - */ - private void heapifyDown4(int fourpos, int cur) { - final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; - while (fourpos < stop) { - final int child = (fourpos << 2) + 1; - int best = fourheap[child]; - int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; - if (size > minsize) { - int nextchild = fourheap[candidate]; - if (best < nextchild) { - bestchild = candidate; - best = nextchild; - } - - minsize += 2; - if (size >= minsize) { - nextchild = fourheap[++candidate]; - if (best < nextchild) { - bestchild = candidate; - best = nextchild; - } - - if (size > minsize) { - nextchild = fourheap[++candidate]; - if (best < nextchild) { - bestchild = candidate; - best = nextchild; - } - } - } - } - if (cur >= best) { - break; - } - fourheap[fourpos] = best; - fourpos = bestchild; - } - fourheap[fourpos] = cur; - } - @Override public int peek() { return twoheap[0]; @@ -396,16 +220,8 @@ public class IntegerMaxHeap implements IntegerHeap { */ protected int pos = 0; - /** - * Modification counter we were initialized at. - */ - protected final int myModCount = modCount; - @Override public boolean valid() { - if (modCount != myModCount) { - throw new ConcurrentModificationException(); - } return pos < size; } @@ -416,7 +232,7 @@ public class IntegerMaxHeap implements IntegerHeap { @Override public int get() { - return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + return twoheap[pos]; } } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMinHeap.java index c352ece4..b02e04db 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMinHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMinHeap.java @@ -24,32 +24,11 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; */ import java.util.Arrays; -import java.util.ConcurrentModificationException; import de.lmu.ifi.dbs.elki.math.MathUtil; /** - * Advanced priority queue class, based on a binary heap (for small sizes), - * which will for larger heaps be accompanied by a 4-ary heap (attached below - * the root of the two-ary heap, making the root actually 3-ary). - * - * This code was automatically instantiated for the type: Integer - * - * This combination was found to work quite well in benchmarks, but YMMV. - * - * Some other observations from benchmarking: - * <ul> - * <li>Bulk loading did not improve things</li> - * <li>Primitive heaps are substantially faster.</li> - * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and - * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> - * <li>Workload makes a huge difference. A load-once, poll-until-empty priority - * queue is something different than e.g. a top-k heap, which will see a lot of - * top element replacements.</li> - * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for - * top-k make a difference.</li> - * <li>Different day, different benchmark results ...</li> - * </ul> + * Binary heap for primitive types. * * @author Erich Schubert * @@ -62,44 +41,16 @@ public class IntegerMinHeap implements IntegerHeap { protected int[] twoheap; /** - * Extension heap. - */ - protected int[] fourheap; - - /** * Current size of heap. */ protected int size; /** - * (Structural) modification counter. Used to invalidate iterators. - */ - protected int modCount = 0; - - /** - * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. - */ - private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; - - /** * Initial size of the 2-ary heap. */ private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; /** - * Initial size of 4-ary heap when initialized. - * - * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 - * - * 85 = 4-ary heap of height 3: 21 + 4*4*4 - * - * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 - * - * Since we last grew by 255 (to 511), let's use 341. - */ - private final static int FOUR_HEAP_INITIAL_SIZE = 341; - - /** * Constructor, with default size. */ public IntegerMinHeap() { @@ -107,9 +58,6 @@ public class IntegerMinHeap implements IntegerHeap { int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE]; this.twoheap = twoheap; - this.fourheap = null; - this.size = 0; - this.modCount = 0; } /** @@ -119,27 +67,15 @@ public class IntegerMinHeap implements IntegerHeap { */ public IntegerMinHeap(int minsize) { super(); - if (minsize < TWO_HEAP_MAX_SIZE) { - final int size = MathUtil.nextPow2Int(minsize + 1) - 1; - int[] twoheap = new int[size]; + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + int[] twoheap = new int[size]; - this.twoheap = twoheap; - this.fourheap = null; - } else { - int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE]; - int[] fourheap = new int[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; - this.twoheap = twoheap; - this.fourheap = fourheap; - } - this.size = 0; - this.modCount = 0; + this.twoheap = twoheap; } @Override public void clear() { size = 0; - ++modCount; - fourheap = null; Arrays.fill(twoheap, 0); } @@ -157,29 +93,14 @@ public class IntegerMinHeap implements IntegerHeap { public void add(int o) { final int co = o; // System.err.println("Add: " + o); - if (size < TWO_HEAP_MAX_SIZE) { - if (size >= twoheap.length) { - // Grow by one layer. - twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); - } - final int twopos = size; - twoheap[twopos] = co; - ++size; - heapifyUp2(twopos, co); - ++modCount; - } else { - final int fourpos = size - TWO_HEAP_MAX_SIZE; - if (fourheap == null) { - fourheap = new int[FOUR_HEAP_INITIAL_SIZE]; - } else if (fourpos >= fourheap.length) { - // Grow extension heap by half. - fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); - } - fourheap[fourpos] = co; - ++size; - heapifyUp4(fourpos, co); - ++modCount; + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); } + final int twopos = size; + twoheap[twopos] = co; + ++size; + heapifyUp(twopos, co); } @Override @@ -195,7 +116,6 @@ public class IntegerMinHeap implements IntegerHeap { public int replaceTopElement(int reinsert) { final int ret = twoheap[0]; heapifyDown( reinsert); - ++modCount; return ret; } @@ -205,7 +125,7 @@ public class IntegerMinHeap implements IntegerHeap { * @param twopos Position in 2-ary heap. * @param cur Current object */ - private void heapifyUp2(int twopos, int cur) { + private void heapifyUp(int twopos, int cur) { while (twopos > 0) { final int parent = (twopos - 1) >>> 1; int par = twoheap[parent]; @@ -218,80 +138,29 @@ public class IntegerMinHeap implements IntegerHeap { twoheap[twopos] = cur; } - /** - * Heapify-Up method for 4-ary heap. - * - * @param fourpos Position in 4-ary heap. - * @param cur Current object - */ - private void heapifyUp4(int fourpos, int cur) { - while (fourpos > 0) { - final int parent = (fourpos - 1) >> 2; - int par = fourheap[parent]; - if (cur >= par) { - break; - } - fourheap[fourpos] = par; - fourpos = parent; - } - if (fourpos == 0 && twoheap[0] > cur) { - fourheap[0] = twoheap[0]; - twoheap[0] = cur; - } else { - fourheap[fourpos] = cur; - } - } - @Override public int poll() { final int ret = twoheap[0]; --size; // Replacement object: - if (size >= TWO_HEAP_MAX_SIZE) { - final int last = size - TWO_HEAP_MAX_SIZE; - final int reinsert = fourheap[last]; - fourheap[last] = 0; - heapifyDown(reinsert); - } else if (size > 0) { + if (size > 0) { final int reinsert = twoheap[size]; twoheap[size] = 0; heapifyDown(reinsert); } else { twoheap[0] = 0; } - ++modCount; return ret; } /** * Invoke heapify-down for the root object. * - * @param reinsert Object to insert. - */ - private void heapifyDown(int reinsert) { - if (size > TWO_HEAP_MAX_SIZE) { - // Special case: 3-ary situation. - final int best = (twoheap[1] <= twoheap[2]) ? 1 : 2; - if (fourheap[0] < twoheap[best]) { - twoheap[0] = fourheap[0]; - heapifyDown4(0, reinsert); - } else { - twoheap[0] = twoheap[best]; - heapifyDown2(best, reinsert); - } - return; - } - heapifyDown2(0, reinsert); - } - - /** - * Heapify-Down for 2-ary heap. - * - * @param twopos Position in 2-ary heap. - * @param cur Current object + * @param cur Object to insert. */ - private void heapifyDown2(int twopos, int cur) { - final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + private void heapifyDown(int cur) { + final int stop = size >>> 1; + int twopos = 0; while (twopos < stop) { int bestchild = (twopos << 1) + 1; int best = twoheap[bestchild]; @@ -309,51 +178,6 @@ public class IntegerMinHeap implements IntegerHeap { twoheap[twopos] = cur; } - /** - * Heapify-Down for 4-ary heap. - * - * @param fourpos Position in 4-ary heap. - * @param cur Current object - */ - private void heapifyDown4(int fourpos, int cur) { - final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; - while (fourpos < stop) { - final int child = (fourpos << 2) + 1; - int best = fourheap[child]; - int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; - if (size > minsize) { - int nextchild = fourheap[candidate]; - if (best > nextchild) { - bestchild = candidate; - best = nextchild; - } - - minsize += 2; - if (size >= minsize) { - nextchild = fourheap[++candidate]; - if (best > nextchild) { - bestchild = candidate; - best = nextchild; - } - - if (size > minsize) { - nextchild = fourheap[++candidate]; - if (best > nextchild) { - bestchild = candidate; - best = nextchild; - } - } - } - } - if (cur <= best) { - break; - } - fourheap[fourpos] = best; - fourpos = bestchild; - } - fourheap[fourpos] = cur; - } - @Override public int peek() { return twoheap[0]; @@ -396,16 +220,8 @@ public class IntegerMinHeap implements IntegerHeap { */ protected int pos = 0; - /** - * Modification counter we were initialized at. - */ - protected final int myModCount = modCount; - @Override public boolean valid() { - if (modCount != myModCount) { - throw new ConcurrentModificationException(); - } return pos < size; } @@ -416,7 +232,7 @@ public class IntegerMinHeap implements IntegerHeap { @Override public int get() { - return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + return twoheap[pos]; } } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectHeap.java index 01f7aea0..e4f577af 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMaxHeap.java index 93a4e75a..036a9520 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMaxHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMaxHeap.java @@ -24,32 +24,11 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; */ import java.util.Arrays; -import java.util.ConcurrentModificationException; import de.lmu.ifi.dbs.elki.math.MathUtil; /** - * Advanced priority queue class, based on a binary heap (for small sizes), - * which will for larger heaps be accompanied by a 4-ary heap (attached below - * the root of the two-ary heap, making the root actually 3-ary). - * - * This code was automatically instantiated for the types: Integer and Object - * - * This combination was found to work quite well in benchmarks, but YMMV. - * - * Some other observations from benchmarking: - * <ul> - * <li>Bulk loading did not improve things</li> - * <li>Primitive heaps are substantially faster.</li> - * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and - * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> - * <li>Workload makes a huge difference. A load-once, poll-until-empty priority - * queue is something different than e.g. a top-k heap, which will see a lot of - * top element replacements.</li> - * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for - * top-k make a difference.</li> - * <li>Different day, different benchmark results ...</li> - * </ul> + * Binary heap for primitive types. * * @author Erich Schubert * @@ -68,49 +47,16 @@ public class IntegerObjectMaxHeap<V> implements IntegerObjectHeap<V> { protected Object[] twovals; /** - * Extension heap. - */ - protected int[] fourheap; - - /** - * Extension heapvalues. - */ - protected Object[] fourvals; - - /** * Current size of heap. */ protected int size; /** - * (Structural) modification counter. Used to invalidate iterators. - */ - protected int modCount = 0; - - /** - * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. - */ - private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; - - /** * Initial size of the 2-ary heap. */ private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; /** - * Initial size of 4-ary heap when initialized. - * - * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 - * - * 85 = 4-ary heap of height 3: 21 + 4*4*4 - * - * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 - * - * Since we last grew by 255 (to 511), let's use 341. - */ - private final static int FOUR_HEAP_INITIAL_SIZE = 341; - - /** * Constructor, with default size. */ public IntegerObjectMaxHeap() { @@ -120,10 +66,6 @@ public class IntegerObjectMaxHeap<V> implements IntegerObjectHeap<V> { this.twoheap = twoheap; this.twovals = twovals; - this.fourheap = null; - this.fourvals = null; - this.size = 0; - this.modCount = 0; } /** @@ -133,35 +75,17 @@ public class IntegerObjectMaxHeap<V> implements IntegerObjectHeap<V> { */ public IntegerObjectMaxHeap(int minsize) { super(); - if (minsize < TWO_HEAP_MAX_SIZE) { - final int size = MathUtil.nextPow2Int(minsize + 1) - 1; - int[] twoheap = new int[size]; - Object[] twovals = new Object[size]; + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + int[] twoheap = new int[size]; + Object[] twovals = new Object[size]; - this.twoheap = twoheap; - this.twovals = twovals; - this.fourheap = null; - this.fourvals = null; - } else { - int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE]; - Object[] twovals = new Object[TWO_HEAP_INITIAL_SIZE]; - int[] fourheap = new int[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; - Object[] fourvals = new Object[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; - this.twoheap = twoheap; - this.twovals = twovals; - this.fourheap = fourheap; - this.fourvals = fourvals; - } - this.size = 0; - this.modCount = 0; + this.twoheap = twoheap; + this.twovals = twovals; } @Override public void clear() { size = 0; - ++modCount; - fourheap = null; - fourvals = null; Arrays.fill(twoheap, 0); Arrays.fill(twovals, null); } @@ -181,34 +105,16 @@ public class IntegerObjectMaxHeap<V> implements IntegerObjectHeap<V> { final int co = o; final Object cv = (Object)v; // System.err.println("Add: " + o); - if (size < TWO_HEAP_MAX_SIZE) { - if (size >= twoheap.length) { - // Grow by one layer. - twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); - twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1); - } - final int twopos = size; - twoheap[twopos] = co; - twovals[twopos] = cv; - ++size; - heapifyUp2(twopos, co, cv); - ++modCount; - } else { - final int fourpos = size - TWO_HEAP_MAX_SIZE; - if (fourheap == null) { - fourheap = new int[FOUR_HEAP_INITIAL_SIZE]; - fourvals = new Object[FOUR_HEAP_INITIAL_SIZE]; - } else if (fourpos >= fourheap.length) { - // Grow extension heap by half. - fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); - fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1)); - } - fourheap[fourpos] = co; - fourvals[fourpos] = cv; - ++size; - heapifyUp4(fourpos, co, cv); - ++modCount; + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1); } + final int twopos = size; + twoheap[twopos] = co; + twovals[twopos] = cv; + ++size; + heapifyUp(twopos, co, cv); } @Override @@ -223,7 +129,6 @@ public class IntegerObjectMaxHeap<V> implements IntegerObjectHeap<V> { @Override public void replaceTopElement(int reinsert, V val) { heapifyDown(reinsert, (Object)val); - ++modCount; } /** @@ -233,7 +138,7 @@ public class IntegerObjectMaxHeap<V> implements IntegerObjectHeap<V> { * @param cur Current object * @param val Current value */ - private void heapifyUp2(int twopos, int cur, Object val) { + private void heapifyUp(int twopos, int cur, Object val) { while (twopos > 0) { final int parent = (twopos - 1) >>> 1; int par = twoheap[parent]; @@ -248,47 +153,11 @@ public class IntegerObjectMaxHeap<V> implements IntegerObjectHeap<V> { twovals[twopos] = val; } - /** - * Heapify-Up method for 4-ary heap. - * - * @param fourpos Position in 4-ary heap. - * @param cur Current object - * @param val Current value - */ - private void heapifyUp4(int fourpos, int cur, Object val) { - while (fourpos > 0) { - final int parent = (fourpos - 1) >> 2; - int par = fourheap[parent]; - if (cur <= par) { - break; - } - fourheap[fourpos] = par; - fourvals[fourpos] = fourvals[parent]; - fourpos = parent; - } - if (fourpos == 0 && twoheap[0] < cur) { - fourheap[0] = twoheap[0]; - fourvals[0] = twovals[0]; - twoheap[0] = cur; - twovals[0] = val; - } else { - fourheap[fourpos] = cur; - fourvals[fourpos] = val; - } - } - @Override public void poll() { --size; // Replacement object: - if (size >= TWO_HEAP_MAX_SIZE) { - final int last = size - TWO_HEAP_MAX_SIZE; - final int reinsert = fourheap[last]; - final Object reinsertv = fourvals[last]; - fourheap[last] = 0; - fourvals[last] = null; - heapifyDown(reinsert, reinsertv); - } else if (size > 0) { + if (size > 0) { final int reinsert = twoheap[size]; final Object reinsertv = twovals[size]; twoheap[size] = 0; @@ -298,42 +167,17 @@ public class IntegerObjectMaxHeap<V> implements IntegerObjectHeap<V> { twoheap[0] = 0; twovals[0] = null; } - ++modCount; } /** * Invoke heapify-down for the root object. * - * @param reinsert Object to insert. - * @param val Value to reinsert. - */ - private void heapifyDown(int reinsert, Object val) { - if (size > TWO_HEAP_MAX_SIZE) { - // Special case: 3-ary situation. - final int best = (twoheap[1] >= twoheap[2]) ? 1 : 2; - if (fourheap[0] > twoheap[best]) { - twoheap[0] = fourheap[0]; - twovals[0] = fourvals[0]; - heapifyDown4(0, reinsert, val); - } else { - twoheap[0] = twoheap[best]; - twovals[0] = twovals[best]; - heapifyDown2(best, reinsert, val); - } - return; - } - heapifyDown2(0, reinsert, val); - } - - /** - * Heapify-Down for 2-ary heap. - * - * @param twopos Position in 2-ary heap. - * @param cur Current object + * @param cur Object to insert. * @param val Value to reinsert. */ - private void heapifyDown2(int twopos, int cur, Object val) { - final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + private void heapifyDown(int cur, Object val) { + final int stop = size >>> 1; + int twopos = 0; while (twopos < stop) { int bestchild = (twopos << 1) + 1; int best = twoheap[bestchild]; @@ -353,54 +197,6 @@ public class IntegerObjectMaxHeap<V> implements IntegerObjectHeap<V> { twovals[twopos] = val; } - /** - * Heapify-Down for 4-ary heap. - * - * @param fourpos Position in 4-ary heap. - * @param cur Current object - * @param val Value to reinsert. - */ - private void heapifyDown4(int fourpos, int cur, Object val) { - final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; - while (fourpos < stop) { - final int child = (fourpos << 2) + 1; - int best = fourheap[child]; - int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; - if (size > minsize) { - int nextchild = fourheap[candidate]; - if (best < nextchild) { - bestchild = candidate; - best = nextchild; - } - - minsize += 2; - if (size >= minsize) { - nextchild = fourheap[++candidate]; - if (best < nextchild) { - bestchild = candidate; - best = nextchild; - } - - if (size > minsize) { - nextchild = fourheap[++candidate]; - if (best < nextchild) { - bestchild = candidate; - best = nextchild; - } - } - } - } - if (cur >= best) { - break; - } - fourheap[fourpos] = best; - fourvals[fourpos] = fourvals[bestchild]; - fourpos = bestchild; - } - fourheap[fourpos] = cur; - fourvals[fourpos] = val; - } - @Override public int peekKey() { return twoheap[0]; @@ -449,16 +245,8 @@ public class IntegerObjectMaxHeap<V> implements IntegerObjectHeap<V> { */ protected int pos = 0; - /** - * Modification counter we were initialized at. - */ - protected final int myModCount = modCount; - @Override public boolean valid() { - if (modCount != myModCount) { - throw new ConcurrentModificationException(); - } return pos < size; } @@ -469,14 +257,14 @@ public class IntegerObjectMaxHeap<V> implements IntegerObjectHeap<V> { @Override public int getKey() { - return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + return twoheap[pos]; } @SuppressWarnings("unchecked") @Override public V getValue() { - return (V)((pos < TWO_HEAP_MAX_SIZE) ? twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]); + return (V)twovals[pos]; } } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMinHeap.java index e54c7d28..cc816a0e 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMinHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMinHeap.java @@ -24,32 +24,11 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; */ import java.util.Arrays; -import java.util.ConcurrentModificationException; import de.lmu.ifi.dbs.elki.math.MathUtil; /** - * Advanced priority queue class, based on a binary heap (for small sizes), - * which will for larger heaps be accompanied by a 4-ary heap (attached below - * the root of the two-ary heap, making the root actually 3-ary). - * - * This code was automatically instantiated for the types: Integer and Object - * - * This combination was found to work quite well in benchmarks, but YMMV. - * - * Some other observations from benchmarking: - * <ul> - * <li>Bulk loading did not improve things</li> - * <li>Primitive heaps are substantially faster.</li> - * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and - * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> - * <li>Workload makes a huge difference. A load-once, poll-until-empty priority - * queue is something different than e.g. a top-k heap, which will see a lot of - * top element replacements.</li> - * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for - * top-k make a difference.</li> - * <li>Different day, different benchmark results ...</li> - * </ul> + * Binary heap for primitive types. * * @author Erich Schubert * @@ -68,49 +47,16 @@ public class IntegerObjectMinHeap<V> implements IntegerObjectHeap<V> { protected Object[] twovals; /** - * Extension heap. - */ - protected int[] fourheap; - - /** - * Extension heapvalues. - */ - protected Object[] fourvals; - - /** * Current size of heap. */ protected int size; /** - * (Structural) modification counter. Used to invalidate iterators. - */ - protected int modCount = 0; - - /** - * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. - */ - private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; - - /** * Initial size of the 2-ary heap. */ private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; /** - * Initial size of 4-ary heap when initialized. - * - * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 - * - * 85 = 4-ary heap of height 3: 21 + 4*4*4 - * - * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 - * - * Since we last grew by 255 (to 511), let's use 341. - */ - private final static int FOUR_HEAP_INITIAL_SIZE = 341; - - /** * Constructor, with default size. */ public IntegerObjectMinHeap() { @@ -120,10 +66,6 @@ public class IntegerObjectMinHeap<V> implements IntegerObjectHeap<V> { this.twoheap = twoheap; this.twovals = twovals; - this.fourheap = null; - this.fourvals = null; - this.size = 0; - this.modCount = 0; } /** @@ -133,35 +75,17 @@ public class IntegerObjectMinHeap<V> implements IntegerObjectHeap<V> { */ public IntegerObjectMinHeap(int minsize) { super(); - if (minsize < TWO_HEAP_MAX_SIZE) { - final int size = MathUtil.nextPow2Int(minsize + 1) - 1; - int[] twoheap = new int[size]; - Object[] twovals = new Object[size]; + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + int[] twoheap = new int[size]; + Object[] twovals = new Object[size]; - this.twoheap = twoheap; - this.twovals = twovals; - this.fourheap = null; - this.fourvals = null; - } else { - int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE]; - Object[] twovals = new Object[TWO_HEAP_INITIAL_SIZE]; - int[] fourheap = new int[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; - Object[] fourvals = new Object[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; - this.twoheap = twoheap; - this.twovals = twovals; - this.fourheap = fourheap; - this.fourvals = fourvals; - } - this.size = 0; - this.modCount = 0; + this.twoheap = twoheap; + this.twovals = twovals; } @Override public void clear() { size = 0; - ++modCount; - fourheap = null; - fourvals = null; Arrays.fill(twoheap, 0); Arrays.fill(twovals, null); } @@ -181,34 +105,16 @@ public class IntegerObjectMinHeap<V> implements IntegerObjectHeap<V> { final int co = o; final Object cv = (Object)v; // System.err.println("Add: " + o); - if (size < TWO_HEAP_MAX_SIZE) { - if (size >= twoheap.length) { - // Grow by one layer. - twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); - twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1); - } - final int twopos = size; - twoheap[twopos] = co; - twovals[twopos] = cv; - ++size; - heapifyUp2(twopos, co, cv); - ++modCount; - } else { - final int fourpos = size - TWO_HEAP_MAX_SIZE; - if (fourheap == null) { - fourheap = new int[FOUR_HEAP_INITIAL_SIZE]; - fourvals = new Object[FOUR_HEAP_INITIAL_SIZE]; - } else if (fourpos >= fourheap.length) { - // Grow extension heap by half. - fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); - fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1)); - } - fourheap[fourpos] = co; - fourvals[fourpos] = cv; - ++size; - heapifyUp4(fourpos, co, cv); - ++modCount; + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1); } + final int twopos = size; + twoheap[twopos] = co; + twovals[twopos] = cv; + ++size; + heapifyUp(twopos, co, cv); } @Override @@ -223,7 +129,6 @@ public class IntegerObjectMinHeap<V> implements IntegerObjectHeap<V> { @Override public void replaceTopElement(int reinsert, V val) { heapifyDown(reinsert, (Object)val); - ++modCount; } /** @@ -233,7 +138,7 @@ public class IntegerObjectMinHeap<V> implements IntegerObjectHeap<V> { * @param cur Current object * @param val Current value */ - private void heapifyUp2(int twopos, int cur, Object val) { + private void heapifyUp(int twopos, int cur, Object val) { while (twopos > 0) { final int parent = (twopos - 1) >>> 1; int par = twoheap[parent]; @@ -248,47 +153,11 @@ public class IntegerObjectMinHeap<V> implements IntegerObjectHeap<V> { twovals[twopos] = val; } - /** - * Heapify-Up method for 4-ary heap. - * - * @param fourpos Position in 4-ary heap. - * @param cur Current object - * @param val Current value - */ - private void heapifyUp4(int fourpos, int cur, Object val) { - while (fourpos > 0) { - final int parent = (fourpos - 1) >> 2; - int par = fourheap[parent]; - if (cur >= par) { - break; - } - fourheap[fourpos] = par; - fourvals[fourpos] = fourvals[parent]; - fourpos = parent; - } - if (fourpos == 0 && twoheap[0] > cur) { - fourheap[0] = twoheap[0]; - fourvals[0] = twovals[0]; - twoheap[0] = cur; - twovals[0] = val; - } else { - fourheap[fourpos] = cur; - fourvals[fourpos] = val; - } - } - @Override public void poll() { --size; // Replacement object: - if (size >= TWO_HEAP_MAX_SIZE) { - final int last = size - TWO_HEAP_MAX_SIZE; - final int reinsert = fourheap[last]; - final Object reinsertv = fourvals[last]; - fourheap[last] = 0; - fourvals[last] = null; - heapifyDown(reinsert, reinsertv); - } else if (size > 0) { + if (size > 0) { final int reinsert = twoheap[size]; final Object reinsertv = twovals[size]; twoheap[size] = 0; @@ -298,42 +167,17 @@ public class IntegerObjectMinHeap<V> implements IntegerObjectHeap<V> { twoheap[0] = 0; twovals[0] = null; } - ++modCount; } /** * Invoke heapify-down for the root object. * - * @param reinsert Object to insert. - * @param val Value to reinsert. - */ - private void heapifyDown(int reinsert, Object val) { - if (size > TWO_HEAP_MAX_SIZE) { - // Special case: 3-ary situation. - final int best = (twoheap[1] <= twoheap[2]) ? 1 : 2; - if (fourheap[0] < twoheap[best]) { - twoheap[0] = fourheap[0]; - twovals[0] = fourvals[0]; - heapifyDown4(0, reinsert, val); - } else { - twoheap[0] = twoheap[best]; - twovals[0] = twovals[best]; - heapifyDown2(best, reinsert, val); - } - return; - } - heapifyDown2(0, reinsert, val); - } - - /** - * Heapify-Down for 2-ary heap. - * - * @param twopos Position in 2-ary heap. - * @param cur Current object + * @param cur Object to insert. * @param val Value to reinsert. */ - private void heapifyDown2(int twopos, int cur, Object val) { - final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + private void heapifyDown(int cur, Object val) { + final int stop = size >>> 1; + int twopos = 0; while (twopos < stop) { int bestchild = (twopos << 1) + 1; int best = twoheap[bestchild]; @@ -353,54 +197,6 @@ public class IntegerObjectMinHeap<V> implements IntegerObjectHeap<V> { twovals[twopos] = val; } - /** - * Heapify-Down for 4-ary heap. - * - * @param fourpos Position in 4-ary heap. - * @param cur Current object - * @param val Value to reinsert. - */ - private void heapifyDown4(int fourpos, int cur, Object val) { - final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; - while (fourpos < stop) { - final int child = (fourpos << 2) + 1; - int best = fourheap[child]; - int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; - if (size > minsize) { - int nextchild = fourheap[candidate]; - if (best > nextchild) { - bestchild = candidate; - best = nextchild; - } - - minsize += 2; - if (size >= minsize) { - nextchild = fourheap[++candidate]; - if (best > nextchild) { - bestchild = candidate; - best = nextchild; - } - - if (size > minsize) { - nextchild = fourheap[++candidate]; - if (best > nextchild) { - bestchild = candidate; - best = nextchild; - } - } - } - } - if (cur <= best) { - break; - } - fourheap[fourpos] = best; - fourvals[fourpos] = fourvals[bestchild]; - fourpos = bestchild; - } - fourheap[fourpos] = cur; - fourvals[fourpos] = val; - } - @Override public int peekKey() { return twoheap[0]; @@ -449,16 +245,8 @@ public class IntegerObjectMinHeap<V> implements IntegerObjectHeap<V> { */ protected int pos = 0; - /** - * Modification counter we were initialized at. - */ - protected final int myModCount = modCount; - @Override public boolean valid() { - if (modCount != myModCount) { - throw new ConcurrentModificationException(); - } return pos < size; } @@ -469,14 +257,14 @@ public class IntegerObjectMinHeap<V> implements IntegerObjectHeap<V> { @Override public int getKey() { - return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + return twoheap[pos]; } @SuppressWarnings("unchecked") @Override public V getValue() { - return (V)((pos < TWO_HEAP_MAX_SIZE) ? twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]); + return (V)twovals[pos]; } } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ObjectHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ObjectHeap.java index b5dbbb0e..2b03740e 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ObjectHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ObjectHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -31,7 +31,6 @@ import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter; * @author Erich Schubert * * @apiviz.has UnsortedIter - * * @param <K> Key type */ public interface ObjectHeap<K> { @@ -85,7 +84,7 @@ public interface ObjectHeap<K> { * @return Size */ public int size(); - + /** * Is the heap empty? * @@ -112,7 +111,6 @@ public interface ObjectHeap<K> { * </pre> * * @author Erich Schubert - * * @param <K> Key type */ public static interface UnsortedIter<K> extends Iter { diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntStaticHistogram.java index 7b1eed94..84b55cd1 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntStaticHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntStaticHistogram.java @@ -73,7 +73,7 @@ public class IntStaticHistogram extends AbstractStaticHistogram implements IntHi } else { // Shift in place and clear head System.arraycopy(data, 0, data, -bin, size); - Arrays.fill(data, 0, -bin, (int) 0); + Arrays.fill(data, 0, -bin, 0); } data[0] = val; // Note that bin is negative, -bin is the shift offset! |