summary | refs | log | tree | commit | diff
path: root/src/de/lmu/ifi/dbs/elki/utilities/datastructures
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/utilities/datastructures')
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/AnyMap.java | 94
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/HashMapList.java | 124
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/MaskedArrayList.java | 174
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/QuickSelect.java | 767
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayAdapter.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayDBIDsAdapter.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayLikeUtil.java | 67
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/DoubleArrayAdapter.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ExtendedArray.java | 4
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FeatureVectorAdapter.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FloatArrayAdapter.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/IdentityArrayAdapter.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ListArrayAdapter.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberArrayAdapter.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberListArrayAdapter.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberVectorAdapter.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SingleSubsetArrayAdapter.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SubsetArrayAdapter.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SubsetNumberArrayAdapter.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/TDoubleListAdapter.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/VectorAdapter.java | 85
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/package-info.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/IntegerArrayQuickSort.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/IntegerComparator.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/package-info.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/AbstractHeap.java | 103
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMaxHeap.java | 402
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMinHeap.java | 402
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMaxHeap.java | 440
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMinHeap.java | 440
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleHeap.java | 222
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerHeap.java | 127
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMaxHeap.java | 478
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMinHeap.java | 478
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongHeap.java | 127
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMaxHeap.java | 478
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMinHeap.java | 478
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMaxHeap.java | 394
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMinHeap.java | 394
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjMaxHeap.java | 328
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjMinHeap.java | 328
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectHeap.java | 129
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMaxHeap.java | 482
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMinHeap.java | 482
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoublePriorityObject.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/Heap.java | 192
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerHeap.java | 222
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMaxHeap.java | 394
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMinHeap.java | 394
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectHeap.java | 129
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMaxHeap.java | 482
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMinHeap.java | 482
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerPriorityObject.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ObjectHeap.java | 227
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedHeap.java | 53
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedUpdatableHeap.java | 11
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedHeap.java | 3
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedUpdatableHeap.java | 3
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/UpdatableHeap.java | 114
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/package-info.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/HashMapHierarchy.java | 580
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/Hierarchical.java | 90
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/Hierarchy.java | 68
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/HierarchyHashmapList.java | 299
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/HierarchyReferenceLists.java | 232
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/ModifiableHierarchy.java | 32
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/package-info.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractObjDynamicHistogram.java | 37
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractObjStaticHistogram.java | 42
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractStaticHistogram.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleArrayStaticHistogram.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleDynamicHistogram.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleHistogram.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleStaticHistogram.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatDynamicHistogram.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatHistogram.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatStaticHistogram.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/Histogram.java | 4
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntArrayStaticHistogram.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntDynamicHistogram.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntHistogram.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntStaticHistogram.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongArrayStaticHistogram.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongDynamicHistogram.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongHistogram.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongStaticHistogram.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/MeanVarianceStaticHistogram.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ObjHistogram.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortDynamicHistogram.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortHistogram.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortStaticHistogram.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/package-info.java | 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/ArrayIter.java | 59
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/ArrayListIter.java | 99
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/Iter.java | 71
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/MIter.java | 54
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/package-info.java | 40
-rw-r--r-- src/de/lmu/ifi/dbs/elki/utilities/datastructures/package-info.java | 2
98 files changed, 9598 insertions, 2932 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/AnyMap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/AnyMap.java
deleted file mode 100644
index 763ce105..00000000
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/AnyMap.java
+++ /dev/null
@@ -1,94 +0,0 @@
-package de.lmu.ifi.dbs.elki.utilities.datastructures;
-
-/*
- This file is part of ELKI:
- Environment for Developing KDD-Applications Supported by Index-Structures
-
- Copyright (C) 2012
- Ludwig-Maximilians-Universität München
- Lehr- und Forschungseinheit für Datenbanksysteme
- ELKI Development Team
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-import java.util.HashMap;
-
-/**
- * Associative storage based on a {@link HashMap} for multiple object types that
- * offers a type checked {@link #get(Object, Class)} method.
- *
- * @author Erich Schubert
- *
- * @param <K> Key class type
- */
-public class AnyMap<K> extends HashMap<K, Object> {
- /**
- * Serial version.
- */
- private static final long serialVersionUID = 1L;
-
- /**
- * Constructor
- */
- public AnyMap() {
- super();
- }
-
- /**
- * Type checked get method
- *
- * @param <T> Return type
- * @param key Key
- * @param restriction restriction class
- * @return Object that is guaranteed to be of class restriction or null
- */
- public <T> T get(K key, Class<T> restriction) {
- Object o = super.get(key);
- if(o == null) {
- return null;
- }
- try {
- return restriction.cast(o);
- }
- catch(ClassCastException e) {
- return null;
- }
- }
-
- /**
- * (Largely) type checked get method for use with generic types
- *
- * @param <T> Return type
- * @param key Key
- * @param restriction restriction class
- * @return Object that is guaranteed to be of class restriction or null
- */
- @SuppressWarnings("unchecked")
- public <T> T getGenerics(K key, Class<?> restriction) {
- return (T) get(key, restriction);
- }
-
- /**
- * Depreciate the use of the untyped get method.
- *
- * @deprecated use {@link #get(Object, Class)} or
- * {@link #getGenerics(Object, Class)} instead, for type safety!
- */
- @Override
- @Deprecated
- public Object get(Object key) {
- return super.get(key);
- }
-}
\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/HashMapList.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/HashMapList.java
deleted file mode 100644
index 26fa4d19..00000000
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/HashMapList.java
+++ /dev/null
@@ -1,124 +0,0 @@
-package de.lmu.ifi.dbs.elki.utilities.datastructures;
-
-/*
- This file is part of ELKI:
- Environment for Developing KDD-Applications Supported by Index-Structures
-
- Copyright (C) 2012
- Ludwig-Maximilians-Universität München
- Lehr- und Forschungseinheit für Datenbanksysteme
- ELKI Development Team
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-
-/**
- * Multi-Associative container, that stores a list of values for a particular key.
- *
- * @author Erich Schubert
- *
- * @apiviz.has List oneway - - contains
- *
- * @param <K> Key type
- * @param <V> Value type
- */
-// TODO: use MultiValueMap from apache collections instead?
-public class HashMapList<K, V> extends HashMap<K, List<V>> {
- /**
- * Serial version
- */
- private static final long serialVersionUID = 3883242025598456055L;
-
- /**
- * Constructor.
- */
- public HashMapList() {
- super();
- }
-
- /**
- * Constructor with initial capacity (of the hash)
- *
- * @param initialCapacity initial capacity
- */
- public HashMapList(int initialCapacity) {
- super(initialCapacity);
- }
-
- /**
- * Add a single value to the given key.
- *
- * @param key Key
- * @param value Additional Value
- */
- public synchronized void add(K key, V value) {
- List<V> list = super.get(key);
- if (list == null) {
- list = new ArrayList<V>(1);
- super.put(key, list);
- }
- list.add(value);
- }
-
- /**
- * Check that there is at least one value for the key.
- */
- @Override
- public boolean containsKey(Object key) {
- List<V> list = super.get(key);
- if (list == null) {
- return false;
- }
- return list.size() > 0;
- }
-
- /**
- * Remove a single value from the map.
- *
- * @param key Key to remove
- * @param value Value to remove.
- * @return <tt>true</tt> if this list contained the specified element
- */
- public synchronized boolean remove(K key, V value) {
- List<V> list = super.get(key);
- if (list == null) {
- return false;
- }
- boolean success = list.remove(value);
- // remove empty lists.
- if (list.size() == 0) {
- super.remove(key);
- }
- return success;
- }
-
- /**
- * Test if a given value is already present for the key.
- *
- * @param key Key
- * @param value Value
- * @return <tt>true</tt> if the keys list contains the specified element
- */
- public boolean contains(K key, V value) {
- List<V> list = super.get(key);
- if (list == null) {
- return false;
- }
- return list.contains(value);
- }
-}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/MaskedArrayList.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/MaskedArrayList.java
deleted file mode 100644
index c24519d1..00000000
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/MaskedArrayList.java
+++ /dev/null
@@ -1,174 +0,0 @@
-package de.lmu.ifi.dbs.elki.utilities.datastructures;
-
-/*
- This file is part of ELKI:
- Environment for Developing KDD-Applications Supported by Index-Structures
-
- Copyright (C) 2012
- Ludwig-Maximilians-Universität München
- Lehr- und Forschungseinheit für Datenbanksysteme
- ELKI Development Team
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-import java.util.AbstractCollection;
-import java.util.ArrayList;
-import java.util.BitSet;
-import java.util.Collection;
-import java.util.Iterator;
-
-/**
- * This class is a virtual collection based on masking an array list using a bit
- * mask.
- *
- * @author Erich Schubert
- *
- * @apiviz.stereotype decorator
- * @apiviz.composedOf java.util.ArrayList
- * @apiviz.composedOf java.util.BitSet
- *
- * @param <T> Object type
- */
-public class MaskedArrayList<T> extends AbstractCollection<T> implements Collection<T> {
- /**
- * Data storage
- */
- protected ArrayList<T> data;
-
- /**
- * The bitmask used for masking
- */
- protected BitSet bits;
-
- /**
- * Flag whether to iterator over set or unset values.
- */
- protected boolean inverse = false;
-
- /**
- * Constructor.
- *
- * @param data Data
- * @param bits Bitset to use as mask
- * @param inverse Flag to inverse the masking rule
- */
- public MaskedArrayList(ArrayList<T> data, BitSet bits, boolean inverse) {
- super();
- this.data = data;
- this.bits = bits;
- this.inverse = inverse;
- }
-
- @Override
- public boolean add(T e) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public Iterator<T> iterator() {
- if(inverse) {
- return new InvItr();
- }
- else {
- return new Itr();
- }
- }
-
- @Override
- public int size() {
- if(inverse) {
- return data.size() - bits.cardinality();
- }
- else {
- return bits.cardinality();
- }
- }
-
- /**
- * Iterator over set bits
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
- */
- protected class Itr implements Iterator<T> {
- /**
- * Next position.
- */
- private int pos;
-
- /**
- * Constructor
- */
- protected Itr() {
- this.pos = bits.nextSetBit(0);
- }
-
- @Override
- public boolean hasNext() {
- return (pos >= 0) && (pos < data.size());
- }
-
- @Override
- public T next() {
- T cur = data.get(pos);
- pos = bits.nextSetBit(pos + 1);
- return cur;
- }
-
- @Override
- public void remove() {
- throw new UnsupportedOperationException();
- }
- }
-
- /**
- * Iterator over unset elements.
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
- */
- protected class InvItr implements Iterator<T> {
- /**
- * Next unset position.
- */
- private int pos;
-
- /**
- * Constructor
- */
- protected InvItr() {
- this.pos = bits.nextClearBit(0);
- }
-
- @Override
- public boolean hasNext() {
- return (pos >= 0) && (pos < data.size());
- }
-
- @Override
- public T next() {
- T cur = data.get(pos);
- pos = bits.nextClearBit(pos + 1);
- return cur;
- }
-
- @Override
- public void remove() {
- throw new UnsupportedOperationException();
- }
- }
-}
\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/QuickSelect.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/QuickSelect.java
index bfa7950d..3746ff87 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/QuickSelect.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/QuickSelect.java
@@ -1,18 +1,10 @@
package de.lmu.ifi.dbs.elki.utilities.datastructures;
-import java.util.Comparator;
-import java.util.List;
-
-import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
-
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -31,6 +23,14 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import java.util.Comparator;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
+
/**
* QuickSelect computes ("selects") the element at a given rank and can be used
* to compute Medians and arbitrary quantiles by computing the appropriate rank.
@@ -43,13 +43,297 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
*
* @apiviz.uses ArrayModifiableDBIDs
* @apiviz.uses List
- * @apiviz.uses Comparator
+ * @apiviz.uses Adapter
*/
public class QuickSelect {
/**
* For small arrays, use a simpler method:
*/
- private static final int SMALL = 10;
+ private static final int SMALL = 47;
+
+ /**
+ * Choose the best pivot for the given rank.
+ *
+ * @param rank Rank
+ * @param m1 Pivot candidate
+ * @param m2 Pivot candidate
+ * @param m3 Pivot candidate
+ * @param m4 Pivot candidate
+ * @param m5 Pivot candidate
+ * @return Best pivot candidate
+ */
+ private static final int bestPivot(int rank, int m1, int m2, int m3, int m4, int m5) {
+ if (rank < m1) {
+ return m1;
+ }
+ if (rank > m5) {
+ return m5;
+ }
+ if (rank < m2) {
+ return m2;
+ }
+ if (rank > m4) {
+ return m4;
+ }
+ return m3;
+ }
+
+ /**
+ * QuickSelect is essentially quicksort, except that we only "sort" that half
+ * of the array that we are interested in.
+ *
+ * @param data Data to process
+ * @param start Interval start
+ * @param end Interval end (exclusive)
+ * @param rank rank position we are interested in (starting at 0)
+ */
+ public static <T> void quickSelect(T data, Adapter<T> adapter, int start, int end, int rank) {
+ while (true) {
+ // Optimization for small arrays
+ // This also ensures a minimum size below
+ if (start + SMALL > end) {
+ insertionSort(data, adapter, start, end);
+ return;
+ }
+
+ // Best of 5 pivot picking:
+ // Choose pivots by looking at five candidates.
+ final int len = end - start;
+ final int seventh = (len >> 3) + (len >> 6) + 1;
+ final int m3 = (start + end) >> 1; // middle
+ final int m2 = m3 - seventh;
+ final int m1 = m2 - seventh;
+ final int m4 = m3 + seventh;
+ final int m5 = m4 + seventh;
+
+ // Explicit (and optimal) sorting network for 5 elements
+ // See Knuth for details.
+ if (adapter.compareGreater(data, m1, m2)) {
+ adapter.swap(data, m1, m2);
+ }
+ if (adapter.compareGreater(data, m1, m3)) {
+ adapter.swap(data, m1, m3);
+ }
+ if (adapter.compareGreater(data, m2, m3)) {
+ adapter.swap(data, m2, m3);
+ }
+ if (adapter.compareGreater(data, m4, m5)) {
+ adapter.swap(data, m4, m5);
+ }
+ if (adapter.compareGreater(data, m1, m4)) {
+ adapter.swap(data, m1, m4);
+ }
+ if (adapter.compareGreater(data, m3, m4)) {
+ adapter.swap(data, m3, m4);
+ }
+ if (adapter.compareGreater(data, m2, m5)) {
+ adapter.swap(data, m2, m5);
+ }
+ if (adapter.compareGreater(data, m2, m3)) {
+ adapter.swap(data, m2, m3);
+ }
+ if (adapter.compareGreater(data, m4, m5)) {
+ adapter.swap(data, m4, m5);
+ }
+
+ int best = bestPivot(rank, m1, m2, m3, m4, m5);
+ // final double pivot = data[best];
+ // Move middle element out of the way.
+ adapter.swap(data, best, end - 1);
+
+ // Begin partitioning
+ int i = start, j = end - 2;
+ // This is classic quicksort stuff
+ while (true) {
+ while (i <= j && adapter.compareGreater(data, end - 1, i)) {
+ i++;
+ }
+ while (j >= i && !adapter.compareGreater(data, end - 1, j)) {
+ j--;
+ }
+ if (i >= j) {
+ break;
+ }
+ adapter.swap(data, i, j);
+ }
+
+ // Move pivot (former middle element) back into the appropriate place
+ adapter.swap(data, i, end - 1);
+
+ // In contrast to quicksort, we only need to recurse into the half we are
+ // interested in. Instead of recursion we now use iteration.
+ if (rank < i) {
+ end = i;
+ } else if (rank > i) {
+ start = i + 1;
+ } else {
+ break;
+ }
+ } // Loop until rank==i
+ }
+
+ /**
+ * Sort a small array using repetitive insertion sort.
+ *
+ * @param data Data to sort
+ * @param start Interval start
+ * @param end Interval end
+ */
+ private static <T> void insertionSort(T data, Adapter<T> adapter, int start, int end) {
+ for (int i = start + 1; i < end; i++) {
+ for (int j = i; j > start && adapter.compareGreater(data, j - 1, j); j--) {
+ adapter.swap(data, j, j - 1);
+ }
+ }
+ }
+
+ /**
+ * Adapter class to apply QuickSelect to arbitrary data structures.
+ *
+ * @author Erich Schubert
+ *
+ * @param <T> Data structure type
+ */
+ public static interface Adapter<T> {
+ /**
+ * Swap the two elements at positions i and j.
+ *
+ * @param data Data structure
+ * @param i Position i
+ * @param j Position j
+ */
+ void swap(T data, int i, int j);
+
+ /**
+ * Compare two elements.
+ *
+ * @param data Data structure
+ * @param i Position i
+ * @param j Position j
+ * @return {@code true} when the element at position i is greater than that
+ * at position j.
+ */
+ boolean compareGreater(T data, int i, int j);
+ }
+
+ /**
+ * Adapter for double arrays.
+ */
+ public static Adapter<double[]> DOUBLE_ADAPTER = new Adapter<double[]>() {
+ @Override
+ public void swap(double[] data, int i, int j) {
+ double tmp = data[i];
+ data[i] = data[j];
+ data[j] = tmp;
+ }
+
+ @Override
+ public boolean compareGreater(double[] data, int i, int j) {
+ return data[i] > data[j];
+ }
+ };
+
+ /**
+ * Adapter for integer arrays.
+ */
+ public static Adapter<int[]> INTEGER_ADAPTER = new Adapter<int[]>() {
+ @Override
+ public void swap(int[] data, int i, int j) {
+ int tmp = data[i];
+ data[i] = data[j];
+ data[j] = tmp;
+ }
+
+ @Override
+ public boolean compareGreater(int[] data, int i, int j) {
+ return data[i] > data[j];
+ }
+ };
+
+ /**
+ * Adapter for float arrays.
+ */
+ public static Adapter<float[]> FLOAT_ADAPTER = new Adapter<float[]>() {
+ @Override
+ public void swap(float[] data, int i, int j) {
+ float tmp = data[i];
+ data[i] = data[j];
+ data[j] = tmp;
+ }
+
+ @Override
+ public boolean compareGreater(float[] data, int i, int j) {
+ return data[i] > data[j];
+ }
+ };
+
+ /**
+ * Adapter for short arrays.
+ */
+ public static Adapter<short[]> SHORT_ADAPTER = new Adapter<short[]>() {
+ @Override
+ public void swap(short[] data, int i, int j) {
+ short tmp = data[i];
+ data[i] = data[j];
+ data[j] = tmp;
+ }
+
+ @Override
+ public boolean compareGreater(short[] data, int i, int j) {
+ return data[i] > data[j];
+ }
+ };
+
+ /**
+ * Adapter for long arrays.
+ */
+ public static Adapter<long[]> LONG_ADAPTER = new Adapter<long[]>() {
+ @Override
+ public void swap(long[] data, int i, int j) {
+ long tmp = data[i];
+ data[i] = data[j];
+ data[j] = tmp;
+ }
+
+ @Override
+ public boolean compareGreater(long[] data, int i, int j) {
+ return data[i] > data[j];
+ }
+ };
+
+ /**
+ * Adapter for byte arrays.
+ */
+ public static Adapter<byte[]> BYTE_ADAPTER = new Adapter<byte[]>() {
+ @Override
+ public void swap(byte[] data, int i, int j) {
+ byte tmp = data[i];
+ data[i] = data[j];
+ data[j] = tmp;
+ }
+
+ @Override
+ public boolean compareGreater(byte[] data, int i, int j) {
+ return data[i] > data[j];
+ }
+ };
+
+ /**
+ * Adapter for char arrays.
+ */
+ public static Adapter<char[]> CHAR_ADAPTER = new Adapter<char[]>() {
+ @Override
+ public void swap(char[] data, int i, int j) {
+ char tmp = data[i];
+ data[i] = data[j];
+ data[j] = tmp;
+ }
+
+ @Override
+ public boolean compareGreater(char[] data, int i, int j) {
+ return data[i] > data[j];
+ }
+ };
/**
* QuickSelect is essentially quicksort, except that we only "sort" that half
@@ -94,11 +378,10 @@ public class QuickSelect {
// Integer division is "floor" since we are non-negative.
final int left = begin + ((length - 1) >> 1);
quickSelect(data, begin, end, left);
- if(length % 2 == 1) {
+ if (length % 2 == 1) {
return data[left];
- }
- else {
- quickSelect(data, begin, end, left + 1);
+ } else {
+ quickSelect(data, left + 1, end, left + 1);
return data[left] + .5 * (data[left + 1] - data[left]);
}
}
@@ -136,11 +419,10 @@ public class QuickSelect {
final double err = dleft - ileft;
quickSelect(data, begin, end, ileft);
- if(err <= Double.MIN_NORMAL) {
+ if (err <= Double.MIN_NORMAL) {
return data[ileft];
- }
- else {
- quickSelect(data, begin, end, ileft + 1);
+ } else {
+ quickSelect(data, ileft + 1, end, ileft + 1);
// Mix:
double mix = data[ileft] + (data[ileft + 1] - data[ileft]) * err;
return mix;
@@ -155,66 +437,94 @@ public class QuickSelect {
* @param start Interval start
* @param end Interval end (exclusive)
* @param rank rank position we are interested in (starting at 0)
+ * @return Element at the given rank (starting at 0).
*/
- public static void quickSelect(double[] data, int start, int end, int rank) {
- while(true) {
+ public static double quickSelect(double[] data, int start, int end, int rank) {
+ while (true) {
// Optimization for small arrays
// This also ensures a minimum size below
- if(start + SMALL > end) {
+ if (start + SMALL > end) {
insertionSort(data, start, end);
- return;
+ return data[rank];
}
- // Pick pivot from three candidates: start, middle, end
- // Since we compare them, we can also just "bubble sort" them.
- final int middle = (start + end) >> 1;
- if(data[start] > data[middle]) {
- swap(data, start, middle);
+ // Best of 5 pivot picking:
+ // Choose pivots by looking at five candidates.
+ final int len = end - start;
+ final int seventh = (len >> 3) + (len >> 6) + 1;
+ final int m3 = (start + end) >> 1; // middle
+ final int m2 = m3 - seventh;
+ final int m1 = m2 - seventh;
+ final int m4 = m3 + seventh;
+ final int m5 = m4 + seventh;
+
+ // Explicit (and optimal) sorting network for 5 elements
+ // See Knuth for details.
+ if (data[m1] > data[m2]) {
+ swap(data, m1, m2);
+ }
+ if (data[m1] > data[m3]) {
+ swap(data, m1, m3);
+ }
+ if (data[m2] > data[m3]) {
+ swap(data, m2, m3);
+ }
+ if (data[m4] > data[m5]) {
+ swap(data, m4, m5);
+ }
+ if (data[m1] > data[m4]) {
+ swap(data, m1, m4);
}
- if(data[start] > data[end - 1]) {
- swap(data, start, end - 1);
+ if (data[m3] > data[m4]) {
+ swap(data, m3, m4);
}
- if(data[middle] > data[end - 1]) {
- swap(data, middle, end - 1);
+ if (data[m2] > data[m5]) {
+ swap(data, m2, m5);
+ }
+ if (data[m2] > data[m3]) {
+ swap(data, m2, m3);
+ }
+ if (data[m4] > data[m5]) {
+ swap(data, m4, m5);
}
- // TODO: use more candidates for larger arrays?
- final double pivot = data[middle];
- // Move middle element out of the way, just before end
- // (Since we already know that "end" is bigger)
- swap(data, middle, end - 2);
+ int best = bestPivot(rank, m1, m2, m3, m4, m5);
+ final double pivot = data[best];
+ // Move middle element out of the way.
+ swap(data, best, end - 1);
// Begin partitioning
- int i = start + 1, j = end - 3;
+ int i = start, j = end - 2;
// This is classic quicksort stuff
- while(true) {
- while(data[i] <= pivot && i <= j) {
+ while (true) {
+ while (i <= j && data[i] <= pivot) {
i++;
}
- while(data[j] >= pivot && j >= i) {
+ while (j >= i && data[j] >= pivot) {
j--;
}
- if(i >= j) {
+ if (i >= j) {
break;
}
swap(data, i, j);
+ i++;
+ j--;
}
// Move pivot (former middle element) back into the appropriate place
- swap(data, i, end - 2);
+ swap(data, i, end - 1);
// In contrast to quicksort, we only need to recurse into the half we are
// interested in. Instead of recursion we now use iteration.
- if(rank < i) {
+ if (rank < i) {
end = i;
- }
- else if(rank > i) {
+ } else if (rank > i) {
start = i + 1;
- }
- else {
+ } else {
break;
}
} // Loop until rank==i
+ return data[rank];
}
/**
@@ -225,8 +535,8 @@ public class QuickSelect {
* @param end Interval end
*/
private static void insertionSort(double[] data, int start, int end) {
- for(int i = start + 1; i < end; i++) {
- for(int j = i; j > start && data[j - 1] > data[j]; j--) {
+ for (int i = start + 1; i < end; i++) {
+ for (int j = i; j > start && data[j - 1] > data[j]; j--) {
swap(data, j, j - 1);
}
}
@@ -345,61 +655,85 @@ public class QuickSelect {
* @param rank rank position we are interested in (starting at 0)
*/
public static <T extends Comparable<? super T>> void quickSelect(T[] data, int start, int end, int rank) {
- while(true) {
+ while (true) {
// Optimization for small arrays
// This also ensures a minimum size below
- if(start + SMALL > end) {
+ if (start + SMALL > end) {
insertionSort(data, start, end);
return;
}
- // Pick pivot from three candidates: start, middle, end
- // Since we compare them, we can also just "bubble sort" them.
- final int middle = (start + end) >> 1;
- if(data[start].compareTo(data[middle]) > 0) {
- swap(data, start, middle);
+ // Best of 5 pivot picking:
+ // Choose pivots by looking at five candidates.
+ final int len = end - start;
+ final int seventh = (len >> 3) + (len >> 6) + 1;
+ final int m3 = (start + end) >> 1; // middle
+ final int m2 = m3 - seventh;
+ final int m1 = m2 - seventh;
+ final int m4 = m3 + seventh;
+ final int m5 = m4 + seventh;
+
+ // Explicit (and optimal) sorting network for 5 elements
+ // See Knuth for details.
+ if (data[m1].compareTo(data[m2]) > 0) {
+ swap(data, m1, m2);
+ }
+ if (data[m1].compareTo(data[m3]) > 0) {
+ swap(data, m1, m3);
+ }
+ if (data[m2].compareTo(data[m3]) > 0) {
+ swap(data, m2, m3);
+ }
+ if (data[m4].compareTo(data[m5]) > 0) {
+ swap(data, m4, m5);
+ }
+ if (data[m1].compareTo(data[m4]) > 0) {
+ swap(data, m1, m4);
}
- if(data[start].compareTo(data[end - 1]) > 0) {
- swap(data, start, end - 1);
+ if (data[m3].compareTo(data[m4]) > 0) {
+ swap(data, m3, m4);
}
- if(data[middle].compareTo(data[end - 1]) > 0) {
- swap(data, middle, end - 1);
+ if (data[m2].compareTo(data[m5]) > 0) {
+ swap(data, m2, m5);
+ }
+ if (data[m2].compareTo(data[m3]) > 0) {
+ swap(data, m2, m3);
+ }
+ if (data[m4].compareTo(data[m5]) > 0) {
+ swap(data, m4, m5);
}
- // TODO: use more candidates for larger arrays?
- final T pivot = data[middle];
- // Move middle element out of the way, just before end
- // (Since we already know that "end" is bigger)
- swap(data, middle, end - 2);
+ int best = bestPivot(rank, m1, m2, m3, m4, m5);
+ final T pivot = data[best];
+ // Move middle element out of the way.
+ swap(data, best, end - 1);
// Begin partitioning
- int i = start + 1, j = end - 3;
+ int i = start, j = end - 2;
// This is classic quicksort stuff
- while(true) {
- while(data[i].compareTo(pivot) <= 0 && i <= j) {
+ while (true) {
+ while (i <= j && data[i].compareTo(pivot) <= 0) {
i++;
}
- while(data[j].compareTo(pivot) >= 0 && j >= i) {
+ while (j >= i && data[j].compareTo(pivot) >= 0) {
j--;
}
- if(i >= j) {
+ if (i >= j) {
break;
}
swap(data, i, j);
}
// Move pivot (former middle element) back into the appropriate place
- swap(data, i, end - 2);
+ swap(data, i, end - 1);
// In contrast to quicksort, we only need to recurse into the half we are
// interested in. Instead of recursion we now use iteration.
- if(rank < i) {
+ if (rank < i) {
end = i;
- }
- else if(rank > i) {
+ } else if (rank > i) {
start = i + 1;
- }
- else {
+ } else {
break;
}
} // Loop until rank==i
@@ -414,8 +748,8 @@ public class QuickSelect {
* @param end Interval end
*/
private static <T extends Comparable<? super T>> void insertionSort(T[] data, int start, int end) {
- for(int i = start + 1; i < end; i++) {
- for(int j = i; j > start && data[j - 1].compareTo(data[j]) > 0; j--) {
+ for (int i = start + 1; i < end; i++) {
+ for (int j = i; j > start && data[j - 1].compareTo(data[j]) > 0; j--) {
swap(data, j, j - 1);
}
}
@@ -536,61 +870,86 @@ public class QuickSelect {
* @param rank rank position we are interested in (starting at 0)
*/
public static <T extends Comparable<? super T>> void quickSelect(List<? extends T> data, int start, int end, int rank) {
- while(true) {
+ while (true) {
// Optimization for small arrays
// This also ensures a minimum size below
- if(start + SMALL > end) {
+ if (start + SMALL > end) {
insertionSort(data, start, end);
return;
}
- // Pick pivot from three candidates: start, middle, end
- // Since we compare them, we can also just "bubble sort" them.
- final int middle = (start + end) >> 1;
- if(data.get(start).compareTo(data.get(middle)) > 0) {
- swap(data, start, middle);
+ // Best of 5 pivot picking:
+ // Choose pivots by looking at five candidates.
+ final int len = end - start;
+ final int seventh = (len >> 3) + (len >> 6) + 1;
+ final int m3 = (start + end) >> 1; // middle
+ final int m2 = m3 - seventh;
+ final int m1 = m2 - seventh;
+ final int m4 = m3 + seventh;
+ final int m5 = m4 + seventh;
+
+ // Explicit (and optimal) sorting network for 5 elements
+ // See Knuth for details.
+ if (data.get(m1).compareTo(data.get(m2)) > 0) {
+ swap(data, m1, m2);
}
- if(data.get(start).compareTo(data.get(end - 1)) > 0) {
- swap(data, start, end - 1);
+ if (data.get(m1).compareTo(data.get(m3)) > 0) {
+ swap(data, m1, m3);
}
- if(data.get(middle).compareTo(data.get(end - 1)) > 0) {
- swap(data, middle, end - 1);
+ if (data.get(m2).compareTo(data.get(m3)) > 0) {
+ swap(data, m2, m3);
+ }
+ if (data.get(m4).compareTo(data.get(m5)) > 0) {
+ swap(data, m4, m5);
+ }
+ if (data.get(m1).compareTo(data.get(m4)) > 0) {
+ swap(data, m1, m4);
+ }
+ if (data.get(m3).compareTo(data.get(m4)) > 0) {
+ swap(data, m3, m4);
+ }
+ if (data.get(m2).compareTo(data.get(m5)) > 0) {
+ swap(data, m2, m5);
+ }
+ if (data.get(m2).compareTo(data.get(m3)) > 0) {
+ swap(data, m2, m3);
+ }
+ if (data.get(m4).compareTo(data.get(m5)) > 0) {
+ swap(data, m4, m5);
}
- // TODO: use more candidates for larger arrays?
- final T pivot = data.get(middle);
+ int best = bestPivot(rank, m1, m2, m3, m4, m5);
+ final T pivot = data.get(best);
// Move middle element out of the way, just before end
// (Since we already know that "end" is bigger)
- swap(data, middle, end - 2);
+ swap(data, best, end - 1);
// Begin partitioning
- int i = start + 1, j = end - 3;
+ int i = start, j = end - 2;
// This is classic quicksort stuff
- while(true) {
- while(data.get(i).compareTo(pivot) <= 0 && i <= j) {
+ while (true) {
+ while (i <= j && data.get(i).compareTo(pivot) <= 0) {
i++;
}
- while(data.get(j).compareTo(pivot) >= 0 && j >= i) {
+ while (j >= i && data.get(j).compareTo(pivot) >= 0) {
j--;
}
- if(i >= j) {
+ if (i >= j) {
break;
}
swap(data, i, j);
}
// Move pivot (former middle element) back into the appropriate place
- swap(data, i, end - 2);
+ swap(data, i, end - 1);
// In contrast to quicksort, we only need to recurse into the half we are
// interested in. Instead of recursion we now use iteration.
- if(rank < i) {
+ if (rank < i) {
end = i;
- }
- else if(rank > i) {
+ } else if (rank > i) {
start = i + 1;
- }
- else {
+ } else {
break;
}
} // Loop until rank==i
@@ -605,8 +964,8 @@ public class QuickSelect {
* @param end Interval end
*/
private static <T extends Comparable<? super T>> void insertionSort(List<T> data, int start, int end) {
- for(int i = start + 1; i < end; i++) {
- for(int j = i; j > start && data.get(j - 1).compareTo(data.get(j)) > 0; j--) {
+ for (int i = start + 1; i < end; i++) {
+ for (int j = i; j > start && data.get(j - 1).compareTo(data.get(j)) > 0; j--) {
swap(data, j, j - 1);
}
}
@@ -731,61 +1090,86 @@ public class QuickSelect {
* @param rank rank position we are interested in (starting at 0)
*/
public static <T> void quickSelect(List<? extends T> data, Comparator<? super T> comparator, int start, int end, int rank) {
- while(true) {
+ while (true) {
// Optimization for small arrays
// This also ensures a minimum size below
- if(start + SMALL > end) {
+ if (start + SMALL > end) {
insertionSort(data, comparator, start, end);
return;
}
- // Pick pivot from three candidates: start, middle, end
- // Since we compare them, we can also just "bubble sort" them.
- final int middle = (start + end) >> 1;
- if(comparator.compare(data.get(start), data.get(middle)) > 0) {
- swap(data, start, middle);
+ // Best of 5 pivot picking:
+ // Choose pivots by looking at five candidates.
+ final int len = end - start;
+ final int seventh = (len >> 3) + (len >> 6) + 1;
+ final int m3 = (start + end) >> 1; // middle
+ final int m2 = m3 - seventh;
+ final int m1 = m2 - seventh;
+ final int m4 = m3 + seventh;
+ final int m5 = m4 + seventh;
+
+ // Explicit (and optimal) sorting network for 5 elements
+ // See Knuth for details.
+ if (comparator.compare(data.get(m1), data.get(m2)) > 0) {
+ swap(data, m1, m2);
+ }
+ if (comparator.compare(data.get(m1), data.get(m3)) > 0) {
+ swap(data, m1, m3);
+ }
+ if (comparator.compare(data.get(m2), data.get(m3)) > 0) {
+ swap(data, m2, m3);
+ }
+ if (comparator.compare(data.get(m4), data.get(m5)) > 0) {
+ swap(data, m4, m5);
}
- if(comparator.compare(data.get(start), data.get(end - 1)) > 0) {
- swap(data, start, end - 1);
+ if (comparator.compare(data.get(m1), data.get(m4)) > 0) {
+ swap(data, m1, m4);
}
- if(comparator.compare(data.get(middle), data.get(end - 1)) > 0) {
- swap(data, middle, end - 1);
+ if (comparator.compare(data.get(m3), data.get(m4)) > 0) {
+ swap(data, m3, m4);
+ }
+ if (comparator.compare(data.get(m2), data.get(m5)) > 0) {
+ swap(data, m2, m5);
+ }
+ if (comparator.compare(data.get(m2), data.get(m3)) > 0) {
+ swap(data, m2, m3);
+ }
+ if (comparator.compare(data.get(m4), data.get(m5)) > 0) {
+ swap(data, m4, m5);
}
- // TODO: use more candidates for larger arrays?
- final T pivot = data.get(middle);
+ int best = bestPivot(rank, m1, m2, m3, m4, m5);
+ final T pivot = data.get(best);
// Move middle element out of the way, just before end
// (Since we already know that "end" is bigger)
- swap(data, middle, end - 2);
+ swap(data, best, end - 1);
// Begin partitioning
- int i = start + 1, j = end - 3;
+ int i = start, j = end - 2;
// This is classic quicksort stuff
- while(true) {
- while(comparator.compare(data.get(i), pivot) <= 0 && i <= j) {
+ while (true) {
+ while (i <= j && comparator.compare(data.get(i), pivot) <= 0) {
i++;
}
- while(comparator.compare(data.get(j), pivot) >= 0 && j >= i) {
+ while (j >= i && comparator.compare(data.get(j), pivot) >= 0) {
j--;
}
- if(i >= j) {
+ if (i >= j) {
break;
}
swap(data, i, j);
}
// Move pivot (former middle element) back into the appropriate place
- swap(data, i, end - 2);
+ swap(data, i, end - 1);
// In contrast to quicksort, we only need to recurse into the half we are
// interested in. Instead of recursion we now use iteration.
- if(rank < i) {
+ if (rank < i) {
end = i;
- }
- else if(rank > i) {
+ } else if (rank > i) {
start = i + 1;
- }
- else {
+ } else {
break;
}
} // Loop until rank==i
@@ -800,8 +1184,8 @@ public class QuickSelect {
* @param end Interval end
*/
private static <T> void insertionSort(List<T> data, Comparator<? super T> comparator, int start, int end) {
- for(int i = start + 1; i < end; i++) {
- for(int j = i; j > start && comparator.compare(data.get(j - 1), data.get(j)) > 0; j--) {
+ for (int i = start + 1; i < end; i++) {
+ for (int j = i; j > start && comparator.compare(data.get(j - 1), data.get(j)) > 0; j--) {
swap(data, j, j - 1);
}
}
@@ -882,7 +1266,7 @@ public class QuickSelect {
* @param data Data to process
* @param comparator Comparator to use
* @param begin Begin of valid values
- * @param end End of valid values (inclusive!)
+ * @param end End of valid values (exclusive)
* @param quant Quantile to compute
* @return Value at quantile
*/
@@ -904,93 +1288,132 @@ public class QuickSelect {
* @param data Data to process
* @param comparator Comparator to use
* @param start Interval start
- * @param end Interval end (inclusive)
+ * @param end Interval end (exclusive)
* @param rank rank position we are interested in (starting at 0)
*/
public static void quickSelect(ArrayModifiableDBIDs data, Comparator<? super DBIDRef> comparator, int start, int end, int rank) {
- while(true) {
+ DBIDArrayIter refi = data.iter(), refj = data.iter(), pivot = data.iter();
+ while (true) {
// Optimization for small arrays
// This also ensures a minimum size below
- if(start + SMALL > end) {
- insertionSort(data, comparator, start, end);
+ if (start + SMALL > end) {
+ insertionSort(data, comparator, start, end, refi, refj);
return;
}
- // Pick pivot from three candidates: start, middle, end
- // Since we compare them, we can also just "bubble sort" them.
- final int middle = (start + end) >> 1;
- if(comparator.compare(data.get(start), data.get(middle)) > 0) {
- data.swap(start, middle);
+ // Best of 5 pivot picking:
+ // Choose pivots by looking at five candidates.
+ final int len = end - start;
+ final int seventh = (len >> 3) + (len >> 6) + 1;
+ final int m3 = (start + end) >> 1; // middle
+ final int m2 = m3 - seventh;
+ final int m1 = m2 - seventh;
+ final int m4 = m3 + seventh;
+ final int m5 = m4 + seventh;
+
+ // Explicit (and optimal) sorting network for 5 elements
+ // See Knuth for details.
+ if (compare(refi, m1, refj, m2, comparator) > 0) {
+ data.swap(m1, m2);
+ }
+ if (compare(refi, m1, refj, m3, comparator) > 0) {
+ data.swap(m1, m3);
+ }
+ if (compare(refi, m2, refj, m3, comparator) > 0) {
+ data.swap(m2, m3);
+ }
+ if (compare(refi, m4, refj, m5, comparator) > 0) {
+ data.swap(m4, m5);
}
- if(comparator.compare(data.get(start), data.get(end - 1)) > 0) {
- data.swap(start, end - 1);
+ if (compare(refi, m1, refj, m4, comparator) > 0) {
+ data.swap(m1, m4);
}
- if(comparator.compare(data.get(middle), data.get(end - 1)) > 0) {
- data.swap(middle, end - 1);
+ if (compare(refi, m3, refj, m4, comparator) > 0) {
+ data.swap(m3, m4);
+ }
+ if (compare(refi, m2, refj, m5, comparator) > 0) {
+ data.swap(m2, m5);
+ }
+ if (compare(refi, m2, refj, m3, comparator) > 0) {
+ data.swap(m2, m3);
+ }
+ if (compare(refi, m4, refj, m5, comparator) > 0) {
+ data.swap(m4, m5);
}
- // TODO: use more candidates for larger arrays?
- final DBID pivot = data.get(middle);
- // Move middle element out of the way, just before end
- // (Since we already know that "end" is bigger)
- data.swap(middle, end - 2);
+ int best = bestPivot(rank, m1, m2, m3, m4, m5);
+ // Move the chosen pivot out of the way, to the last position of the interval.
+ data.swap(best, end - 1);
+ pivot.seek(end - 1);
// Begin partitioning
- int i = start + 1, j = end - 3;
- DBIDArrayIter refi = data.iter(), refj = data.iter();
+ int i = start, j = end - 2; // pivot sits at end - 1, so every position in [start, end - 2] must be scanned
refi.seek(i);
refj.seek(j);
// This is classic quicksort stuff
- while(true) {
- while(comparator.compare(refi, pivot) <= 0 && i <= j) {
+ while (true) {
+ while (i <= j && comparator.compare(refi, pivot) <= 0) {
i++;
refi.advance();
}
- while(comparator.compare(refj, pivot) >= 0 && j >= i) {
+ while (j >= i && comparator.compare(refj, pivot) >= 0) {
j--;
refj.retract();
}
- if(i >= j) {
+ if (i >= j) {
break;
}
data.swap(i, j);
}
// Move pivot (former middle element) back into the appropriate place
- data.swap(i, end - 2);
+ data.swap(i, end - 1);
// In contrast to quicksort, we only need to recurse into the half we are
// interested in. Instead of recursion we now use iteration.
- if(rank < i) {
+ if (rank < i) {
end = i;
- }
- else if(rank > i) {
+ } else if (rank > i) {
start = i + 1;
- }
- else {
+ } else {
break;
}
} // Loop until rank==i
}
/**
+ * Compare the elements at two positions, repositioning both iterators.
+ *
+ * @param i1 First scratch iterator; moved to position p1 by this call
+ * @param p1 Position of the first element
+ * @param i2 Second scratch iterator; moved to position p2 by this call
+ * @param p2 Position of the second element
+ * @param comp Comparator
+ * @return Result of comp.compare for the elements at p1 and p2
+ */
+ private static int compare(DBIDArrayIter i1, int p1, DBIDArrayIter i2, int p2, Comparator<? super DBIDRef> comp) {
+ i1.seek(p1);
+ i2.seek(p2);
+ return comp.compare(i1, i2);
+ }
+
+ /**
* Sort a small array using repetitive insertion sort.
*
* @param data Data to sort
* @param start Interval start
* @param end Interval end
*/
- private static void insertionSort(ArrayModifiableDBIDs data, Comparator<? super DBIDRef> comparator, int start, int end) {
- DBIDArrayIter iter1 = data.iter(), iter2 = data.iter();
- for(int i = start + 1; i < end; i++) {
- iter1.seek(i - 1);
- iter2.seek(i);
- for(int j = i; j > start; j--, iter1.retract(), iter2.retract()) {
- if(comparator.compare(iter1, iter2) > 0) {
+ private static void insertionSort(ArrayModifiableDBIDs data, Comparator<? super DBIDRef> comparator, int start, int end, DBIDArrayIter iter1, DBIDArrayIter iter2) {
+ for (int i = start + 1; i < end; i++) {
+ for (int j = i; j > start; j--) {
+ iter1.seek(j - 1); // iter1/iter2 are caller-supplied scratch iterators, repositioned before every comparison
+ iter2.seek(j);
+ if (comparator.compare(iter1, iter2) <= 0) { // neighbours already in order: element j has reached its place
break;
}
data.swap(j, j - 1);
}
}
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayAdapter.java
index 969d068d..2c5eeed1 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayAdapter.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayAdapter.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayDBIDsAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayDBIDsAdapter.java
index 5b1b92b5..da831471 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayDBIDsAdapter.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayDBIDsAdapter.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayLikeUtil.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayLikeUtil.java
index a08feb1a..8fab6f2b 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayLikeUtil.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayLikeUtil.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -27,6 +27,7 @@ import java.util.List;
import de.lmu.ifi.dbs.elki.data.FeatureVector;
import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
/**
* Utility class that allows plug-in use of various "array-like" types such as
@@ -42,17 +43,17 @@ public final class ArrayLikeUtil {
/**
* Static instance for lists.
*/
- private static final ListArrayAdapter<Object> LISTADAPTER = new ListArrayAdapter<Object>();
+ private static final ListArrayAdapter<Object> LISTADAPTER = new ListArrayAdapter<>();
/**
* Static instance for lists of numbers.
*/
- private static final NumberListArrayAdapter<Number> NUMBERLISTADAPTER = new NumberListArrayAdapter<Number>();
+ private static final NumberListArrayAdapter<Number> NUMBERLISTADAPTER = new NumberListArrayAdapter<>();
/**
* Static instance.
*/
- private static final IdentityArrayAdapter<?> IDENTITYADAPTER = new IdentityArrayAdapter<Object>();
+ private static final IdentityArrayAdapter<?> IDENTITYADAPTER = new IdentityArrayAdapter<>();
/**
* Static instance.
@@ -83,7 +84,12 @@ public final class ArrayLikeUtil {
* Use ArrayDBIDs as array.
*/
public static final ArrayDBIDsAdapter ARRAYDBIDADAPTER = new ArrayDBIDsAdapter();
-
+
+ /**
+ * Adapter for vectors.
+ */
+ public static final NumberArrayAdapter<Double, Vector> VECTORADAPTER = new VectorAdapter();
+
/**
* Fake constructor. Do not instantiate!
*/
@@ -169,9 +175,9 @@ public final class ArrayLikeUtil {
final int size = adapter.size(array);
int index = 0;
double max = adapter.getDouble(array, 0);
- for(int i = 1; i < size; i++) {
+ for (int i = 1; i < size; i++) {
double val = adapter.getDouble(array, i);
- if(val > max) {
+ if (val > max) {
max = val;
index = i;
}
@@ -199,8 +205,11 @@ public final class ArrayLikeUtil {
* @return primitive double array
*/
public static <A> double[] toPrimitiveDoubleArray(A array, NumberArrayAdapter<?, ? super A> adapter) {
+ if (adapter == DOUBLEARRAYADAPTER) {
+ return ((double[]) array).clone();
+ }
double[] ret = new double[adapter.size(array)];
- for(int i = 0; i < ret.length; i++) {
+ for (int i = 0; i < ret.length; i++) {
ret[i] = adapter.getDouble(array, i);
}
return ret;
@@ -234,8 +243,11 @@ public final class ArrayLikeUtil {
* @return primitive float array
*/
public static <A> float[] toPrimitiveFloatArray(A array, NumberArrayAdapter<?, ? super A> adapter) {
+ if (adapter == FLOATARRAYADAPTER) {
+ return ((float[]) array).clone();
+ }
float[] ret = new float[adapter.size(array)];
- for(int i = 0; i < ret.length; i++) {
+ for (int i = 0; i < ret.length; i++) {
ret[i] = adapter.getFloat(array, i);
}
return ret;
@@ -260,4 +272,39 @@ public final class ArrayLikeUtil {
public static <N extends Number> float[] toPrimitiveFloatArray(NumberVector<N> obj) {
return toPrimitiveFloatArray(obj, numberVectorAdapter(obj));
}
-} \ No newline at end of file
+
+ /**
+ * Convert a numeric array-like to an <code>int[]</code>, reading each element via the adapter.
+ *
+ * @param array Array-like
+ * @param adapter Adapter
+ * @return primitive int array
+ */
+ public static <A> int[] toPrimitiveIntegerArray(A array, NumberArrayAdapter<?, ? super A> adapter) {
+ int[] ret = new int[adapter.size(array)];
+ for (int i = 0; i < ret.length; i++) {
+ ret[i] = adapter.getInteger(array, i);
+ }
+ return ret;
+ }
+
+ /**
+ * Convert a list of numbers to <code>int[]</code>.
+ *
+ * @param array List of numbers
+ * @return primitive int array
+ */
+ public static int[] toPrimitiveIntegerArray(List<? extends Number> array) {
+ return toPrimitiveIntegerArray(array, NUMBERLISTADAPTER);
+ }
+
+ /**
+ * Convert a number vector to <code>int[]</code>.
+ *
+ * @param obj Object to convert
+ * @return primitive int array
+ */
+ public static <N extends Number> int[] toPrimitiveIntegerArray(NumberVector<N> obj) {
+ return toPrimitiveIntegerArray(obj, numberVectorAdapter(obj));
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/DoubleArrayAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/DoubleArrayAdapter.java
index 117f3845..0e31a61a 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/DoubleArrayAdapter.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/DoubleArrayAdapter.java
@@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ExtendedArray.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ExtendedArray.java
index 491c4f95..3af14982 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ExtendedArray.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ExtendedArray.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -91,6 +91,6 @@ public class ExtendedArray<T> implements ArrayAdapter<T, ExtendedArray<T>> {
*/
@SuppressWarnings("unchecked")
public static <T, A> ExtendedArray<T> extend(A array, ArrayAdapter<T, A> getter, T extra) {
- return new ExtendedArray<T>(array, (ArrayAdapter<T, Object>) getter, extra);
+ return new ExtendedArray<>(array, (ArrayAdapter<T, Object>) getter, extra);
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FeatureVectorAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FeatureVectorAdapter.java
index 38b662e8..deb5aafc 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FeatureVectorAdapter.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FeatureVectorAdapter.java
@@ -6,7 +6,7 @@ import de.lmu.ifi.dbs.elki.data.FeatureVector;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FloatArrayAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FloatArrayAdapter.java
index ae501039..831dc929 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FloatArrayAdapter.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FloatArrayAdapter.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/IdentityArrayAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/IdentityArrayAdapter.java
index 0c6e03dd..dfde46b7 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/IdentityArrayAdapter.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/IdentityArrayAdapter.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ListArrayAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ListArrayAdapter.java
index cba1e706..729dfab8 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ListArrayAdapter.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ListArrayAdapter.java
@@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberArrayAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberArrayAdapter.java
index 1dc823b1..5ebbcb0d 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberArrayAdapter.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberArrayAdapter.java
@@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberListArrayAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberListArrayAdapter.java
index 89a4e3d6..a2606347 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberListArrayAdapter.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberListArrayAdapter.java
@@ -6,7 +6,7 @@ import java.util.List;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberVectorAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberVectorAdapter.java
index fd1e6636..5e674026 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberVectorAdapter.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberVectorAdapter.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SingleSubsetArrayAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SingleSubsetArrayAdapter.java
index d7483e4d..941c6245 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SingleSubsetArrayAdapter.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SingleSubsetArrayAdapter.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SubsetArrayAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SubsetArrayAdapter.java
index c607759f..746647cc 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SubsetArrayAdapter.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SubsetArrayAdapter.java
@@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SubsetNumberArrayAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SubsetNumberArrayAdapter.java
index 6719b60e..c394f9b7 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SubsetNumberArrayAdapter.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SubsetNumberArrayAdapter.java
@@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/TDoubleListAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/TDoubleListAdapter.java
index cee393ac..a52ff15e 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/TDoubleListAdapter.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/TDoubleListAdapter.java
@@ -6,7 +6,7 @@ import gnu.trove.list.TDoubleList;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/VectorAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/VectorAdapter.java
new file mode 100644
index 00000000..0bb979e9
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/VectorAdapter.java
@@ -0,0 +1,85 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+
+/**
+ * Adapter to use a feature vector as an array of features.
+ *
+ * Use the static instance from {@link ArrayLikeUtil}!
+ *
+ * @author Erich Schubert
+ */
+public class VectorAdapter implements NumberArrayAdapter<Double, Vector> {
+ /**
+ * Constructor.
+ *
+ * Use the static instance from {@link ArrayLikeUtil}!
+ */
+ protected VectorAdapter() {
+ super();
+ }
+
+ @Override
+ public int size(Vector array) {
+ return array.getDimensionality();
+ }
+
+ @Override
+ @Deprecated
+ public Double get(Vector array, int off) throws IndexOutOfBoundsException {
+ return Double.valueOf(array.doubleValue(off)); // boxed form of getDouble(): read at the same offset as every other accessor
+ }
+
+ @Override
+ public double getDouble(Vector array, int off) throws IndexOutOfBoundsException {
+ return array.doubleValue(off);
+ }
+
+ @Override
+ public float getFloat(Vector array, int off) throws IndexOutOfBoundsException {
+ return array.floatValue(off);
+ }
+
+ @Override
+ public int getInteger(Vector array, int off) throws IndexOutOfBoundsException {
+ return array.intValue(off);
+ }
+
+ @Override
+ public short getShort(Vector array, int off) throws IndexOutOfBoundsException {
+ return array.shortValue(off);
+ }
+
+ @Override
+ public long getLong(Vector array, int off) throws IndexOutOfBoundsException {
+ return array.longValue(off);
+ }
+
+ @Override
+ public byte getByte(Vector array, int off) throws IndexOutOfBoundsException {
+ return array.byteValue(off);
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/package-info.java
index 55627df4..33058cf4 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/package-info.java
@@ -5,7 +5,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/IntegerArrayQuickSort.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/IntegerArrayQuickSort.java
index a0c93997..eaf47738 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/IntegerArrayQuickSort.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/IntegerArrayQuickSort.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arrays;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/IntegerComparator.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/IntegerComparator.java
index 51164425..0ccd47db 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/IntegerComparator.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/IntegerComparator.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arrays;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/package-info.java
index 3db78032..874a6d44 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/package-info.java
@@ -5,7 +5,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2012
+Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/AbstractHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/AbstractHeap.java
deleted file mode 100644
index b6f098e6..00000000
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/AbstractHeap.java
+++ /dev/null
@@ -1,103 +0,0 @@
-package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
-
-/*
- This file is part of ELKI:
- Environment for Developing KDD-Applications Supported by Index-Structures
-
- Copyright (C) 2012
- Ludwig-Maximilians-Universität München
- Lehr- und Forschungseinheit für Datenbanksysteme
- ELKI Development Team
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-/**
- * Abstract base class for heaps.
- *
- * @author Erich Schubert
- */
-public class AbstractHeap {
- /**
- * Default initial capacity
- */
- public static final int DEFAULT_INITIAL_CAPACITY = 11;
-
- /**
- * Current number of objects
- */
- public int size = 0;
-
- /**
- * Indicate up to where the heap is valid
- */
- public int validSize = 0;
-
- /**
- * (Structural) modification counter. Used to invalidate iterators.
- */
- public int modCount = 0;
-
- /**
- * Constructor.
- */
- public AbstractHeap() {
- super();
- }
-
- /**
- * Query the size
- *
- * @return Size
- */
- public int size() {
- return this.size;
- }
-
- /**
- * Delete all elements from the heap.
- */
- public void clear() {
- this.size = 0;
- this.validSize = -1;
- heapModified();
- }
-
- /**
- * Test whether we need to resize to have the requested capacity.
- *
- * @param requiredSize required capacity
- * @param capacity Current capacity
- * @return new capacity
- */
- protected final int desiredSize(int requiredSize, int capacity) {
- // Double until 64, then increase by 50% each time.
- int newCapacity = ((capacity < 64) ? ((capacity + 1) * 2) : ((capacity / 2) * 3));
- // overflow?
- if (newCapacity < 0) {
- throw new OutOfMemoryError();
- }
- if (requiredSize > newCapacity) {
- newCapacity = requiredSize;
- }
- return newCapacity;
- }
-
- /**
- * Called at the end of each heap modification.
- */
- protected void heapModified() {
- modCount++;
- }
-}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMaxHeap.java
index ab8ef1bb..222fe83a 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMaxHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMaxHeap.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -23,41 +23,409 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import java.util.Arrays;
+import java.util.ConcurrentModificationException;
+
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+
/**
- * Basic in-memory heap structure.
+ * Advanced priority queue class, based on a binary heap (for small sizes),
+ * which will for larger heaps be accompanied by a 4-ary heap (attached below
+ * the root of the two-ary heap, making the root actually 3-ary).
+ *
+ * This code was automatically instantiated for the type: Comparable
+ *
+ * This combination was found to work quite well in benchmarks, but YMMV.
*
- * This heap is built lazily: if you first add many elements, then poll the
- * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log
- * n). This is implemented via a simple validTo counter.
+ * Some other observations from benchmarking:
+ * <ul>
+ * <li>Bulk loading did not improve things</li>
+ * <li>Primitive heaps are substantially faster.</li>
+ * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and
+ * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li>
+ * <li>Workload makes a huge difference. A load-once, poll-until-empty priority
+ * queue is something different than e.g. a top-k heap, which will see a lot of
+ * top element replacements.</li>
+ * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for
+ * top-k make a difference.</li>
+ * <li>Different day, different benchmark results ...</li>
+ * </ul>
*
* @author Erich Schubert
+ *
+ * @apiviz.has UnsortedIter
+ * @param <K> Key type
*/
-public class ComparableMaxHeap<K extends Comparable<K>> extends ObjectHeap<K> {
+public class ComparableMaxHeap<K extends Comparable<? super K>> implements ObjectHeap<K> {
+ /**
+ * Base heap.
+ */
+ protected Comparable<Object>[] twoheap;
+
+ /**
+ * Extension heap.
+ */
+ protected Comparable<Object>[] fourheap;
+
+ /**
+ * Current size of heap.
+ */
+ protected int size;
+
+ /**
+ * (Structural) modification counter. Used to invalidate iterators.
+ */
+ protected int modCount = 0;
+
+ /**
+ * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements.
+ */
+ private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1;
+
+ /**
+ * Initial size of the 2-ary heap.
+ */
+ private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1;
+
+ /**
+ * Initial size of 4-ary heap when initialized.
+ *
+ * 21 = 4-ary heap of height 2: 1 + 4 + 4*4
+ *
+ * 85 = 4-ary heap of height 3: 21 + 4*4*4
+ *
+ * 341 = 4-ary heap of height 4: 85 + 4*4*4*4
+ *
+ * Since we last grew by 255 (to 511), let's use 341.
+ */
+ private final static int FOUR_HEAP_INITIAL_SIZE = 341;
+
/**
- * Constructor with default capacity.
+ * Constructor, with default size.
*/
+ @SuppressWarnings("unchecked")
public ComparableMaxHeap() {
- super(DEFAULT_INITIAL_CAPACITY);
+ super();
+ Comparable<Object>[] twoheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, TWO_HEAP_INITIAL_SIZE);
+
+ this.twoheap = twoheap;
+ this.fourheap = null;
+ this.size = 0;
+ this.modCount = 0;
}
/**
- * Constructor with initial capacity.
+ * Constructor, with given minimum size.
*
- * @param size initial capacity
+ * @param minsize Minimum size
*/
- public ComparableMaxHeap(int size) {
- super(size);
+ @SuppressWarnings("unchecked")
+ public ComparableMaxHeap(int minsize) {
+ super();
+ if (minsize < TWO_HEAP_MAX_SIZE) {
+ final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
+ Comparable<Object>[] twoheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, size);
+
+ this.twoheap = twoheap;
+ this.fourheap = null;
+ } else {
+ // add() grows the extension heap by length/2, so a 0- or 1-slot array could never grow: use at least the default size.
+ final int foursize = Math.max(FOUR_HEAP_INITIAL_SIZE, minsize - TWO_HEAP_MAX_SIZE);
+ this.twoheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, TWO_HEAP_INITIAL_SIZE);
+ this.fourheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, foursize);
+ }
+ this.size = 0;
+ this.modCount = 0;
+ }
+
+ @Override
+ public void clear() {
+ size = 0;
+ ++modCount;
+ fourheap = null;
+ Arrays.fill(twoheap, null);
+ }
+
+ @Override
+ public int size() {
+ return size;
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return (size == 0);
+ }
+
+ @Override
+ @SuppressWarnings("unchecked")
+ public void add(K o) {
+ final Comparable<Object> co = (Comparable<Object>)o;
+ // System.err.println("Add: " + o);
+ if (size < TWO_HEAP_MAX_SIZE) {
+ if (size >= twoheap.length) {
+ // Grow by one layer.
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
+ }
+ final int twopos = size;
+ twoheap[twopos] = co;
+ ++size;
+ heapifyUp2(twopos, co);
+ ++modCount;
+ } else {
+ final int fourpos = size - TWO_HEAP_MAX_SIZE;
+ if (fourheap == null) {
+ fourheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, FOUR_HEAP_INITIAL_SIZE);
+ } else if (fourpos >= fourheap.length) {
+ // Grow extension heap by half.
+ fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1));
+ }
+ fourheap[fourpos] = co;
+ ++size;
+ heapifyUp4(fourpos, co);
+ ++modCount;
+ }
+ }
+
+ @Override
+ public void add(K key, int max) {
+ if (size < max) {
+ add(key);
+ } else if (twoheap[0].compareTo(key) >= 0) {
+ replaceTopElement(key);
+ }
+ }
+
+ @Override
+ @SuppressWarnings("unchecked")
+ public K replaceTopElement(K reinsert) {
+ final Comparable<Object> ret = twoheap[0];
+ heapifyDown((Comparable<Object>) reinsert);
+ ++modCount;
+ return (K)ret;
}
/**
- * Compare two objects
+ * Heapify-Up method for 2-ary heap.
*
- * @param o1 First object
- * @param o2 Second object
+ * @param twopos Position in 2-ary heap.
+ * @param cur Current object
*/
+ private void heapifyUp2(int twopos, Comparable<Object> cur) {
+ while (twopos > 0) {
+ final int parent = (twopos - 1) >>> 1;
+ Comparable<Object> par = twoheap[parent];
+ if (cur.compareTo(par) <= 0) {
+ break;
+ }
+ twoheap[twopos] = par;
+ twopos = parent;
+ }
+ twoheap[twopos] = cur;
+ }
+
+ /**
+ * Heapify-Up method for 4-ary heap.
+ *
+ * @param fourpos Position in 4-ary heap.
+ * @param cur Current object
+ */
+ private void heapifyUp4(int fourpos, Comparable<Object> cur) {
+ while (fourpos > 0) {
+ final int parent = (fourpos - 1) >> 2;
+ Comparable<Object> par = fourheap[parent];
+ if (cur.compareTo(par) <= 0) {
+ break;
+ }
+ fourheap[fourpos] = par;
+ fourpos = parent;
+ }
+ if (fourpos == 0 && twoheap[0].compareTo(cur) < 0) {
+ fourheap[0] = twoheap[0];
+ twoheap[0] = cur;
+ } else {
+ fourheap[fourpos] = cur;
+ }
+ }
+
@Override
@SuppressWarnings("unchecked")
- protected boolean comp(Object o1, Object o2) {
- return ((K) o1).compareTo((K) o2) < 0;
+ public K poll() {
+ final Comparable<Object> ret = twoheap[0];
+ if (size == 0) { return (K)ret; } // Empty heap: keep size at 0; a negative size would make the next add() index twoheap[-1].
+ --size;
+ if (size >= TWO_HEAP_MAX_SIZE) {
+ final int last = size - TWO_HEAP_MAX_SIZE;
+ final Comparable<Object> reinsert = fourheap[last];
+ fourheap[last] = null;
+ heapifyDown(reinsert);
+ } else if (size > 0) {
+ final Comparable<Object> reinsert = twoheap[size];
+ twoheap[size] = null;
+ heapifyDown(reinsert);
+ } else {
+ twoheap[0] = null;
+ }
+ ++modCount;
+ return (K)ret;
+ }
+
+ /**
+ * Invoke heapify-down for the root object.
+ *
+ * @param reinsert Object to insert.
+ */
+ private void heapifyDown(Comparable<Object> reinsert) {
+ if (size > TWO_HEAP_MAX_SIZE) {
+ // Special case: 3-ary situation.
+ final int best = (twoheap[1].compareTo(twoheap[2]) >= 0) ? 1 : 2;
+ if (fourheap[0].compareTo(twoheap[best]) > 0) {
+ twoheap[0] = fourheap[0];
+ heapifyDown4(0, reinsert);
+ } else {
+ twoheap[0] = twoheap[best];
+ heapifyDown2(best, reinsert);
+ }
+ return;
+ }
+ heapifyDown2(0, reinsert);
+ }
+
+ /**
+ * Heapify-Down for 2-ary heap.
+ *
+ * @param twopos Position in 2-ary heap.
+ * @param cur Current object
+ */
+ private void heapifyDown2(int twopos, Comparable<Object> cur) {
+ final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1;
+ while (twopos < stop) {
+ int bestchild = (twopos << 1) + 1;
+ Comparable<Object> best = twoheap[bestchild];
+ final int right = bestchild + 1;
+ if (right < size && best.compareTo(twoheap[right]) < 0) {
+ bestchild = right;
+ best = twoheap[right];
+ }
+ if (cur.compareTo(best) >= 0) {
+ break;
+ }
+ twoheap[twopos] = best;
+ twopos = bestchild;
+ }
+ twoheap[twopos] = cur;
+ }
+
+ /**
+ * Heapify-Down for 4-ary heap.
+ *
+ * @param fourpos Position in 4-ary heap.
+ * @param cur Current object
+ */
+ private void heapifyDown4(int fourpos, Comparable<Object> cur) {
+ final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2;
+ while (fourpos < stop) {
+ final int child = (fourpos << 2) + 1;
+ Comparable<Object> best = fourheap[child];
+ int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE;
+ if (size > minsize) {
+ Comparable<Object> nextchild = fourheap[candidate];
+ if (best.compareTo(nextchild) < 0) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+
+ minsize += 2;
+ if (size >= minsize) {
+ nextchild = fourheap[++candidate];
+ if (best.compareTo(nextchild) < 0) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+
+ if (size > minsize) {
+ nextchild = fourheap[++candidate];
+ if (best.compareTo(nextchild) < 0) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+ }
+ }
+ }
+ if (cur.compareTo(best) >= 0) {
+ break;
+ }
+ fourheap[fourpos] = best;
+ fourpos = bestchild;
+ }
+ fourheap[fourpos] = cur;
+ }
+
+ @Override
+ @SuppressWarnings("unchecked")
+ public K peek() {
+ return (K)twoheap[0];
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder buf = new StringBuilder();
+ buf.append(ComparableMaxHeap.class.getSimpleName()).append(" [");
+ for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) {
+ buf.append(iter.get()).append(',');
+ }
+ buf.append(']');
+ return buf.toString();
+ }
+
+ @Override
+ public UnsortedIter unsortedIter() {
+ return new UnsortedIter();
+ }
+
+ /**
+ * Unsorted iterator - in heap order. Does not poll the heap.
+ *
+ * Use this class as follows:
+ *
+ * <pre>
+ * {@code
+ * for (ObjectHeap.UnsortedIter<K> iter = heap.unsortedIter(); iter.valid(); iter.advance()) {
+ * doSomething(iter.get());
+ * }
+ * }
+ * </pre>
+ *
+ * @author Erich Schubert
+ */
+ private class UnsortedIter implements ObjectHeap.UnsortedIter<K> {
+ /**
+ * Iterator position.
+ */
+ protected int pos = 0;
+
+ /**
+ * Modification counter we were initialized at.
+ */
+ protected final int myModCount = modCount;
+
+ @Override
+ public boolean valid() {
+ if (modCount != myModCount) {
+ throw new ConcurrentModificationException();
+ }
+ return pos < size;
+ }
+
+ @Override
+ public void advance() {
+ pos++;
+ }
+
+ @SuppressWarnings("unchecked")
+
+ @Override
+ public K get() {
+ return (K)((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]);
+ }
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMinHeap.java
index 06d2cb32..3cc5a02f 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMinHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMinHeap.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -23,41 +23,409 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import java.util.Arrays;
+import java.util.ConcurrentModificationException;
+
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+
/**
- * Basic in-memory heap structure.
+ * Advanced priority queue class, based on a binary heap (for small sizes),
+ * which will for larger heaps be accompanied by a 4-ary heap (attached below
+ * the root of the two-ary heap, making the root actually 3-ary).
+ *
+ * This code was automatically instantiated for the type: Comparable
+ *
+ * This combination was found to work quite well in benchmarks, but YMMV.
*
- * This heap is built lazily: if you first add many elements, then poll the
- * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log
- * n). This is implemented via a simple validTo counter.
+ * Some other observations from benchmarking:
+ * <ul>
+ * <li>Bulk loading did not improve things</li>
+ * <li>Primitive heaps are substantially faster.</li>
+ * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and
+ * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li>
+ * <li>Workload makes a huge difference. A load-once, poll-until-empty priority
+ * queue is something different than e.g. a top-k heap, which will see a lot of
+ * top element replacements.</li>
+ * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for
+ * top-k make a difference.</li>
+ * <li>Different day, different benchmark results ...</li>
+ * </ul>
*
* @author Erich Schubert
+ *
+ * @apiviz.has UnsortedIter
+ * @param <K> Key type
*/
-public class ComparableMinHeap<K extends Comparable<K>> extends ObjectHeap<K> {
+public class ComparableMinHeap<K extends Comparable<? super K>> implements ObjectHeap<K> {
+ /**
+ * Base heap.
+ */
+ protected Comparable<Object>[] twoheap;
+
+ /**
+ * Extension heap.
+ */
+ protected Comparable<Object>[] fourheap;
+
+ /**
+ * Current size of heap.
+ */
+ protected int size;
+
+ /**
+ * (Structural) modification counter. Used to invalidate iterators.
+ */
+ protected int modCount = 0;
+
+ /**
+ * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements.
+ */
+ private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1;
+
+ /**
+ * Initial size of the 2-ary heap.
+ */
+ private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1;
+
+ /**
+ * Initial size of 4-ary heap when initialized.
+ *
+ * 21 = 4-ary heap of height 2: 1 + 4 + 4*4
+ *
+ * 85 = 4-ary heap of height 3: 21 + 4*4*4
+ *
+ * 341 = 4-ary heap of height 4: 85 + 4*4*4*4
+ *
+ * Since we last grew by 255 (to 511), let's use 341.
+ */
+ private final static int FOUR_HEAP_INITIAL_SIZE = 341;
+
/**
- * Constructor with default capacity.
+ * Constructor, with default size.
*/
+ @SuppressWarnings("unchecked")
public ComparableMinHeap() {
- super(DEFAULT_INITIAL_CAPACITY);
+ super();
+ Comparable<Object>[] twoheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, TWO_HEAP_INITIAL_SIZE);
+
+ this.twoheap = twoheap;
+ this.fourheap = null;
+ this.size = 0;
+ this.modCount = 0;
}
/**
- * Constructor with initial capacity.
+ * Constructor, with given minimum size.
*
- * @param size initial capacity
+ * @param minsize Minimum size
*/
- public ComparableMinHeap(int size) {
- super(size);
+ @SuppressWarnings("unchecked")
+ public ComparableMinHeap(int minsize) {
+ super();
+ if (minsize < TWO_HEAP_MAX_SIZE) {
+ final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
+ Comparable<Object>[] twoheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, size);
+
+ this.twoheap = twoheap;
+ this.fourheap = null;
+ } else {
+ // add() grows the extension heap by length/2, so a 0- or 1-slot array could never grow: use at least the default size.
+ final int foursize = Math.max(FOUR_HEAP_INITIAL_SIZE, minsize - TWO_HEAP_MAX_SIZE);
+ this.twoheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, TWO_HEAP_INITIAL_SIZE);
+ this.fourheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, foursize);
+ }
+ this.size = 0;
+ this.modCount = 0;
+ }
+
+ @Override
+ public void clear() {
+ size = 0;
+ ++modCount;
+ fourheap = null;
+ Arrays.fill(twoheap, null);
+ }
+
+ @Override
+ public int size() {
+ return size;
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return (size == 0);
+ }
+
+ @Override
+ @SuppressWarnings("unchecked")
+ public void add(K o) {
+ final Comparable<Object> co = (Comparable<Object>)o;
+ // System.err.println("Add: " + o);
+ if (size < TWO_HEAP_MAX_SIZE) {
+ if (size >= twoheap.length) {
+ // Grow by one layer.
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
+ }
+ final int twopos = size;
+ twoheap[twopos] = co;
+ ++size;
+ heapifyUp2(twopos, co);
+ ++modCount;
+ } else {
+ final int fourpos = size - TWO_HEAP_MAX_SIZE;
+ if (fourheap == null) {
+ fourheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, FOUR_HEAP_INITIAL_SIZE);
+ } else if (fourpos >= fourheap.length) {
+ // Grow extension heap by half.
+ fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1));
+ }
+ fourheap[fourpos] = co;
+ ++size;
+ heapifyUp4(fourpos, co);
+ ++modCount;
+ }
+ }
+
+ @Override
+ public void add(K key, int max) {
+ if (size < max) {
+ add(key);
+ } else if (twoheap[0].compareTo(key) <= 0) {
+ replaceTopElement(key);
+ }
+ }
+
+ @Override
+ @SuppressWarnings("unchecked")
+ public K replaceTopElement(K reinsert) {
+ final Comparable<Object> ret = twoheap[0];
+ heapifyDown((Comparable<Object>) reinsert);
+ ++modCount;
+ return (K)ret;
}
/**
- * Compare two objects
+ * Heapify-Up method for 2-ary heap.
*
- * @param o1 First object
- * @param o2 Second object
+ * @param twopos Position in 2-ary heap.
+ * @param cur Current object
*/
+ private void heapifyUp2(int twopos, Comparable<Object> cur) {
+ while (twopos > 0) {
+ final int parent = (twopos - 1) >>> 1;
+ Comparable<Object> par = twoheap[parent];
+ if (cur.compareTo(par) >= 0) {
+ break;
+ }
+ twoheap[twopos] = par;
+ twopos = parent;
+ }
+ twoheap[twopos] = cur;
+ }
+
+ /**
+ * Heapify-Up method for 4-ary heap.
+ *
+ * @param fourpos Position in 4-ary heap.
+ * @param cur Current object
+ */
+ private void heapifyUp4(int fourpos, Comparable<Object> cur) {
+ while (fourpos > 0) {
+ final int parent = (fourpos - 1) >> 2;
+ Comparable<Object> par = fourheap[parent];
+ if (cur.compareTo(par) >= 0) {
+ break;
+ }
+ fourheap[fourpos] = par;
+ fourpos = parent;
+ }
+ if (fourpos == 0 && twoheap[0].compareTo(cur) > 0) {
+ fourheap[0] = twoheap[0];
+ twoheap[0] = cur;
+ } else {
+ fourheap[fourpos] = cur;
+ }
+ }
+
@Override
@SuppressWarnings("unchecked")
- protected boolean comp(Object o1, Object o2) {
- return ((K) o1).compareTo((K) o2) > 0;
+ public K poll() {
+ final Comparable<Object> ret = twoheap[0];
+ if (size == 0) { return (K)ret; } // Empty heap: keep size at 0; a negative size would make the next add() index twoheap[-1].
+ --size;
+ if (size >= TWO_HEAP_MAX_SIZE) {
+ final int last = size - TWO_HEAP_MAX_SIZE;
+ final Comparable<Object> reinsert = fourheap[last];
+ fourheap[last] = null;
+ heapifyDown(reinsert);
+ } else if (size > 0) {
+ final Comparable<Object> reinsert = twoheap[size];
+ twoheap[size] = null;
+ heapifyDown(reinsert);
+ } else {
+ twoheap[0] = null;
+ }
+ ++modCount;
+ return (K)ret;
+ }
+
+ /**
+ * Invoke heapify-down for the root object.
+ *
+ * @param reinsert Object to insert.
+ */
+ private void heapifyDown(Comparable<Object> reinsert) {
+ if (size > TWO_HEAP_MAX_SIZE) {
+ // Special case: 3-ary situation.
+ final int best = (twoheap[1].compareTo(twoheap[2]) <= 0) ? 1 : 2;
+ if (fourheap[0].compareTo(twoheap[best]) < 0) {
+ twoheap[0] = fourheap[0];
+ heapifyDown4(0, reinsert);
+ } else {
+ twoheap[0] = twoheap[best];
+ heapifyDown2(best, reinsert);
+ }
+ return;
+ }
+ heapifyDown2(0, reinsert);
+ }
+
+ /**
+ * Heapify-Down for 2-ary heap.
+ *
+ * @param twopos Position in 2-ary heap.
+ * @param cur Current object
+ */
+ private void heapifyDown2(int twopos, Comparable<Object> cur) {
+ final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1;
+ while (twopos < stop) {
+ int bestchild = (twopos << 1) + 1;
+ Comparable<Object> best = twoheap[bestchild];
+ final int right = bestchild + 1;
+ if (right < size && best.compareTo(twoheap[right]) > 0) {
+ bestchild = right;
+ best = twoheap[right];
+ }
+ if (cur.compareTo(best) <= 0) {
+ break;
+ }
+ twoheap[twopos] = best;
+ twopos = bestchild;
+ }
+ twoheap[twopos] = cur;
+ }
+
+ /**
+ * Heapify-Down for 4-ary heap.
+ *
+ * @param fourpos Position in 4-ary heap.
+ * @param cur Current object
+ */
+ private void heapifyDown4(int fourpos, Comparable<Object> cur) {
+ final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2;
+ while (fourpos < stop) {
+ final int child = (fourpos << 2) + 1;
+ Comparable<Object> best = fourheap[child];
+ int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE;
+ if (size > minsize) {
+ Comparable<Object> nextchild = fourheap[candidate];
+ if (best.compareTo(nextchild) > 0) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+
+ minsize += 2;
+ if (size >= minsize) {
+ nextchild = fourheap[++candidate];
+ if (best.compareTo(nextchild) > 0) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+
+ if (size > minsize) {
+ nextchild = fourheap[++candidate];
+ if (best.compareTo(nextchild) > 0) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+ }
+ }
+ }
+ if (cur.compareTo(best) <= 0) {
+ break;
+ }
+ fourheap[fourpos] = best;
+ fourpos = bestchild;
+ }
+ fourheap[fourpos] = cur;
+ }
+
+ @Override
+ @SuppressWarnings("unchecked")
+ public K peek() {
+ return (K)twoheap[0];
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder buf = new StringBuilder();
+ buf.append(ComparableMinHeap.class.getSimpleName()).append(" [");
+ for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) {
+ buf.append(iter.get()).append(',');
+ }
+ buf.append(']');
+ return buf.toString();
+ }
+
+ @Override
+ public UnsortedIter unsortedIter() {
+ return new UnsortedIter();
+ }
+
+ /**
+ * Unsorted iterator - in heap order. Does not poll the heap.
+ *
+ * Use this class as follows:
+ *
+ * <pre>
+ * {@code
+ * for (ObjectHeap.UnsortedIter<K> iter = heap.unsortedIter(); iter.valid(); iter.advance()) {
+ * doSomething(iter.get());
+ * }
+ * }
+ * </pre>
+ *
+ * @author Erich Schubert
+ */
+ private class UnsortedIter implements ObjectHeap.UnsortedIter<K> {
+ /**
+ * Iterator position.
+ */
+ protected int pos = 0;
+
+ /**
+ * Modification counter we were initialized at.
+ */
+ protected final int myModCount = modCount;
+
+ @Override
+ public boolean valid() {
+ if (modCount != myModCount) {
+ throw new ConcurrentModificationException();
+ }
+ return pos < size;
+ }
+
+ @Override
+ public void advance() {
+ pos++;
+ }
+
+ @SuppressWarnings("unchecked")
+
+ @Override
+ public K get() {
+ return (K)((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]);
+ }
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMaxHeap.java
new file mode 100644
index 00000000..7b660d31
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMaxHeap.java
@@ -0,0 +1,440 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Arrays;
+import java.util.ConcurrentModificationException;
+
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+
+/**
+ * Advanced priority queue class, based on a binary heap (for small sizes),
+ * which will for larger heaps be accompanied by a 4-ary heap (attached below
+ * the root of the two-ary heap, making the root actually 3-ary).
+ *
+ * This code was automatically instantiated for the type: Comparator
+ *
+ * This combination was found to work quite well in benchmarks, but YMMV.
+ *
+ * Some other observations from benchmarking:
+ * <ul>
+ * <li>Bulk loading did not improve things</li>
+ * <li>Primitive heaps are substantially faster.</li>
+ * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and
+ * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li>
+ * <li>Workload makes a huge difference. A load-once, poll-until-empty priority
+ * queue is something different than e.g. a top-k heap, which will see a lot of
+ * top element replacements.</li>
+ * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for
+ * top-k make a difference.</li>
+ * <li>Different day, different benchmark results ...</li>
+ * </ul>
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has UnsortedIter
+ * @param <K> Key type
+ */
+public class ComparatorMaxHeap<K> implements ObjectHeap<K> {
+ /**
+ * Base heap.
+ */
+ protected Object[] twoheap;
+
+ /**
+ * Extension heap.
+ */
+ protected Object[] fourheap;
+
+ /**
+ * Current size of heap.
+ */
+ protected int size;
+
+ /**
+ * (Structural) modification counter. Used to invalidate iterators.
+ */
+ protected int modCount = 0;
+
+ /**
+ * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements.
+ */
+ private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1;
+
+ /**
+ * Initial size of the 2-ary heap.
+ */
+ private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1;
+
+ /**
+ * Initial size of 4-ary heap when initialized.
+ *
+ * 21 = 4-ary heap of height 2: 1 + 4 + 4*4
+ *
+ * 85 = 4-ary heap of height 3: 21 + 4*4*4
+ *
+ * 341 = 4-ary heap of height 4: 85 + 4*4*4*4
+ *
+ * Since we last grew by 255 (to 511), let's use 341.
+ */
+ private final static int FOUR_HEAP_INITIAL_SIZE = 341;
+
+
+ /**
+ * Comparator
+ */
+ protected java.util.Comparator<Object> comparator;
+
+ /**
+ * Constructor, with default size.
+ * @param comparator Comparator
+ */
+ @SuppressWarnings("unchecked")
+ public ComparatorMaxHeap(java.util.Comparator<? super K> comparator) {
+ super();
+ this.comparator = (java.util.Comparator<Object>) java.util.Comparator.class.cast(comparator);
+ Object[] twoheap = new Object[TWO_HEAP_INITIAL_SIZE];
+
+ this.twoheap = twoheap;
+ this.fourheap = null;
+ this.size = 0;
+ this.modCount = 0;
+ }
+
+ /**
+ * Constructor, with given minimum size.
+ *
+ * @param minsize Minimum size
+ * @param comparator Comparator
+ */
+ @SuppressWarnings("unchecked")
+ public ComparatorMaxHeap(int minsize, java.util.Comparator<? super K> comparator) {
+ super();
+ this.comparator = (java.util.Comparator<Object>) java.util.Comparator.class.cast(comparator);
+ if (minsize < TWO_HEAP_MAX_SIZE) {
+ final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
+ Object[] twoheap = new Object[size];
+
+ this.twoheap = twoheap;
+ this.fourheap = null;
+ } else {
+ Object[] twoheap = new Object[TWO_HEAP_INITIAL_SIZE];
+ Object[] fourheap = new Object[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)];
+ this.twoheap = twoheap;
+ this.fourheap = fourheap;
+ }
+ this.size = 0;
+ this.modCount = 0;
+ }
+
+ @Override
+ public void clear() {
+ size = 0;
+ ++modCount;
+ fourheap = null;
+ Arrays.fill(twoheap, null);
+ }
+
+ @Override
+ public int size() {
+ return size;
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return (size == 0);
+ }
+
+ @Override
+ public void add(K o) {
+ final Object co = o;
+ // System.err.println("Add: " + o);
+ if (size < TWO_HEAP_MAX_SIZE) {
+ if (size >= twoheap.length) {
+ // Grow by one layer.
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
+ }
+ final int twopos = size;
+ twoheap[twopos] = co;
+ ++size;
+ heapifyUp2(twopos, co);
+ ++modCount;
+ } else {
+ final int fourpos = size - TWO_HEAP_MAX_SIZE;
+ if (fourheap == null) {
+ fourheap = new Object[FOUR_HEAP_INITIAL_SIZE];
+ } else if (fourpos >= fourheap.length) {
+ // Grow extension heap by half.
+ fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1));
+ }
+ fourheap[fourpos] = co;
+ ++size;
+ heapifyUp4(fourpos, co);
+ ++modCount;
+ }
+ }
+
+ @Override
+ public void add(K key, int max) {
+ if (size < max) {
+ add(key);
+ } else if (comparator.compare(twoheap[0], key) >= 0) {
+ replaceTopElement(key);
+ }
+ }
+
+ @Override
+ @SuppressWarnings("unchecked")
+ public K replaceTopElement(K reinsert) {
+ final Object ret = twoheap[0];
+ heapifyDown( reinsert);
+ ++modCount;
+ return (K)ret;
+ }
+
+ /**
+ * Heapify-Up method for 2-ary heap.
+ *
+ * @param twopos Position in 2-ary heap.
+ * @param cur Current object
+ */
+ private void heapifyUp2(int twopos, Object cur) {
+ while (twopos > 0) {
+ final int parent = (twopos - 1) >>> 1;
+ Object par = twoheap[parent];
+ if (comparator.compare(cur, par) <= 0) {
+ break;
+ }
+ twoheap[twopos] = par;
+ twopos = parent;
+ }
+ twoheap[twopos] = cur;
+ }
+
+ /**
+ * Heapify-Up method for 4-ary heap.
+ *
+ * @param fourpos Position in 4-ary heap.
+ * @param cur Current object
+ */
+ private void heapifyUp4(int fourpos, Object cur) {
+ while (fourpos > 0) {
+ final int parent = (fourpos - 1) >> 2;
+ Object par = fourheap[parent];
+ if (comparator.compare(cur, par) <= 0) {
+ break;
+ }
+ fourheap[fourpos] = par;
+ fourpos = parent;
+ }
+ if (fourpos == 0 && comparator.compare(twoheap[0], cur) < 0) {
+ fourheap[0] = twoheap[0];
+ twoheap[0] = cur;
+ } else {
+ fourheap[fourpos] = cur;
+ }
+ }
+
+ @Override
+ @SuppressWarnings("unchecked")
+ public K poll() {
+ final Object ret = twoheap[0];
+ --size;
+ // Replacement object:
+ if (size >= TWO_HEAP_MAX_SIZE) {
+ final int last = size - TWO_HEAP_MAX_SIZE;
+ final Object reinsert = fourheap[last];
+ fourheap[last] = null;
+ heapifyDown(reinsert);
+ } else if (size > 0) {
+ final Object reinsert = twoheap[size];
+ twoheap[size] = null;
+ heapifyDown(reinsert);
+ } else {
+ twoheap[0] = null;
+ }
+ ++modCount;
+ return (K)ret;
+ }
+
+ /**
+ * Invoke heapify-down for the root object.
+ *
+ * @param reinsert Object to insert.
+ */
+ private void heapifyDown(Object reinsert) {
+ if (size > TWO_HEAP_MAX_SIZE) {
+ // Special case: 3-ary situation.
+ final int best = (comparator.compare(twoheap[1], twoheap[2]) >= 0) ? 1 : 2;
+ if (comparator.compare(fourheap[0], twoheap[best]) > 0) {
+ twoheap[0] = fourheap[0];
+ heapifyDown4(0, reinsert);
+ } else {
+ twoheap[0] = twoheap[best];
+ heapifyDown2(best, reinsert);
+ }
+ return;
+ }
+ heapifyDown2(0, reinsert);
+ }
+
+ /**
+ * Heapify-Down for 2-ary heap.
+ *
+ * @param twopos Position in 2-ary heap.
+ * @param cur Current object
+ */
+ private void heapifyDown2(int twopos, Object cur) {
+ final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1;
+ while (twopos < stop) {
+ int bestchild = (twopos << 1) + 1;
+ Object best = twoheap[bestchild];
+ final int right = bestchild + 1;
+ if (right < size && comparator.compare(best, twoheap[right]) < 0) {
+ bestchild = right;
+ best = twoheap[right];
+ }
+ if (comparator.compare(cur, best) >= 0) {
+ break;
+ }
+ twoheap[twopos] = best;
+ twopos = bestchild;
+ }
+ twoheap[twopos] = cur;
+ }
+
+ /**
+ * Heapify-Down for 4-ary heap.
+ *
+ * @param fourpos Position in 4-ary heap.
+ * @param cur Current object
+ */
+ private void heapifyDown4(int fourpos, Object cur) {
+ final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2;
+ while (fourpos < stop) {
+ final int child = (fourpos << 2) + 1;
+ Object best = fourheap[child];
+ int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE;
+ if (size > minsize) {
+ Object nextchild = fourheap[candidate];
+ if (comparator.compare(best, nextchild) < 0) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+
+ minsize += 2;
+ if (size >= minsize) {
+ nextchild = fourheap[++candidate];
+ if (comparator.compare(best, nextchild) < 0) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+
+ if (size > minsize) {
+ nextchild = fourheap[++candidate];
+ if (comparator.compare(best, nextchild) < 0) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+ }
+ }
+ }
+ if (comparator.compare(cur, best) >= 0) {
+ break;
+ }
+ fourheap[fourpos] = best;
+ fourpos = bestchild;
+ }
+ fourheap[fourpos] = cur;
+ }
+
+ @Override
+ @SuppressWarnings("unchecked")
+ public K peek() {
+ return (K)twoheap[0];
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder buf = new StringBuilder();
+ buf.append(ComparatorMaxHeap.class.getSimpleName()).append(" [");
+ for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) {
+ buf.append(iter.get()).append(',');
+ }
+ buf.append(']');
+ return buf.toString();
+ }
+
+ @Override
+ public UnsortedIter unsortedIter() {
+ return new UnsortedIter();
+ }
+
+ /**
+ * Unsorted iterator - in heap order. Does not poll the heap.
+ *
+ * Use this class as follows:
+ *
+ * <pre>
+ * {@code
+ * for (ObjectHeap.UnsortedIter<K> iter = heap.unsortedIter(); iter.valid(); iter.advance()) {
+ * doSomething(iter.get());
+ * }
+ * }
+ * </pre>
+ *
+ * @author Erich Schubert
+ */
+ private class UnsortedIter implements ObjectHeap.UnsortedIter<K> {
+ /**
+ * Iterator position.
+ */
+ protected int pos = 0;
+
+ /**
+ * Modification counter we were initialized at.
+ */
+ protected final int myModCount = modCount;
+
+ @Override
+ public boolean valid() {
+ if (modCount != myModCount) {
+ throw new ConcurrentModificationException();
+ }
+ return pos < size;
+ }
+
+ @Override
+ public void advance() {
+ pos++;
+ }
+
+ @SuppressWarnings("unchecked")
+
+ @Override
+ public K get() {
+ return (K)((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMinHeap.java
new file mode 100644
index 00000000..e12c5f64
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMinHeap.java
@@ -0,0 +1,440 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Arrays;
+import java.util.ConcurrentModificationException;
+
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+
+/**
+ * Advanced priority queue class, based on a binary heap (for small sizes),
+ * which will for larger heaps be accompanied by a 4-ary heap (attached below
+ * the root of the two-ary heap, making the root actually 3-ary).
+ *
+ * This code was automatically instantiated for the type: Comparator
+ *
+ * This combination was found to work quite well in benchmarks, but YMMV.
+ *
+ * Some other observations from benchmarking:
+ * <ul>
+ * <li>Bulk loading did not improve things</li>
+ * <li>Primitive heaps are substantially faster.</li>
+ * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and
+ * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li>
+ * <li>Workload makes a huge difference. A load-once, poll-until-empty priority
+ * queue is something different than e.g. a top-k heap, which will see a lot of
+ * top element replacements.</li>
+ * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for
+ * top-k make a difference.</li>
+ * <li>Different day, different benchmark results ...</li>
+ * </ul>
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has UnsortedIter
+ * @param <K> Key type
+ */
+public class ComparatorMinHeap<K> implements ObjectHeap<K> {
+ /**
+ * Base heap.
+ */
+ protected Object[] twoheap;
+
+ /**
+ * Extension heap.
+ */
+ protected Object[] fourheap;
+
+ /**
+ * Current size of heap.
+ */
+ protected int size;
+
+ /**
+ * (Structural) modification counter. Used to invalidate iterators.
+ */
+ protected int modCount = 0;
+
+ /**
+ * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements.
+ */
+ private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1;
+
+ /**
+ * Initial size of the 2-ary heap.
+ */
+ private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1;
+
+ /**
+ * Initial size of 4-ary heap when initialized.
+ *
+ * 21 = 4-ary heap of height 2: 1 + 4 + 4*4
+ *
+ * 85 = 4-ary heap of height 3: 21 + 4*4*4
+ *
+ * 341 = 4-ary heap of height 4: 85 + 4*4*4*4
+ *
+ * Since we last grew by 255 (to 511), let's use 341.
+ */
+ private final static int FOUR_HEAP_INITIAL_SIZE = 341;
+
+
+ /**
+ * Comparator
+ */
+ protected java.util.Comparator<Object> comparator;
+
+ /**
+ * Constructor, with default size.
+ * @param comparator Comparator
+ */
+ @SuppressWarnings("unchecked")
+ public ComparatorMinHeap(java.util.Comparator<? super K> comparator) {
+ super();
+ this.comparator = (java.util.Comparator<Object>) java.util.Comparator.class.cast(comparator);
+ Object[] twoheap = new Object[TWO_HEAP_INITIAL_SIZE];
+
+ this.twoheap = twoheap;
+ this.fourheap = null;
+ this.size = 0;
+ this.modCount = 0;
+ }
+
+ /**
+ * Constructor, with given minimum size.
+ *
+ * @param minsize Minimum size
+ * @param comparator Comparator
+ */
+ @SuppressWarnings("unchecked")
+ public ComparatorMinHeap(int minsize, java.util.Comparator<? super K> comparator) {
+ super();
+ this.comparator = (java.util.Comparator<Object>) java.util.Comparator.class.cast(comparator);
+ if (minsize < TWO_HEAP_MAX_SIZE) {
+ final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
+ Object[] twoheap = new Object[size];
+
+ this.twoheap = twoheap;
+ this.fourheap = null;
+ } else {
+ Object[] twoheap = new Object[TWO_HEAP_INITIAL_SIZE];
+ Object[] fourheap = new Object[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)];
+ this.twoheap = twoheap;
+ this.fourheap = fourheap;
+ }
+ this.size = 0;
+ this.modCount = 0;
+ }
+
+ @Override
+ public void clear() {
+ size = 0;
+ ++modCount;
+ fourheap = null;
+ Arrays.fill(twoheap, null);
+ }
+
+ @Override
+ public int size() {
+ return size;
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return (size == 0);
+ }
+
+ @Override
+ public void add(K o) {
+ final Object co = o;
+ // System.err.println("Add: " + o);
+ if (size < TWO_HEAP_MAX_SIZE) {
+ if (size >= twoheap.length) {
+ // Grow by one layer.
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
+ }
+ final int twopos = size;
+ twoheap[twopos] = co;
+ ++size;
+ heapifyUp2(twopos, co);
+ ++modCount;
+ } else {
+ final int fourpos = size - TWO_HEAP_MAX_SIZE;
+ if (fourheap == null) {
+ fourheap = new Object[FOUR_HEAP_INITIAL_SIZE];
+ } else if (fourpos >= fourheap.length) {
+ // Grow extension heap by half.
+ fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1));
+ }
+ fourheap[fourpos] = co;
+ ++size;
+ heapifyUp4(fourpos, co);
+ ++modCount;
+ }
+ }
+
+ @Override
+ public void add(K key, int max) {
+ if (size < max) {
+ add(key);
+ } else if (comparator.compare(twoheap[0], key) <= 0) {
+ replaceTopElement(key);
+ }
+ }
+
+ @Override
+ @SuppressWarnings("unchecked")
+ public K replaceTopElement(K reinsert) {
+ final Object ret = twoheap[0];
+ heapifyDown( reinsert);
+ ++modCount;
+ return (K)ret;
+ }
+
+ /**
+ * Heapify-Up method for 2-ary heap.
+ *
+ * @param twopos Position in 2-ary heap.
+ * @param cur Current object
+ */
+ private void heapifyUp2(int twopos, Object cur) {
+ while (twopos > 0) {
+ final int parent = (twopos - 1) >>> 1;
+ Object par = twoheap[parent];
+ if (comparator.compare(cur, par) >= 0) {
+ break;
+ }
+ twoheap[twopos] = par;
+ twopos = parent;
+ }
+ twoheap[twopos] = cur;
+ }
+
+ /**
+ * Heapify-Up method for 4-ary heap.
+ *
+ * @param fourpos Position in 4-ary heap.
+ * @param cur Current object
+ */
+ private void heapifyUp4(int fourpos, Object cur) {
+ while (fourpos > 0) {
+ final int parent = (fourpos - 1) >> 2;
+ Object par = fourheap[parent];
+ if (comparator.compare(cur, par) >= 0) {
+ break;
+ }
+ fourheap[fourpos] = par;
+ fourpos = parent;
+ }
+ if (fourpos == 0 && comparator.compare(twoheap[0], cur) > 0) {
+ fourheap[0] = twoheap[0];
+ twoheap[0] = cur;
+ } else {
+ fourheap[fourpos] = cur;
+ }
+ }
+
+ @Override
+ @SuppressWarnings("unchecked")
+ public K poll() {
+ final Object ret = twoheap[0];
+ --size;
+ // Replacement object:
+ if (size >= TWO_HEAP_MAX_SIZE) {
+ final int last = size - TWO_HEAP_MAX_SIZE;
+ final Object reinsert = fourheap[last];
+ fourheap[last] = null;
+ heapifyDown(reinsert);
+ } else if (size > 0) {
+ final Object reinsert = twoheap[size];
+ twoheap[size] = null;
+ heapifyDown(reinsert);
+ } else {
+ twoheap[0] = null;
+ }
+ ++modCount;
+ return (K)ret;
+ }
+
+ /**
+ * Invoke heapify-down for the root object.
+ *
+ * @param reinsert Object to insert.
+ */
+ private void heapifyDown(Object reinsert) {
+ if (size > TWO_HEAP_MAX_SIZE) {
+ // Special case: 3-ary situation.
+ final int best = (comparator.compare(twoheap[1], twoheap[2]) <= 0) ? 1 : 2;
+ if (comparator.compare(fourheap[0], twoheap[best]) < 0) {
+ twoheap[0] = fourheap[0];
+ heapifyDown4(0, reinsert);
+ } else {
+ twoheap[0] = twoheap[best];
+ heapifyDown2(best, reinsert);
+ }
+ return;
+ }
+ heapifyDown2(0, reinsert);
+ }
+
+ /**
+ * Heapify-Down for 2-ary heap.
+ *
+ * @param twopos Position in 2-ary heap.
+ * @param cur Current object
+ */
+ private void heapifyDown2(int twopos, Object cur) {
+ final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1;
+ while (twopos < stop) {
+ int bestchild = (twopos << 1) + 1;
+ Object best = twoheap[bestchild];
+ final int right = bestchild + 1;
+ if (right < size && comparator.compare(best, twoheap[right]) > 0) {
+ bestchild = right;
+ best = twoheap[right];
+ }
+ if (comparator.compare(cur, best) <= 0) {
+ break;
+ }
+ twoheap[twopos] = best;
+ twopos = bestchild;
+ }
+ twoheap[twopos] = cur;
+ }
+
+ /**
+ * Heapify-Down for 4-ary heap.
+ *
+ * @param fourpos Position in 4-ary heap.
+ * @param cur Current object
+ */
+ private void heapifyDown4(int fourpos, Object cur) {
+ final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2;
+ while (fourpos < stop) {
+ final int child = (fourpos << 2) + 1;
+ Object best = fourheap[child];
+ int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE;
+ if (size > minsize) {
+ Object nextchild = fourheap[candidate];
+ if (comparator.compare(best, nextchild) > 0) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+
+ minsize += 2;
+ if (size >= minsize) {
+ nextchild = fourheap[++candidate];
+ if (comparator.compare(best, nextchild) > 0) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+
+ if (size > minsize) {
+ nextchild = fourheap[++candidate];
+ if (comparator.compare(best, nextchild) > 0) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+ }
+ }
+ }
+ if (comparator.compare(cur, best) <= 0) {
+ break;
+ }
+ fourheap[fourpos] = best;
+ fourpos = bestchild;
+ }
+ fourheap[fourpos] = cur;
+ }
+
+ @Override
+ @SuppressWarnings("unchecked")
+ public K peek() {
+ return (K)twoheap[0];
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder buf = new StringBuilder();
+ buf.append(ComparatorMinHeap.class.getSimpleName()).append(" [");
+ for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) {
+ buf.append(iter.get()).append(',');
+ }
+ buf.append(']');
+ return buf.toString();
+ }
+
+ @Override
+ public UnsortedIter unsortedIter() {
+ return new UnsortedIter();
+ }
+
+ /**
+ * Unsorted iterator - in heap order. Does not poll the heap.
+ *
+ * Use this class as follows:
+ *
+ * <pre>
+ * {@code
+ * for (ObjectHeap.UnsortedIter<K> iter = heap.unsortedIter(); iter.valid(); iter.advance()) {
+ * doSomething(iter.get());
+ * }
+ * }
+ * </pre>
+ *
+ * @author Erich Schubert
+ */
+ private class UnsortedIter implements ObjectHeap.UnsortedIter<K> {
+ /**
+ * Iterator position.
+ */
+ protected int pos = 0;
+
+ /**
+ * Modification counter we were initialized at.
+ */
+ protected final int myModCount = modCount;
+
+ @Override
+ public boolean valid() {
+ if (modCount != myModCount) {
+ throw new ConcurrentModificationException();
+ }
+ return pos < size;
+ }
+
+ @Override
+ public void advance() {
+ pos++;
+ }
+
+ @SuppressWarnings("unchecked")
+
+ @Override
+ public K get() {
+ return (K)((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleHeap.java
index f9f928bd..acf77d86 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleHeap.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -23,53 +23,22 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.Arrays;
-
-import de.lmu.ifi.dbs.elki.math.MathUtil;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter;
/**
- * Basic in-memory heap structure.
- *
- * This heap is built lazily: if you first add many elements, then poll the
- * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log
- * n). This is implemented via a simple validTo counter.
+ * Basic in-memory heap for double values.
*
* @author Erich Schubert
+ *
+ * @apiviz.has UnsortedIter
*/
-public abstract class DoubleHeap extends AbstractHeap {
- /**
- * Heap storage: queue
- */
- protected transient double[] queue;
-
- /**
- * Constructor with initial capacity.
- *
- * @param size initial capacity
- */
- public DoubleHeap(int size) {
- super();
- this.size = 0;
- this.queue = new double[size];
- }
-
+public interface DoubleHeap {
/**
* Add a key-value pair to the heap
*
* @param key Key
*/
- public void add(double key) {
- // resize when needed
- if (size + 1 > queue.length) {
- resize(size + 1);
- }
- // final int pos = size;
- this.queue[size] = key;
- this.size += 1;
- heapifyUp(size - 1, key);
- validSize += 1;
- heapModified();
- }
+ void add(double key);
/**
* Add a key-value pair to the heap, except if the new element is larger than
@@ -78,13 +47,7 @@ public abstract class DoubleHeap extends AbstractHeap {
* @param key Key
* @param max Maximum size of heap
*/
- public void add(double key, int max) {
- if (size < max) {
- add(key);
- } else if (comp(key, peek())) {
- replaceTopElement(key);
- }
- }
+ void add(double key, int max);
/**
* Combined operation that removes the top element, and inserts a new element
@@ -93,172 +56,67 @@ public abstract class DoubleHeap extends AbstractHeap {
* @param e New element to insert
* @return Previous top element of the heap
*/
- public double replaceTopElement(double e) {
- ensureValid();
- double oldroot = queue[0];
- heapifyDown(0, e);
- heapModified();
- return oldroot;
- }
+ double replaceTopElement(double e);
/**
* Get the current top key
*
* @return Top key
*/
- public double peek() {
- if (size == 0) {
- throw new ArrayIndexOutOfBoundsException("Peek() on an empty heap!");
- }
- ensureValid();
- return queue[0];
- }
+ double peek();
/**
* Remove the first element
*
* @return Top element
*/
- public double poll() {
- return removeAt(0);
- }
+ double poll();
/**
- * Repair the heap
+ * Delete all elements from the heap.
*/
- protected void ensureValid() {
- if (validSize != size) {
- if (size > 1) {
- // Parent of first invalid
- int nextmin = validSize > 0 ? ((validSize - 1) >>> 1) : 0;
- int curmin = MathUtil.nextAllOnesInt(nextmin); // Next line
- int nextmax = curmin - 1; // End of valid line
- int pos = (size - 2) >>> 1; // Parent of last element
- // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin+", "+nextmin);
- while (pos >= nextmin) {
- // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin);
- while (pos >= curmin) {
- if (!heapifyDown(pos, queue[pos])) {
- final int parent = (pos - 1) >>> 1;
- if (parent < curmin) {
- nextmin = Math.min(nextmin, parent);
- nextmax = Math.max(nextmax, parent);
- }
- }
- pos--;
- }
- curmin = nextmin;
- pos = Math.min(pos, nextmax);
- nextmax = -1;
- }
- }
- validSize = size;
- }
- }
+ void clear();
/**
- * Remove the element at the given position.
+ * Query the size
*
- * @param pos Element position.
- * @return Removed element
+ * @return Size
*/
- protected double removeAt(int pos) {
- if (pos < 0 || pos >= size) {
- return 0.0;
- }
- final double top = queue[0];
- // Replacement object:
- final double reinkey = queue[size - 1];
- // Keep heap in sync
- if (validSize == size) {
- size -= 1;
- validSize -= 1;
- heapifyDown(pos, reinkey);
- } else {
- size -= 1;
- validSize = Math.min(pos >>> 1, validSize);
- queue[pos] = reinkey;
- }
- heapModified();
- return top;
- }
-
+ public int size();
+
/**
- * Execute a "Heapify Upwards" aka "SiftUp". Used in insertions.
+ * Is the heap empty?
*
- * @param pos insertion position
- * @param curkey Current key
+ * @return {@code true} when the size is 0.
*/
- protected void heapifyUp(int pos, double curkey) {
- while (pos > 0) {
- final int parent = (pos - 1) >>> 1;
- double parkey = queue[parent];
-
- if (comp(curkey, parkey)) { // Compare
- break;
- }
- queue[pos] = parkey;
- pos = parent;
- }
- queue[pos] = curkey;
- }
+ public boolean isEmpty();
/**
- * Execute a "Heapify Downwards" aka "SiftDown". Used in deletions.
+ * Get an unsorted iterator to inspect the heap.
*
- * @param ipos re-insertion position
- * @param curkey Current key
- * @return true when the order was changed
+ * @return Iterator
*/
- protected boolean heapifyDown(final int ipos, double curkey) {
- int pos = ipos;
- final int half = size >>> 1;
- while (pos < half) {
- // Get left child (must exist!)
- int cpos = (pos << 1) + 1;
- double chikey = queue[cpos];
- // Test right child, if present
- final int rchild = cpos + 1;
- if (rchild < size) {
- double right = queue[rchild];
- if (comp(chikey, right)) { // Compare
- cpos = rchild;
- chikey = right;
- }
- }
-
- if (comp(chikey, curkey)) { // Compare
- break;
- }
- queue[pos] = chikey;
- pos = cpos;
- }
- queue[pos] = curkey;
- return (pos == ipos);
- }
+ UnsortedIter unsortedIter();
/**
- * Test whether we need to resize to have the requested capacity.
+ * Unsorted iterator - in heap order. Does not poll the heap.
*
- * @param requiredSize required capacity
- */
- protected final void resize(int requiredSize) {
- queue = Arrays.copyOf(queue, desiredSize(requiredSize, queue.length));
- }
-
- /**
- * Delete all elements from the heap.
+ * <pre>
+ * {@code
+ * for (DoubleHeap.UnsortedIter iter = heap.unsortedIter(); iter.valid(); iter.advance()) {
+ * doSomething(iter.get());
+ * }
+ * }
+ * </pre>
+ *
+ * @author Erich Schubert
*/
- @Override
- public void clear() {
- super.clear();
- for (int i = 0; i < size; i++) {
- queue[i] = 0.0;
- }
+ public static interface UnsortedIter extends Iter {
+ /**
+ * Get the iterator's current object.
+ *
+ * @return Current object
+ */
+ double get();
}
-
- /**
- * Compare two objects
- */
- abstract protected boolean comp(double o1, double o2);
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerHeap.java
new file mode 100644
index 00000000..c3bf85f4
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerHeap.java
@@ -0,0 +1,127 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter;
+
+/**
+ * Basic in-memory heap interface, for double keys and int values.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has UnsortedIter
+ */
+public interface DoubleIntegerHeap {
+ /**
+ * Add a key-value pair to the heap
+ *
+ * @param key Key
+ * @param val Value
+ */
+ void add(double key, int val);
+
+ /**
+ * Add a key-value pair to the heap if it improves the top.
+ *
+ * @param key Key
+ * @param val Value
+ * @param k Desired maximum size
+ */
+ void add(double key, int val, int k);
+
+ /**
+ * Combined operation that removes the top element, and inserts a new element
+ * instead.
+ *
+ * @param key Key of new element
+ * @param val Value of new element
+ */
+ void replaceTopElement(double key, int val);
+
+ /**
+ * Get the current top key
+ *
+ * @return Top key
+ */
+ double peekKey();
+
+ /**
+ * Get the current top value
+ *
+ * @return Value
+ */
+ int peekValue();
+
+ /**
+ * Remove the first element
+ */
+ void poll();
+
+ /**
+ * Clear the heap contents.
+ */
+ void clear();
+
+ /**
+ * Query the size
+ *
+ * @return Size
+ */
+ public int size();
+
+ /**
+ * Is the heap empty?
+ *
+ * @return {@code true} when the size is 0.
+ */
+ public boolean isEmpty();
+
+ /**
+ * Get an unsorted iterator to inspect the heap.
+ *
+ * @return Iterator
+ */
+ UnsortedIter unsortedIter();
+
+ /**
+ * Unsorted iterator - in heap order. Does not poll the heap.
+ *
+ * @author Erich Schubert
+ */
+ public static interface UnsortedIter extends Iter {
+ /**
+ * Get the current key
+ *
+ * @return Current key
+ */
+ double getKey();
+
+ /**
+ * Get the current value
+ *
+ * @return Current value
+ */
+ int getValue();
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMaxHeap.java
new file mode 100644
index 00000000..34f1e889
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMaxHeap.java
@@ -0,0 +1,478 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Arrays;
+import java.util.ConcurrentModificationException;
+
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+
+/**
+ * Advanced priority queue class, based on a binary heap (for small sizes),
+ * which will for larger heaps be accompanied by a 4-ary heap (attached below
+ * the root of the two-ary heap, making the root actually 3-ary).
+ *
+ * This code was automatically instantiated for the types: Double and Integer
+ *
+ * This combination was found to work quite well in benchmarks, but YMMV.
+ *
+ * Some other observations from benchmarking:
+ * <ul>
+ * <li>Bulk loading did not improve things</li>
+ * <li>Primitive heaps are substantially faster.</li>
+ * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and
+ * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li>
+ * <li>Workload makes a huge difference. A load-once, poll-until-empty priority
+ * queue is something different than e.g. a top-k heap, which will see a lot of
+ * top element replacements.</li>
+ * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for
+ * top-k make a difference.</li>
+ * <li>Different day, different benchmark results ...</li>
+ * </ul>
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has UnsortedIter
+ */
+public class DoubleIntegerMaxHeap implements DoubleIntegerHeap {
+ /**
+ * Base heap.
+ */
+ protected double[] twoheap;
+
+ /**
+ * Base heap values.
+ */
+ protected int[] twovals;
+
+ /**
+ * Extension heap.
+ */
+ protected double[] fourheap;
+
+ /**
+ * Extension heap values.
+ */
+ protected int[] fourvals;
+
+ /**
+ * Current size of heap.
+ */
+ protected int size;
+
+ /**
+ * (Structural) modification counter. Used to invalidate iterators.
+ */
+ protected int modCount = 0;
+
+ /**
+ * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements.
+ */
+ private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1;
+
+ /**
+ * Initial size of the 2-ary heap.
+ */
+ private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1;
+
+ /**
+ * Initial capacity of the 4-ary heap when it is allocated on demand.
+ *
+ * 21 = 4-ary heap of height 2: 1 + 4 + 4*4
+ *
+ * 85 = 4-ary heap of height 3: 21 + 4*4*4
+ *
+ * 341 = 4-ary heap of height 4: 85 + 4*4*4*4
+ *
+ * Since the 2-ary heap last grew by 256 (from 255 to 511), let's use 341.
+ */
+ private final static int FOUR_HEAP_INITIAL_SIZE = 341;
+
+ /**
+ * Constructor, with default size.
+ */
+ public DoubleIntegerMaxHeap() {
+ super();
+ double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE];
+ int[] twovals = new int[TWO_HEAP_INITIAL_SIZE];
+
+ this.twoheap = twoheap;
+ this.twovals = twovals;
+ this.fourheap = null;
+ this.fourvals = null;
+ this.size = 0;
+ this.modCount = 0;
+ }
+
+ /**
+ * Constructor, with given minimum size.
+ *
+ * @param minsize Minimum size
+ */
+ public DoubleIntegerMaxHeap(int minsize) {
+ super();
+ if (minsize < TWO_HEAP_MAX_SIZE) {
+ final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
+ double[] twoheap = new double[size];
+ int[] twovals = new int[size];
+
+ this.twoheap = twoheap;
+ this.twovals = twovals;
+ this.fourheap = null;
+ this.fourvals = null;
+ } else {
+ double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE];
+ int[] twovals = new int[TWO_HEAP_INITIAL_SIZE];
+ double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)];
+ int[] fourvals = new int[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)];
+ this.twoheap = twoheap;
+ this.twovals = twovals;
+ this.fourheap = fourheap;
+ this.fourvals = fourvals;
+ }
+ this.size = 0;
+ this.modCount = 0;
+ }
+
+ @Override
+ public void clear() {
+ size = 0;
+ ++modCount;
+ fourheap = null;
+ fourvals = null;
+ Arrays.fill(twoheap, 0.0);
+ Arrays.fill(twovals, 0);
+ }
+
+ @Override
+ public int size() {
+ return size;
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return (size == 0);
+ }
+
+ @Override
+ public void add(double o, int v) {
+ final double co = o;
+ final int cv = v;
+ // System.err.println("Add: " + o);
+ if (size < TWO_HEAP_MAX_SIZE) {
+ if (size >= twoheap.length) {
+ // Grow by one layer.
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
+ twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1);
+ }
+ final int twopos = size;
+ twoheap[twopos] = co;
+ twovals[twopos] = cv;
+ ++size;
+ heapifyUp2(twopos, co, cv);
+ ++modCount;
+ } else {
+ final int fourpos = size - TWO_HEAP_MAX_SIZE;
+ if (fourheap == null) {
+ fourheap = new double[FOUR_HEAP_INITIAL_SIZE];
+ fourvals = new int[FOUR_HEAP_INITIAL_SIZE];
+ } else if (fourpos >= fourheap.length) {
+ // Grow extension heap by half.
+ fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1));
+ fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1));
+ }
+ fourheap[fourpos] = co;
+ fourvals[fourpos] = cv;
+ ++size;
+ heapifyUp4(fourpos, co, cv);
+ ++modCount;
+ }
+ }
+
+ @Override
+ public void add(double key, int val, int max) {
+ if (size < max) {
+ add(key, val);
+ } else if (twoheap[0] >= key) {
+ replaceTopElement(key, val);
+ }
+ }
+
+ @Override
+ public void replaceTopElement(double reinsert, int val) {
+ heapifyDown(reinsert, val);
+ ++modCount;
+ }
+
+ /**
+ * Heapify-Up method for 2-ary heap.
+ *
+ * @param twopos Position in 2-ary heap.
+ * @param cur Current object
+ * @param val Current value
+ */
+ private void heapifyUp2(int twopos, double cur, int val) {
+ while (twopos > 0) {
+ final int parent = (twopos - 1) >>> 1;
+ double par = twoheap[parent];
+ if (cur <= par) {
+ break;
+ }
+ twoheap[twopos] = par;
+ twovals[twopos] = twovals[parent];
+ twopos = parent;
+ }
+ twoheap[twopos] = cur;
+ twovals[twopos] = val;
+ }
+
+ /**
+ * Heapify-Up method for 4-ary heap.
+ *
+ * @param fourpos Position in 4-ary heap.
+ * @param cur Current object
+ * @param val Current value
+ */
+ private void heapifyUp4(int fourpos, double cur, int val) {
+ while (fourpos > 0) {
+ final int parent = (fourpos - 1) >> 2;
+ double par = fourheap[parent];
+ if (cur <= par) {
+ break;
+ }
+ fourheap[fourpos] = par;
+ fourvals[fourpos] = fourvals[parent];
+ fourpos = parent;
+ }
+ if (fourpos == 0 && twoheap[0] < cur) {
+ fourheap[0] = twoheap[0];
+ fourvals[0] = twovals[0];
+ twoheap[0] = cur;
+ twovals[0] = val;
+ } else {
+ fourheap[fourpos] = cur;
+ fourvals[fourpos] = val;
+ }
+ }
+
+ @Override
+ public void poll() {
+ --size;
+ // Replacement object:
+ if (size >= TWO_HEAP_MAX_SIZE) {
+ final int last = size - TWO_HEAP_MAX_SIZE;
+ final double reinsert = fourheap[last];
+ final int reinsertv = fourvals[last];
+ fourheap[last] = 0.0;
+ fourvals[last] = 0;
+ heapifyDown(reinsert, reinsertv);
+ } else if (size > 0) {
+ final double reinsert = twoheap[size];
+ final int reinsertv = twovals[size];
+ twoheap[size] = 0.0;
+ twovals[size] = 0;
+ heapifyDown(reinsert, reinsertv);
+ } else {
+ twoheap[0] = 0.0;
+ twovals[0] = 0;
+ }
+ ++modCount;
+ }
+
+ /**
+ * Invoke heapify-down for the root object.
+ *
+ * @param reinsert Object to insert.
+ * @param val Value to reinsert.
+ */
+ private void heapifyDown(double reinsert, int val) {
+ if (size > TWO_HEAP_MAX_SIZE) {
+ // Special case: 3-ary situation.
+ final int best = (twoheap[1] >= twoheap[2]) ? 1 : 2;
+ if (fourheap[0] > twoheap[best]) {
+ twoheap[0] = fourheap[0];
+ twovals[0] = fourvals[0];
+ heapifyDown4(0, reinsert, val);
+ } else {
+ twoheap[0] = twoheap[best];
+ twovals[0] = twovals[best];
+ heapifyDown2(best, reinsert, val);
+ }
+ return;
+ }
+ heapifyDown2(0, reinsert, val);
+ }
+
+ /**
+ * Heapify-Down for 2-ary heap.
+ *
+ * @param twopos Position in 2-ary heap.
+ * @param cur Current object
+ * @param val Value to reinsert.
+ */
+ private void heapifyDown2(int twopos, double cur, int val) {
+ final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1;
+ while (twopos < stop) {
+ int bestchild = (twopos << 1) + 1;
+ double best = twoheap[bestchild];
+ final int right = bestchild + 1;
+ if (right < size && best < twoheap[right]) {
+ bestchild = right;
+ best = twoheap[right];
+ }
+ if (cur >= best) {
+ break;
+ }
+ twoheap[twopos] = best;
+ twovals[twopos] = twovals[bestchild];
+ twopos = bestchild;
+ }
+ twoheap[twopos] = cur;
+ twovals[twopos] = val;
+ }
+
+ /**
+ * Heapify-Down for 4-ary heap.
+ *
+ * @param fourpos Position in 4-ary heap.
+ * @param cur Current object
+ * @param val Value to reinsert.
+ */
+ private void heapifyDown4(int fourpos, double cur, int val) {
+ final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2;
+ while (fourpos < stop) {
+ final int child = (fourpos << 2) + 1;
+ double best = fourheap[child];
+ int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE;
+ if (size > minsize) {
+ double nextchild = fourheap[candidate];
+ if (best < nextchild) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+
+ minsize += 2;
+ if (size >= minsize) {
+ nextchild = fourheap[++candidate];
+ if (best < nextchild) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+
+ if (size > minsize) {
+ nextchild = fourheap[++candidate];
+ if (best < nextchild) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+ }
+ }
+ }
+ if (cur >= best) {
+ break;
+ }
+ fourheap[fourpos] = best;
+ fourvals[fourpos] = fourvals[bestchild];
+ fourpos = bestchild;
+ }
+ fourheap[fourpos] = cur;
+ fourvals[fourpos] = val;
+ }
+
+ @Override
+ public double peekKey() {
+ return twoheap[0];
+ }
+
+ @Override
+ public int peekValue() {
+ return twovals[0];
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder buf = new StringBuilder();
+ buf.append(DoubleIntegerMaxHeap.class.getSimpleName()).append(" [");
+ for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) {
+ buf.append(iter.getKey()).append(':').append(iter.getValue()).append(',');
+ }
+ buf.append(']');
+ return buf.toString();
+ }
+
+ @Override
+ public UnsortedIter unsortedIter() {
+ return new UnsortedIter();
+ }
+
+ /**
+ * Unsorted iterator - in heap order. Does not poll the heap.
+ *
+ * Use this class as follows:
+ *
+ * <pre>
+ * {@code
+ * for (DoubleIntegerHeap.UnsortedIter iter = heap.unsortedIter(); iter.valid(); iter.advance()) {
+ * doSomething(iter.getKey(), iter.getValue());
+ * }
+ * }
+ * </pre>
+ *
+ * @author Erich Schubert
+ */
+ private class UnsortedIter implements DoubleIntegerHeap.UnsortedIter {
+ /**
+ * Iterator position.
+ */
+ protected int pos = 0;
+
+ /**
+ * Modification counter we were initialized at.
+ */
+ protected final int myModCount = modCount;
+
+ @Override
+ public boolean valid() {
+ if (modCount != myModCount) {
+ throw new ConcurrentModificationException();
+ }
+ return pos < size;
+ }
+
+ @Override
+ public void advance() {
+ pos++;
+ }
+
+ @Override
+ public double getKey() {
+ return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]);
+ }
+
+ @Override
+ public int getValue() {
+ return ((pos < TWO_HEAP_MAX_SIZE) ? twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMinHeap.java
new file mode 100644
index 00000000..ca6192ad
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMinHeap.java
@@ -0,0 +1,478 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Arrays;
+import java.util.ConcurrentModificationException;
+
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+
+/**
+ * Advanced priority queue class, based on a binary heap (for small sizes),
+ * which will for larger heaps be accompanied by a 4-ary heap (attached below
+ * the root of the two-ary heap, making the root actually 3-ary).
+ *
+ * This code was automatically instantiated for the types: Double and Integer
+ *
+ * This combination was found to work quite well in benchmarks, but YMMV.
+ *
+ * Some other observations from benchmarking:
+ * <ul>
+ * <li>Bulk loading did not improve things</li>
+ * <li>Primitive heaps are substantially faster.</li>
+ * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and
+ * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li>
+ * <li>Workload makes a huge difference. A load-once, poll-until-empty priority
+ * queue is something different than e.g. a top-k heap, which will see a lot of
+ * top element replacements.</li>
+ * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for
+ * top-k make a difference.</li>
+ * <li>Different day, different benchmark results ...</li>
+ * </ul>
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has UnsortedIter
+ */
+public class DoubleIntegerMinHeap implements DoubleIntegerHeap {
+ /**
+ * Base heap.
+ */
+ protected double[] twoheap;
+
+ /**
+ * Base heap values.
+ */
+ protected int[] twovals;
+
+ /**
+ * Extension heap.
+ */
+ protected double[] fourheap;
+
+ /**
+ * Extension heap values.
+ */
+ protected int[] fourvals;
+
+ /**
+ * Current size of heap.
+ */
+ protected int size;
+
+ /**
+ * (Structural) modification counter. Used to invalidate iterators.
+ */
+ protected int modCount = 0;
+
+ /**
+ * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements.
+ */
+ private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1;
+
+ /**
+ * Initial size of the 2-ary heap.
+ */
+ private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1;
+
+ /**
+ * Initial capacity of the 4-ary heap when it is allocated on demand.
+ *
+ * 21 = 4-ary heap of height 2: 1 + 4 + 4*4
+ *
+ * 85 = 4-ary heap of height 3: 21 + 4*4*4
+ *
+ * 341 = 4-ary heap of height 4: 85 + 4*4*4*4
+ *
+ * Since the 2-ary heap last grew by 256 (from 255 to 511), let's use 341.
+ */
+ private final static int FOUR_HEAP_INITIAL_SIZE = 341;
+
+ /**
+ * Constructor, with default size.
+ */
+ public DoubleIntegerMinHeap() {
+ super();
+ double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE];
+ int[] twovals = new int[TWO_HEAP_INITIAL_SIZE];
+
+ this.twoheap = twoheap;
+ this.twovals = twovals;
+ this.fourheap = null;
+ this.fourvals = null;
+ this.size = 0;
+ this.modCount = 0;
+ }
+
+ /**
+ * Constructor, with given minimum size.
+ *
+ * @param minsize Minimum size
+ */
+ public DoubleIntegerMinHeap(int minsize) {
+ super();
+ if (minsize < TWO_HEAP_MAX_SIZE) {
+ final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
+ double[] twoheap = new double[size];
+ int[] twovals = new int[size];
+
+ this.twoheap = twoheap;
+ this.twovals = twovals;
+ this.fourheap = null;
+ this.fourvals = null;
+ } else {
+ double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE];
+ int[] twovals = new int[TWO_HEAP_INITIAL_SIZE];
+ double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)];
+ int[] fourvals = new int[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)];
+ this.twoheap = twoheap;
+ this.twovals = twovals;
+ this.fourheap = fourheap;
+ this.fourvals = fourvals;
+ }
+ this.size = 0;
+ this.modCount = 0;
+ }
+
+ @Override
+ public void clear() {
+ size = 0;
+ ++modCount;
+ fourheap = null;
+ fourvals = null;
+ Arrays.fill(twoheap, 0.0);
+ Arrays.fill(twovals, 0);
+ }
+
+ @Override
+ public int size() {
+ return size;
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return (size == 0);
+ }
+
+ @Override
+ public void add(double o, int v) {
+ final double co = o;
+ final int cv = v;
+ // System.err.println("Add: " + o);
+ if (size < TWO_HEAP_MAX_SIZE) {
+ if (size >= twoheap.length) {
+ // Grow by one layer.
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
+ twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1);
+ }
+ final int twopos = size;
+ twoheap[twopos] = co;
+ twovals[twopos] = cv;
+ ++size;
+ heapifyUp2(twopos, co, cv);
+ ++modCount;
+ } else {
+ final int fourpos = size - TWO_HEAP_MAX_SIZE;
+ if (fourheap == null) {
+ fourheap = new double[FOUR_HEAP_INITIAL_SIZE];
+ fourvals = new int[FOUR_HEAP_INITIAL_SIZE];
+ } else if (fourpos >= fourheap.length) {
+ // Grow extension heap by half.
+ fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1));
+ fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1));
+ }
+ fourheap[fourpos] = co;
+ fourvals[fourpos] = cv;
+ ++size;
+ heapifyUp4(fourpos, co, cv);
+ ++modCount;
+ }
+ }
+
+ @Override
+ public void add(double key, int val, int max) {
+ if (size < max) {
+ add(key, val);
+ } else if (twoheap[0] <= key) {
+ replaceTopElement(key, val);
+ }
+ }
+
+ @Override
+ public void replaceTopElement(double reinsert, int val) {
+ heapifyDown(reinsert, val);
+ ++modCount;
+ }
+
+ /**
+ * Heapify-Up method for 2-ary heap.
+ *
+ * @param twopos Position in 2-ary heap.
+ * @param cur Current object
+ * @param val Current value
+ */
+ private void heapifyUp2(int twopos, double cur, int val) {
+ while (twopos > 0) {
+ final int parent = (twopos - 1) >>> 1;
+ double par = twoheap[parent];
+ if (cur >= par) {
+ break;
+ }
+ twoheap[twopos] = par;
+ twovals[twopos] = twovals[parent];
+ twopos = parent;
+ }
+ twoheap[twopos] = cur;
+ twovals[twopos] = val;
+ }
+
+ /**
+ * Heapify-Up method for 4-ary heap.
+ *
+ * @param fourpos Position in 4-ary heap.
+ * @param cur Current object
+ * @param val Current value
+ */
+ private void heapifyUp4(int fourpos, double cur, int val) {
+ while (fourpos > 0) {
+ final int parent = (fourpos - 1) >> 2;
+ double par = fourheap[parent];
+ if (cur >= par) {
+ break;
+ }
+ fourheap[fourpos] = par;
+ fourvals[fourpos] = fourvals[parent];
+ fourpos = parent;
+ }
+ if (fourpos == 0 && twoheap[0] > cur) {
+ fourheap[0] = twoheap[0];
+ fourvals[0] = twovals[0];
+ twoheap[0] = cur;
+ twovals[0] = val;
+ } else {
+ fourheap[fourpos] = cur;
+ fourvals[fourpos] = val;
+ }
+ }
+
+ @Override
+ public void poll() {
+ --size;
+ // Replacement object:
+ if (size >= TWO_HEAP_MAX_SIZE) {
+ final int last = size - TWO_HEAP_MAX_SIZE;
+ final double reinsert = fourheap[last];
+ final int reinsertv = fourvals[last];
+ fourheap[last] = 0.0;
+ fourvals[last] = 0;
+ heapifyDown(reinsert, reinsertv);
+ } else if (size > 0) {
+ final double reinsert = twoheap[size];
+ final int reinsertv = twovals[size];
+ twoheap[size] = 0.0;
+ twovals[size] = 0;
+ heapifyDown(reinsert, reinsertv);
+ } else {
+ twoheap[0] = 0.0;
+ twovals[0] = 0;
+ }
+ ++modCount;
+ }
+
+ /**
+ * Invoke heapify-down for the root object.
+ *
+ * @param reinsert Object to insert.
+ * @param val Value to reinsert.
+ */
+ private void heapifyDown(double reinsert, int val) {
+ if (size > TWO_HEAP_MAX_SIZE) {
+ // Special case: 3-ary situation.
+ final int best = (twoheap[1] <= twoheap[2]) ? 1 : 2;
+ if (fourheap[0] < twoheap[best]) {
+ twoheap[0] = fourheap[0];
+ twovals[0] = fourvals[0];
+ heapifyDown4(0, reinsert, val);
+ } else {
+ twoheap[0] = twoheap[best];
+ twovals[0] = twovals[best];
+ heapifyDown2(best, reinsert, val);
+ }
+ return;
+ }
+ heapifyDown2(0, reinsert, val);
+ }
+
+ /**
+ * Heapify-Down for 2-ary heap.
+ *
+ * @param twopos Position in 2-ary heap.
+ * @param cur Current object
+ * @param val Value to reinsert.
+ */
+ private void heapifyDown2(int twopos, double cur, int val) {
+ final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1;
+ while (twopos < stop) {
+ int bestchild = (twopos << 1) + 1;
+ double best = twoheap[bestchild];
+ final int right = bestchild + 1;
+ if (right < size && best > twoheap[right]) {
+ bestchild = right;
+ best = twoheap[right];
+ }
+ if (cur <= best) {
+ break;
+ }
+ twoheap[twopos] = best;
+ twovals[twopos] = twovals[bestchild];
+ twopos = bestchild;
+ }
+ twoheap[twopos] = cur;
+ twovals[twopos] = val;
+ }
+
+ /**
+ * Heapify-Down for 4-ary heap.
+ *
+ * @param fourpos Position in 4-ary heap.
+ * @param cur Current object
+ * @param val Value to reinsert.
+ */
+ private void heapifyDown4(int fourpos, double cur, int val) {
+ final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2;
+ while (fourpos < stop) {
+ final int child = (fourpos << 2) + 1;
+ double best = fourheap[child];
+ int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE;
+ if (size > minsize) {
+ double nextchild = fourheap[candidate];
+ if (best > nextchild) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+
+ minsize += 2;
+ if (size >= minsize) {
+ nextchild = fourheap[++candidate];
+ if (best > nextchild) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+
+ if (size > minsize) {
+ nextchild = fourheap[++candidate];
+ if (best > nextchild) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+ }
+ }
+ }
+ if (cur <= best) {
+ break;
+ }
+ fourheap[fourpos] = best;
+ fourvals[fourpos] = fourvals[bestchild];
+ fourpos = bestchild;
+ }
+ fourheap[fourpos] = cur;
+ fourvals[fourpos] = val;
+ }
+
+ @Override
+ public double peekKey() {
+ return twoheap[0];
+ }
+
+ @Override
+ public int peekValue() {
+ return twovals[0];
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder buf = new StringBuilder();
+ buf.append(DoubleIntegerMinHeap.class.getSimpleName()).append(" [");
+ for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) {
+ buf.append(iter.getKey()).append(':').append(iter.getValue()).append(',');
+ }
+ buf.append(']');
+ return buf.toString();
+ }
+
+ @Override
+ public UnsortedIter unsortedIter() {
+ return new UnsortedIter();
+ }
+
+ /**
+ * Unsorted iterator - in heap order. Does not poll the heap.
+ *
+ * Use this class as follows:
+ *
+ * <pre>
+ * {@code
+ * for (DoubleIntegerHeap.UnsortedIter iter = heap.unsortedIter(); iter.valid(); iter.advance()) {
+ * doSomething(iter.getKey(), iter.getValue());
+ * }
+ * }
+ * </pre>
+ *
+ * @author Erich Schubert
+ */
+ private class UnsortedIter implements DoubleIntegerHeap.UnsortedIter {
+ /**
+ * Iterator position.
+ */
+ protected int pos = 0;
+
+ /**
+ * Modification counter we were initialized at.
+ */
+ protected final int myModCount = modCount;
+
+ @Override
+ public boolean valid() {
+ if (modCount != myModCount) {
+ throw new ConcurrentModificationException();
+ }
+ return pos < size;
+ }
+
+ @Override
+ public void advance() {
+ pos++;
+ }
+
+ @Override
+ public double getKey() {
+ return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]);
+ }
+
+ @Override
+ public int getValue() {
+ return ((pos < TWO_HEAP_MAX_SIZE) ? twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongHeap.java
new file mode 100644
index 00000000..b93adafa
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongHeap.java
@@ -0,0 +1,127 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter;
+
+/**
+ * Basic in-memory heap interface, for double keys and long values.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has UnsortedIter
+ */
+public interface DoubleLongHeap {
+ /**
+ * Add a key-value pair to the heap
+ *
+ * @param key Key
+ * @param val Value
+ */
+ void add(double key, long val);
+
+ /**
+ * Add a key-value pair to the heap if it improves the top.
+ *
+ * @param key Key
+ * @param val Value
+ * @param k Desired maximum size
+ */
+ void add(double key, long val, int k);
+
+ /**
+ * Combined operation that removes the top element, and inserts a new element
+ * instead.
+ *
+ * @param key Key of new element
+ * @param val Value of new element
+ */
+ void replaceTopElement(double key, long val);
+
+ /**
+ * Get the current top key
+ *
+ * @return Top key
+ */
+ double peekKey();
+
+ /**
+ * Get the current top value
+ *
+ * @return Value
+ */
+ long peekValue();
+
+ /**
+ * Remove the first element
+ */
+ void poll();
+
+ /**
+ * Clear the heap contents.
+ */
+ void clear();
+
+ /**
+ * Query the size
+ *
+ * @return Size
+ */
+ public int size();
+
+ /**
+ * Is the heap empty?
+ *
+ * @return {@code true} when the size is 0.
+ */
+ public boolean isEmpty();
+
+ /**
+ * Get an unsorted iterator to inspect the heap.
+ *
+ * @return Iterator
+ */
+ UnsortedIter unsortedIter();
+
+ /**
+ * Unsorted iterator - in heap order. Does not poll the heap.
+ *
+ * @author Erich Schubert
+ */
+ public static interface UnsortedIter extends Iter {
+ /**
+ * Get the current key
+ *
+ * @return Current key
+ */
+ double getKey();
+
+ /**
+ * Get the current value
+ *
+ * @return Current value
+ */
+ long getValue();
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMaxHeap.java
new file mode 100644
index 00000000..6d15656c
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMaxHeap.java
@@ -0,0 +1,478 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Arrays;
+import java.util.ConcurrentModificationException;
+
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+
+/**
+ * Advanced priority queue class, based on a binary heap (for small sizes),
+ * which will for larger heaps be accompanied by a 4-ary heap (attached below
+ * the root of the two-ary heap, making the root actually 3-ary).
+ *
+ * This code was automatically instantiated for the types: Double and Long
+ *
+ * This combination was found to work quite well in benchmarks, but YMMV.
+ *
+ * Some other observations from benchmarking:
+ * <ul>
+ * <li>Bulk loading did not improve things</li>
+ * <li>Primitive heaps are substantially faster.</li>
+ * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and
+ * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li>
+ * <li>Workload makes a huge difference. A load-once, poll-until-empty priority
+ * queue is something different than e.g. a top-k heap, which will see a lot of
+ * top element replacements.</li>
+ * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for
+ * top-k make a difference.</li>
+ * <li>Different day, different benchmark results ...</li>
+ * </ul>
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has UnsortedIter
+ */
+public class DoubleLongMaxHeap implements DoubleLongHeap {
+ /**
+ * Base heap.
+ */
+ protected double[] twoheap;
+
+ /**
+ * Base heap values.
+ */
+ protected long[] twovals;
+
+ /**
+ * Extension heap.
+ */
+ protected double[] fourheap;
+
+ /**
+ * Extension heap values.
+ */
+ protected long[] fourvals;
+
+ /**
+ * Current size of heap.
+ */
+ protected int size;
+
+ /**
+ * (Structural) modification counter. Used to invalidate iterators.
+ */
+ protected int modCount = 0;
+
+ /**
+ * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements.
+ */
+ private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1;
+
+ /**
+ * Initial size of the 2-ary heap.
+ */
+ private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1;
+
+ /**
+ * Initial size of 4-ary heap when initialized.
+ *
+ * 21 = 4-ary heap of height 2: 1 + 4 + 4*4
+ *
+ * 85 = 4-ary heap of height 3: 21 + 4*4*4
+ *
+ * 341 = 4-ary heap of height 4: 85 + 4*4*4*4
+ *
+ * Since the 2-ary heap last grew by 256 (from 255 to 511), let's use 341.
+ */
+ private final static int FOUR_HEAP_INITIAL_SIZE = 341;
+
+ /**
+ * Constructor, with default size.
+ */
+ public DoubleLongMaxHeap() {
+ super();
+ double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE];
+ long[] twovals = new long[TWO_HEAP_INITIAL_SIZE];
+
+ this.twoheap = twoheap;
+ this.twovals = twovals;
+ this.fourheap = null;
+ this.fourvals = null;
+ this.size = 0;
+ this.modCount = 0;
+ }
+
+ /**
+ * Constructor, with given minimum size.
+ *
+ * @param minsize Minimum size
+ */
+ public DoubleLongMaxHeap(int minsize) {
+ super();
+ if (minsize < TWO_HEAP_MAX_SIZE) {
+ final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
+ double[] twoheap = new double[size];
+ long[] twovals = new long[size];
+
+ this.twoheap = twoheap;
+ this.twovals = twovals;
+ this.fourheap = null;
+ this.fourvals = null;
+ } else {
+ double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE];
+ long[] twovals = new long[TWO_HEAP_INITIAL_SIZE];
+ double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)];
+ long[] fourvals = new long[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)];
+ this.twoheap = twoheap;
+ this.twovals = twovals;
+ this.fourheap = fourheap;
+ this.fourvals = fourvals;
+ }
+ this.size = 0;
+ this.modCount = 0;
+ }
+
+ @Override
+ public void clear() {
+ size = 0;
+ ++modCount;
+ fourheap = null;
+ fourvals = null;
+ Arrays.fill(twoheap, 0.0);
+ Arrays.fill(twovals, 0);
+ }
+
+ @Override
+ public int size() {
+ return size;
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return (size == 0);
+ }
+
+ @Override
+ public void add(double o, long v) {
+ final double co = o;
+ final long cv = v;
+ // System.err.println("Add: " + o);
+ if (size < TWO_HEAP_MAX_SIZE) {
+ if (size >= twoheap.length) {
+ // Grow by one layer.
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
+ twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1);
+ }
+ final int twopos = size;
+ twoheap[twopos] = co;
+ twovals[twopos] = cv;
+ ++size;
+ heapifyUp2(twopos, co, cv);
+ ++modCount;
+ } else {
+ final int fourpos = size - TWO_HEAP_MAX_SIZE;
+ if (fourheap == null) {
+ fourheap = new double[FOUR_HEAP_INITIAL_SIZE];
+ fourvals = new long[FOUR_HEAP_INITIAL_SIZE];
+ } else if (fourpos >= fourheap.length) {
+ // Grow extension heap by half.
+ fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1));
+ fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1));
+ }
+ fourheap[fourpos] = co;
+ fourvals[fourpos] = cv;
+ ++size;
+ heapifyUp4(fourpos, co, cv);
+ ++modCount;
+ }
+ }
+
+ @Override
+ public void add(double key, long val, int max) {
+ if (size < max) {
+ add(key, val);
+ } else if (twoheap[0] >= key) {
+ replaceTopElement(key, val);
+ }
+ }
+
+ @Override
+ public void replaceTopElement(double reinsert, long val) {
+ heapifyDown(reinsert, val);
+ ++modCount;
+ }
+
+ /**
+ * Heapify-Up method for 2-ary heap.
+ *
+ * @param twopos Position in 2-ary heap.
+ * @param cur Current object
+ * @param val Current value
+ */
+ private void heapifyUp2(int twopos, double cur, long val) {
+ while (twopos > 0) {
+ final int parent = (twopos - 1) >>> 1;
+ double par = twoheap[parent];
+ if (cur <= par) {
+ break;
+ }
+ twoheap[twopos] = par;
+ twovals[twopos] = twovals[parent];
+ twopos = parent;
+ }
+ twoheap[twopos] = cur;
+ twovals[twopos] = val;
+ }
+
+ /**
+ * Heapify-Up method for 4-ary heap.
+ *
+ * @param fourpos Position in 4-ary heap.
+ * @param cur Current object
+ * @param val Current value
+ */
+ private void heapifyUp4(int fourpos, double cur, long val) {
+ while (fourpos > 0) {
+ final int parent = (fourpos - 1) >> 2;
+ double par = fourheap[parent];
+ if (cur <= par) {
+ break;
+ }
+ fourheap[fourpos] = par;
+ fourvals[fourpos] = fourvals[parent];
+ fourpos = parent;
+ }
+ if (fourpos == 0 && twoheap[0] < cur) {
+ fourheap[0] = twoheap[0];
+ fourvals[0] = twovals[0];
+ twoheap[0] = cur;
+ twovals[0] = val;
+ } else {
+ fourheap[fourpos] = cur;
+ fourvals[fourpos] = val;
+ }
+ }
+
+ @Override
+ public void poll() {
+ --size;
+ // Replacement object:
+ if (size >= TWO_HEAP_MAX_SIZE) {
+ final int last = size - TWO_HEAP_MAX_SIZE;
+ final double reinsert = fourheap[last];
+ final long reinsertv = fourvals[last];
+ fourheap[last] = 0.0;
+ fourvals[last] = 0;
+ heapifyDown(reinsert, reinsertv);
+ } else if (size > 0) {
+ final double reinsert = twoheap[size];
+ final long reinsertv = twovals[size];
+ twoheap[size] = 0.0;
+ twovals[size] = 0;
+ heapifyDown(reinsert, reinsertv);
+ } else {
+ twoheap[0] = 0.0;
+ twovals[0] = 0;
+ }
+ ++modCount;
+ }
+
+ /**
+ * Invoke heapify-down for the root object.
+ *
+ * @param reinsert Object to insert.
+ * @param val Value to reinsert.
+ */
+ private void heapifyDown(double reinsert, long val) {
+ if (size > TWO_HEAP_MAX_SIZE) {
+ // Special case: 3-ary situation.
+ final int best = (twoheap[1] >= twoheap[2]) ? 1 : 2;
+ if (fourheap[0] > twoheap[best]) {
+ twoheap[0] = fourheap[0];
+ twovals[0] = fourvals[0];
+ heapifyDown4(0, reinsert, val);
+ } else {
+ twoheap[0] = twoheap[best];
+ twovals[0] = twovals[best];
+ heapifyDown2(best, reinsert, val);
+ }
+ return;
+ }
+ heapifyDown2(0, reinsert, val);
+ }
+
+ /**
+ * Heapify-Down for 2-ary heap.
+ *
+ * @param twopos Position in 2-ary heap.
+ * @param cur Current object
+ * @param val Value to reinsert.
+ */
+ private void heapifyDown2(int twopos, double cur, long val) {
+ final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1;
+ while (twopos < stop) {
+ int bestchild = (twopos << 1) + 1;
+ double best = twoheap[bestchild];
+ final int right = bestchild + 1;
+ if (right < size && best < twoheap[right]) {
+ bestchild = right;
+ best = twoheap[right];
+ }
+ if (cur >= best) {
+ break;
+ }
+ twoheap[twopos] = best;
+ twovals[twopos] = twovals[bestchild];
+ twopos = bestchild;
+ }
+ twoheap[twopos] = cur;
+ twovals[twopos] = val;
+ }
+
+ /**
+ * Heapify-Down for 4-ary heap.
+ *
+ * @param fourpos Position in 4-ary heap.
+ * @param cur Current object
+ * @param val Value to reinsert.
+ */
+ private void heapifyDown4(int fourpos, double cur, long val) {
+ final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2;
+ while (fourpos < stop) {
+ final int child = (fourpos << 2) + 1;
+ double best = fourheap[child];
+ int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE;
+ if (size > minsize) {
+ double nextchild = fourheap[candidate];
+ if (best < nextchild) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+
+ minsize += 2;
+ if (size >= minsize) {
+ nextchild = fourheap[++candidate];
+ if (best < nextchild) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+
+ if (size > minsize) {
+ nextchild = fourheap[++candidate];
+ if (best < nextchild) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+ }
+ }
+ }
+ if (cur >= best) {
+ break;
+ }
+ fourheap[fourpos] = best;
+ fourvals[fourpos] = fourvals[bestchild];
+ fourpos = bestchild;
+ }
+ fourheap[fourpos] = cur;
+ fourvals[fourpos] = val;
+ }
+
+ @Override
+ public double peekKey() {
+ return twoheap[0];
+ }
+
+ @Override
+ public long peekValue() {
+ return twovals[0];
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder buf = new StringBuilder();
+ buf.append(DoubleLongMaxHeap.class.getSimpleName()).append(" [");
+ for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) {
+ buf.append(iter.getKey()).append(':').append(iter.getValue()).append(',');
+ }
+ buf.append(']');
+ return buf.toString();
+ }
+
+ @Override
+ public UnsortedIter unsortedIter() {
+ return new UnsortedIter();
+ }
+
+ /**
+ * Unsorted iterator - in heap order. Does not poll the heap.
+ *
+ * Use this class as follows:
+ *
+ * <pre>
+ * {@code
+ * for (DoubleLongHeap.UnsortedIter iter = heap.unsortedIter(); iter.valid(); iter.advance()) {
+ * doSomething(iter.getKey(), iter.getValue());
+ * }
+ * }
+ * </pre>
+ *
+ * @author Erich Schubert
+ */
+ private class UnsortedIter implements DoubleLongHeap.UnsortedIter {
+ /**
+ * Iterator position.
+ */
+ protected int pos = 0;
+
+ /**
+ * Modification counter we were initialized at.
+ */
+ protected final int myModCount = modCount;
+
+ @Override
+ public boolean valid() {
+ if (modCount != myModCount) {
+ throw new ConcurrentModificationException();
+ }
+ return pos < size;
+ }
+
+ @Override
+ public void advance() {
+ pos++;
+ }
+
+ @Override
+ public double getKey() {
+ return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]);
+ }
+
+ @Override
+ public long getValue() {
+ return ((pos < TWO_HEAP_MAX_SIZE) ? twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMinHeap.java
new file mode 100644
index 00000000..d38eb6e3
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMinHeap.java
@@ -0,0 +1,478 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Arrays;
+import java.util.ConcurrentModificationException;
+
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+
+/**
+ * Advanced priority queue class, based on a binary heap (for small sizes),
+ * which will for larger heaps be accompanied by a 4-ary heap (attached below
+ * the root of the two-ary heap, making the root actually 3-ary).
+ *
+ * This code was automatically instantiated for the types: Double and Long
+ *
+ * This combination was found to work quite well in benchmarks, but YMMV.
+ *
+ * Some other observations from benchmarking:
+ * <ul>
+ * <li>Bulk loading did not improve things</li>
+ * <li>Primitive heaps are substantially faster.</li>
+ * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and
+ * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li>
+ * <li>Workload makes a huge difference. A load-once, poll-until-empty priority
+ * queue is something different than e.g. a top-k heap, which will see a lot of
+ * top element replacements.</li>
+ * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for
+ * top-k make a difference.</li>
+ * <li>Different day, different benchmark results ...</li>
+ * </ul>
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has UnsortedIter
+ */
+public class DoubleLongMinHeap implements DoubleLongHeap {
+ /**
+ * Base heap.
+ */
+ protected double[] twoheap;
+
+ /**
+ * Base heap values.
+ */
+ protected long[] twovals;
+
+ /**
+ * Extension heap.
+ */
+ protected double[] fourheap;
+
+ /**
+ * Extension heap values.
+ */
+ protected long[] fourvals;
+
+ /**
+ * Current size of heap.
+ */
+ protected int size;
+
+ /**
+ * (Structural) modification counter. Used to invalidate iterators.
+ */
+ protected int modCount = 0;
+
+ /**
+ * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements.
+ */
+ private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1;
+
+ /**
+ * Initial size of the 2-ary heap.
+ */
+ private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1;
+
+ /**
+ * Initial size of 4-ary heap when initialized.
+ *
+ * 21 = 4-ary heap of height 2: 1 + 4 + 4*4
+ *
+ * 85 = 4-ary heap of height 3: 21 + 4*4*4
+ *
+ * 341 = 4-ary heap of height 4: 85 + 4*4*4*4
+ *
+ * Since the 2-ary heap last grew by 256 (from 255 to 511), let's use 341.
+ */
+ private final static int FOUR_HEAP_INITIAL_SIZE = 341;
+
+ /**
+ * Constructor, with default size.
+ */
+ public DoubleLongMinHeap() {
+ super();
+ double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE];
+ long[] twovals = new long[TWO_HEAP_INITIAL_SIZE];
+
+ this.twoheap = twoheap;
+ this.twovals = twovals;
+ this.fourheap = null;
+ this.fourvals = null;
+ this.size = 0;
+ this.modCount = 0;
+ }
+
+ /**
+ * Constructor, with given minimum size.
+ *
+ * @param minsize Minimum size
+ */
+ public DoubleLongMinHeap(int minsize) {
+ super();
+ if (minsize < TWO_HEAP_MAX_SIZE) {
+ final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
+ double[] twoheap = new double[size];
+ long[] twovals = new long[size];
+
+ this.twoheap = twoheap;
+ this.twovals = twovals;
+ this.fourheap = null;
+ this.fourvals = null;
+ } else {
+ double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE];
+ long[] twovals = new long[TWO_HEAP_INITIAL_SIZE];
+ double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)];
+ long[] fourvals = new long[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)];
+ this.twoheap = twoheap;
+ this.twovals = twovals;
+ this.fourheap = fourheap;
+ this.fourvals = fourvals;
+ }
+ this.size = 0;
+ this.modCount = 0;
+ }
+
+ @Override
+ public void clear() {
+ size = 0;
+ ++modCount;
+ fourheap = null;
+ fourvals = null;
+ Arrays.fill(twoheap, 0.0);
+ Arrays.fill(twovals, 0);
+ }
+
+ @Override
+ public int size() {
+ return size;
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return (size == 0);
+ }
+
+ @Override
+ public void add(double o, long v) {
+ final double co = o;
+ final long cv = v;
+ // System.err.println("Add: " + o);
+ if (size < TWO_HEAP_MAX_SIZE) {
+ if (size >= twoheap.length) {
+ // Grow by one layer.
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
+ twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1);
+ }
+ final int twopos = size;
+ twoheap[twopos] = co;
+ twovals[twopos] = cv;
+ ++size;
+ heapifyUp2(twopos, co, cv);
+ ++modCount;
+ } else {
+ final int fourpos = size - TWO_HEAP_MAX_SIZE;
+ if (fourheap == null) {
+ fourheap = new double[FOUR_HEAP_INITIAL_SIZE];
+ fourvals = new long[FOUR_HEAP_INITIAL_SIZE];
+ } else if (fourpos >= fourheap.length) {
+ // Grow extension heap by half.
+ fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1));
+ fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1));
+ }
+ fourheap[fourpos] = co;
+ fourvals[fourpos] = cv;
+ ++size;
+ heapifyUp4(fourpos, co, cv);
+ ++modCount;
+ }
+ }
+
+ @Override
+ public void add(double key, long val, int max) {
+ if (size < max) {
+ add(key, val);
+ } else if (twoheap[0] <= key) {
+ replaceTopElement(key, val);
+ }
+ }
+
+ @Override
+ public void replaceTopElement(double reinsert, long val) {
+ heapifyDown(reinsert, val);
+ ++modCount;
+ }
+
+ /**
+ * Heapify-Up method for 2-ary heap.
+ *
+ * @param twopos Position in 2-ary heap.
+ * @param cur Current object
+ * @param val Current value
+ */
+ private void heapifyUp2(int twopos, double cur, long val) {
+ while (twopos > 0) {
+ final int parent = (twopos - 1) >>> 1;
+ double par = twoheap[parent];
+ if (cur >= par) {
+ break;
+ }
+ twoheap[twopos] = par;
+ twovals[twopos] = twovals[parent];
+ twopos = parent;
+ }
+ twoheap[twopos] = cur;
+ twovals[twopos] = val;
+ }
+
+ /**
+ * Heapify-Up method for 4-ary heap.
+ *
+ * @param fourpos Position in 4-ary heap.
+ * @param cur Current object
+ * @param val Current value
+ */
+ private void heapifyUp4(int fourpos, double cur, long val) {
+ while (fourpos > 0) {
+ final int parent = (fourpos - 1) >> 2;
+ double par = fourheap[parent];
+ if (cur >= par) {
+ break;
+ }
+ fourheap[fourpos] = par;
+ fourvals[fourpos] = fourvals[parent];
+ fourpos = parent;
+ }
+ if (fourpos == 0 && twoheap[0] > cur) {
+ fourheap[0] = twoheap[0];
+ fourvals[0] = twovals[0];
+ twoheap[0] = cur;
+ twovals[0] = val;
+ } else {
+ fourheap[fourpos] = cur;
+ fourvals[fourpos] = val;
+ }
+ }
+
+ @Override
+ public void poll() {
+ --size;
+ // Replacement object:
+ if (size >= TWO_HEAP_MAX_SIZE) {
+ final int last = size - TWO_HEAP_MAX_SIZE;
+ final double reinsert = fourheap[last];
+ final long reinsertv = fourvals[last];
+ fourheap[last] = 0.0;
+ fourvals[last] = 0;
+ heapifyDown(reinsert, reinsertv);
+ } else if (size > 0) {
+ final double reinsert = twoheap[size];
+ final long reinsertv = twovals[size];
+ twoheap[size] = 0.0;
+ twovals[size] = 0;
+ heapifyDown(reinsert, reinsertv);
+ } else {
+ twoheap[0] = 0.0;
+ twovals[0] = 0;
+ }
+ ++modCount;
+ }
+
+ /**
+ * Invoke heapify-down for the root object.
+ *
+ * @param reinsert Object to insert.
+ * @param val Value to reinsert.
+ */
+ private void heapifyDown(double reinsert, long val) {
+ if (size > TWO_HEAP_MAX_SIZE) {
+ // Special case: 3-ary situation.
+ final int best = (twoheap[1] <= twoheap[2]) ? 1 : 2;
+ if (fourheap[0] < twoheap[best]) {
+ twoheap[0] = fourheap[0];
+ twovals[0] = fourvals[0];
+ heapifyDown4(0, reinsert, val);
+ } else {
+ twoheap[0] = twoheap[best];
+ twovals[0] = twovals[best];
+ heapifyDown2(best, reinsert, val);
+ }
+ return;
+ }
+ heapifyDown2(0, reinsert, val);
+ }
+
+ /**
+ * Heapify-Down for 2-ary heap.
+ *
+ * @param twopos Position in 2-ary heap.
+ * @param cur Current object
+ * @param val Value to reinsert.
+ */
+ private void heapifyDown2(int twopos, double cur, long val) {
+ final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1;
+ while (twopos < stop) {
+ int bestchild = (twopos << 1) + 1;
+ double best = twoheap[bestchild];
+ final int right = bestchild + 1;
+ if (right < size && best > twoheap[right]) {
+ bestchild = right;
+ best = twoheap[right];
+ }
+ if (cur <= best) {
+ break;
+ }
+ twoheap[twopos] = best;
+ twovals[twopos] = twovals[bestchild];
+ twopos = bestchild;
+ }
+ twoheap[twopos] = cur;
+ twovals[twopos] = val;
+ }
+
+ /**
+ * Heapify-Down for 4-ary heap.
+ *
+ * @param fourpos Position in 4-ary heap.
+ * @param cur Current object
+ * @param val Value to reinsert.
+ */
+ private void heapifyDown4(int fourpos, double cur, long val) {
+ final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2;
+ while (fourpos < stop) {
+ final int child = (fourpos << 2) + 1;
+ double best = fourheap[child];
+ int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE;
+ if (size > minsize) {
+ double nextchild = fourheap[candidate];
+ if (best > nextchild) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+
+ minsize += 2;
+ if (size >= minsize) {
+ nextchild = fourheap[++candidate];
+ if (best > nextchild) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+
+ if (size > minsize) {
+ nextchild = fourheap[++candidate];
+ if (best > nextchild) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+ }
+ }
+ }
+ if (cur <= best) {
+ break;
+ }
+ fourheap[fourpos] = best;
+ fourvals[fourpos] = fourvals[bestchild];
+ fourpos = bestchild;
+ }
+ fourheap[fourpos] = cur;
+ fourvals[fourpos] = val;
+ }
+
+ @Override
+ public double peekKey() {
+ return twoheap[0];
+ }
+
+ @Override
+ public long peekValue() {
+ return twovals[0];
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder buf = new StringBuilder();
+ buf.append(DoubleLongMinHeap.class.getSimpleName()).append(" [");
+ for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) {
+ buf.append(iter.getKey()).append(':').append(iter.getValue()).append(',');
+ }
+ buf.append(']');
+ return buf.toString();
+ }
+
+ @Override
+ public UnsortedIter unsortedIter() {
+ return new UnsortedIter();
+ }
+
+ /**
+ * Unsorted iterator - in heap order. Does not poll the heap.
+ *
+ * Use this class as follows:
+ *
+ * <pre>
+ * {@code
+ * for (DoubleLongHeap.UnsortedIter iter = heap.unsortedIter(); iter.valid(); iter.advance()) {
+ * doSomething(iter.getKey(), iter.getValue());
+ * }
+ * }
+ * </pre>
+ *
+ * @author Erich Schubert
+ */
+ private class UnsortedIter implements DoubleLongHeap.UnsortedIter {
+ /**
+ * Iterator position.
+ */
+ protected int pos = 0;
+
+ /**
+ * Modification counter we were initialized at.
+ */
+ protected final int myModCount = modCount;
+
+ @Override
+ public boolean valid() {
+ if (modCount != myModCount) {
+ throw new ConcurrentModificationException();
+ }
+ return pos < size;
+ }
+
+ @Override
+ public void advance() {
+ pos++;
+ }
+
+ @Override
+ public double getKey() {
+ return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]);
+ }
+
+ @Override
+ public long getValue() {
+ return ((pos < TWO_HEAP_MAX_SIZE) ? twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMaxHeap.java
index 1b7d6037..7ea28f14 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMaxHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMaxHeap.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -23,40 +23,400 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import java.util.Arrays;
+import java.util.ConcurrentModificationException;
+
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+
/**
- * Basic in-memory heap structure.
+ * Advanced priority queue class, based on a binary heap (for small sizes),
+ * which will for larger heaps be accompanied by a 4-ary heap (attached below
+ * the root of the two-ary heap, making the root actually 3-ary).
+ *
+ * This code was automatically instantiated for the type: Double
*
- * This heap is built lazily: if you first add many elements, then poll the
- * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log
- * n). This is implemented via a simple validTo counter.
+ * This combination was found to work quite well in benchmarks, but YMMV.
+ *
+ * Some other observations from benchmarking:
+ * <ul>
+ * <li>Bulk loading did not improve things</li>
+ * <li>Primitive heaps are substantially faster.</li>
+ * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and
+ * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li>
+ * <li>Workload makes a huge difference. A load-once, poll-until-empty priority
+ * queue is something different than e.g. a top-k heap, which will see a lot of
+ * top element replacements.</li>
+ * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for
+ * top-k make a difference.</li>
+ * <li>Different day, different benchmark results ...</li>
+ * </ul>
*
* @author Erich Schubert
+ *
+ * @apiviz.has UnsortedIter
*/
-public class DoubleMaxHeap extends DoubleHeap {
+public class DoubleMaxHeap implements DoubleHeap {
+ /**
+ * Base heap.
+ */
+ protected double[] twoheap;
+
+ /**
+ * Extension heap.
+ */
+ protected double[] fourheap;
+
+ /**
+ * Current size of heap.
+ */
+ protected int size;
+
+ /**
+ * (Structural) modification counter. Used to invalidate iterators.
+ */
+ protected int modCount = 0;
+
+ /**
+ * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements.
+ */
+ private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1;
+
/**
- * Constructor with default capacity.
+ * Initial size of the 2-ary heap.
+ */
+ private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1;
+
+ /**
+ * Initial size of 4-ary heap when initialized.
+ *
+ * 21 = 4-ary heap of height 2: 1 + 4 + 4*4
+ *
+ * 85 = 4-ary heap of height 3: 21 + 4*4*4
+ *
+ * 341 = 4-ary heap of height 4: 85 + 4*4*4*4
+ *
+ * Since the 2-ary heap last grew by 256 (from 255 to 511), let's use 341.
+ */
+ private final static int FOUR_HEAP_INITIAL_SIZE = 341;
+
+ /**
+ * Constructor, with default size.
*/
public DoubleMaxHeap() {
- super(DEFAULT_INITIAL_CAPACITY);
+ super();
+ double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE];
+
+ this.twoheap = twoheap;
+ this.fourheap = null;
+ this.size = 0;
+ this.modCount = 0;
}
/**
- * Constructor with initial capacity.
+ * Constructor, with given minimum size.
*
- * @param size initial capacity
+ * @param minsize Minimum size
*/
- public DoubleMaxHeap(int size) {
- super(size);
+ public DoubleMaxHeap(int minsize) {
+ super();
+ if (minsize < TWO_HEAP_MAX_SIZE) {
+ final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
+ double[] twoheap = new double[size];
+
+ this.twoheap = twoheap;
+ this.fourheap = null;
+ } else {
+ double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE];
+ double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)];
+ this.twoheap = twoheap;
+ this.fourheap = fourheap;
+ }
+ this.size = 0;
+ this.modCount = 0;
+ }
+
+ @Override
+ public void clear() {
+ size = 0;
+ ++modCount;
+ fourheap = null;
+ Arrays.fill(twoheap, 0.0);
+ }
+
+ @Override
+ public int size() {
+ return size;
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return (size == 0);
+ }
+
+ @Override
+ public void add(double o) {
+ final double co = o;
+ // System.err.println("Add: " + o);
+ if (size < TWO_HEAP_MAX_SIZE) {
+ if (size >= twoheap.length) {
+ // Grow by one layer.
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
+ }
+ final int twopos = size;
+ twoheap[twopos] = co;
+ ++size;
+ heapifyUp2(twopos, co);
+ ++modCount;
+ } else {
+ final int fourpos = size - TWO_HEAP_MAX_SIZE;
+ if (fourheap == null) {
+ fourheap = new double[FOUR_HEAP_INITIAL_SIZE];
+ } else if (fourpos >= fourheap.length) {
+ // Grow extension heap by half.
+ fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1));
+ }
+ fourheap[fourpos] = co;
+ ++size;
+ heapifyUp4(fourpos, co);
+ ++modCount;
+ }
+ }
+
+ @Override
+ public void add(double key, int max) {
+ if (size < max) {
+ add(key);
+ } else if (twoheap[0] >= key) {
+ replaceTopElement(key);
+ }
+ }
+
+ @Override
+ public double replaceTopElement(double reinsert) {
+ final double ret = twoheap[0];
+ heapifyDown( reinsert);
+ ++modCount;
+ return ret;
+ }
+
+ /**
+ * Heapify-Up method for 2-ary heap.
+ *
+ * @param twopos Position in 2-ary heap.
+ * @param cur Current object
+ */
+ private void heapifyUp2(int twopos, double cur) {
+ while (twopos > 0) {
+ final int parent = (twopos - 1) >>> 1;
+ double par = twoheap[parent];
+ if (cur <= par) {
+ break;
+ }
+ twoheap[twopos] = par;
+ twopos = parent;
+ }
+ twoheap[twopos] = cur;
+ }
+
+ /**
+ * Heapify-Up method for 4-ary heap.
+ *
+ * @param fourpos Position in 4-ary heap.
+ * @param cur Current object
+ */
+ private void heapifyUp4(int fourpos, double cur) {
+ while (fourpos > 0) {
+ final int parent = (fourpos - 1) >> 2;
+ double par = fourheap[parent];
+ if (cur <= par) {
+ break;
+ }
+ fourheap[fourpos] = par;
+ fourpos = parent;
+ }
+ if (fourpos == 0 && twoheap[0] < cur) {
+ fourheap[0] = twoheap[0];
+ twoheap[0] = cur;
+ } else {
+ fourheap[fourpos] = cur;
+ }
+ }
+
+ @Override
+ public double poll() {
+ final double ret = twoheap[0];
+ --size;
+ // Replacement object:
+ if (size >= TWO_HEAP_MAX_SIZE) {
+ final int last = size - TWO_HEAP_MAX_SIZE;
+ final double reinsert = fourheap[last];
+ fourheap[last] = 0.0;
+ heapifyDown(reinsert);
+ } else if (size > 0) {
+ final double reinsert = twoheap[size];
+ twoheap[size] = 0.0;
+ heapifyDown(reinsert);
+ } else {
+ twoheap[0] = 0.0;
+ }
+ ++modCount;
+ return ret;
+ }
+
+ /**
+ * Invoke heapify-down for the root object.
+ *
+ * @param reinsert Object to insert.
+ */
+ private void heapifyDown(double reinsert) {
+ if (size > TWO_HEAP_MAX_SIZE) {
+ // Special case: 3-ary situation.
+ final int best = (twoheap[1] >= twoheap[2]) ? 1 : 2;
+ if (fourheap[0] > twoheap[best]) {
+ twoheap[0] = fourheap[0];
+ heapifyDown4(0, reinsert);
+ } else {
+ twoheap[0] = twoheap[best];
+ heapifyDown2(best, reinsert);
+ }
+ return;
+ }
+ heapifyDown2(0, reinsert);
+ }
+
+ /**
+ * Heapify-Down for 2-ary heap.
+ *
+ * @param twopos Position in 2-ary heap.
+ * @param cur Current object
+ */
+ private void heapifyDown2(int twopos, double cur) {
+ final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1;
+ while (twopos < stop) {
+ int bestchild = (twopos << 1) + 1;
+ double best = twoheap[bestchild];
+ final int right = bestchild + 1;
+ if (right < size && best < twoheap[right]) {
+ bestchild = right;
+ best = twoheap[right];
+ }
+ if (cur >= best) {
+ break;
+ }
+ twoheap[twopos] = best;
+ twopos = bestchild;
+ }
+ twoheap[twopos] = cur;
}
/**
- * Compare two objects
+ * Heapify-Down for 4-ary heap.
*
- * @param o1 First object
- * @param o2 Second object
+ * @param fourpos Position in 4-ary heap.
+ * @param cur Current object
*/
+ private void heapifyDown4(int fourpos, double cur) {
+ final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2;
+ while (fourpos < stop) {
+ final int child = (fourpos << 2) + 1;
+ double best = fourheap[child];
+ int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE;
+ if (size > minsize) {
+ double nextchild = fourheap[candidate];
+ if (best < nextchild) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+
+ minsize += 2;
+ if (size >= minsize) {
+ nextchild = fourheap[++candidate];
+ if (best < nextchild) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+
+ if (size > minsize) {
+ nextchild = fourheap[++candidate];
+ if (best < nextchild) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+ }
+ }
+ }
+ if (cur >= best) {
+ break;
+ }
+ fourheap[fourpos] = best;
+ fourpos = bestchild;
+ }
+ fourheap[fourpos] = cur;
+ }
+
+ @Override
+ public double peek() {
+ return twoheap[0];
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder buf = new StringBuilder();
+ buf.append(DoubleMaxHeap.class.getSimpleName()).append(" [");
+ for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) {
+ buf.append(iter.get()).append(',');
+ }
+ buf.append(']');
+ return buf.toString();
+ }
+
@Override
- protected boolean comp(double o1, double o2) {
- return o1 < o2;
+ public UnsortedIter unsortedIter() {
+ return new UnsortedIter();
+ }
+
+ /**
+ * Unsorted iterator - in heap order. Does not poll the heap.
+ *
+ * Use this class as follows:
+ *
+ * <pre>
+ * {@code
+ * for (DoubleHeap.UnsortedIter iter = heap.unsortedIter(); iter.valid(); iter.advance()) {
+ * doSomething(iter.get());
+ * }
+ * }
+ * </pre>
+ *
+ * @author Erich Schubert
+ */
+ private class UnsortedIter implements DoubleHeap.UnsortedIter {
+ /**
+ * Iterator position.
+ */
+ protected int pos = 0;
+
+ /**
+ * Modification counter we were initialized at.
+ */
+ protected final int myModCount = modCount;
+
+ @Override
+ public boolean valid() {
+ if (modCount != myModCount) {
+ throw new ConcurrentModificationException();
+ }
+ return pos < size;
+ }
+
+ @Override
+ public void advance() {
+ pos++;
+ }
+
+ @Override
+ public double get() {
+ return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]);
+ }
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMinHeap.java
index 2ce05ff9..e9334153 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMinHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMinHeap.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -23,40 +23,400 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import java.util.Arrays;
+import java.util.ConcurrentModificationException;
+
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+
/**
- * Basic in-memory heap structure.
+ * Advanced priority queue class, based on a binary heap (for small sizes),
+ * which will for larger heaps be accompanied by a 4-ary heap (attached below
+ * the root of the two-ary heap, making the root actually 3-ary).
+ *
+ * This code was automatically instantiated for the type: Double
*
- * This heap is built lazily: if you first add many elements, then poll the
- * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log
- * n). This is implemented via a simple validTo counter.
+ * This combination was found to work quite well in benchmarks, but YMMV.
+ *
+ * Some other observations from benchmarking:
+ * <ul>
+ * <li>Bulk loading did not improve things</li>
+ * <li>Primitive heaps are substantially faster.</li>
+ * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and
+ * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li>
+ * <li>Workload makes a huge difference. A load-once, poll-until-empty priority
+ * queue is something different than e.g. a top-k heap, which will see a lot of
+ * top element replacements.</li>
+ * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for
+ * top-k make a difference.</li>
+ * <li>Different day, different benchmark results ...</li>
+ * </ul>
*
* @author Erich Schubert
+ *
+ * @apiviz.has UnsortedIter
*/
-public class DoubleMinHeap extends DoubleHeap {
+public class DoubleMinHeap implements DoubleHeap {
+ /**
+ * Base heap.
+ */
+ protected double[] twoheap;
+
+ /**
+ * Extension heap.
+ */
+ protected double[] fourheap;
+
+ /**
+ * Current size of heap.
+ */
+ protected int size;
+
+ /**
+ * (Structural) modification counter. Used to invalidate iterators.
+ */
+ protected int modCount = 0;
+
+ /**
+ * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements.
+ */
+ private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1;
+
/**
- * Constructor with default capacity.
+ * Initial size of the 2-ary heap.
+ */
+ private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1;
+
+ /**
+ * Initial capacity of the 4-ary heap when it is first allocated.
+ *
+ * 21 = 4-ary heap of height 2: 1 + 4 + 4*4
+ *
+ * 85 = 4-ary heap of height 3: 21 + 4*4*4
+ *
+ * 341 = 4-ary heap of height 4: 85 + 4*4*4*4
+ *
+ * Since we last grew by 256 (from 255 to 511), let's use 341.
+ */
+ private final static int FOUR_HEAP_INITIAL_SIZE = 341;
+
+ /**
+ * Constructor, with default size.
*/
public DoubleMinHeap() {
- super(DEFAULT_INITIAL_CAPACITY);
+ super();
+ double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE];
+
+ this.twoheap = twoheap;
+ this.fourheap = null;
+ this.size = 0;
+ this.modCount = 0;
}
/**
- * Constructor with initial capacity.
+ * Constructor, with given minimum size.
*
- * @param size initial capacity
+ * @param minsize Minimum size
*/
- public DoubleMinHeap(int size) {
- super(size);
+ public DoubleMinHeap(int minsize) {
+ super();
+ if (minsize < TWO_HEAP_MAX_SIZE) {
+ final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
+ double[] twoheap = new double[size];
+
+ this.twoheap = twoheap;
+ this.fourheap = null;
+ } else {
+ double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE];
+ double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)];
+ this.twoheap = twoheap;
+ this.fourheap = fourheap;
+ }
+ this.size = 0;
+ this.modCount = 0;
+ }
+
+ @Override
+ public void clear() {
+ size = 0;
+ ++modCount;
+ fourheap = null;
+ Arrays.fill(twoheap, 0.0);
+ }
+
+ @Override
+ public int size() {
+ return size;
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return (size == 0);
+ }
+
+ @Override
+ public void add(double o) {
+ final double co = o;
+ // System.err.println("Add: " + o);
+ if (size < TWO_HEAP_MAX_SIZE) {
+ if (size >= twoheap.length) {
+ // Grow by one layer.
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
+ }
+ final int twopos = size;
+ twoheap[twopos] = co;
+ ++size;
+ heapifyUp2(twopos, co);
+ ++modCount;
+ } else {
+ final int fourpos = size - TWO_HEAP_MAX_SIZE;
+ if (fourheap == null) {
+ fourheap = new double[FOUR_HEAP_INITIAL_SIZE];
+ } else if (fourpos >= fourheap.length) {
+ // Grow extension heap by half.
+ fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1));
+ }
+ fourheap[fourpos] = co;
+ ++size;
+ heapifyUp4(fourpos, co);
+ ++modCount;
+ }
+ }
+
+ @Override
+ public void add(double key, int max) {
+ if (size < max) {
+ add(key);
+ } else if (twoheap[0] <= key) {
+ replaceTopElement(key);
+ }
+ }
+
+ @Override
+ public double replaceTopElement(double reinsert) {
+ final double ret = twoheap[0];
+ heapifyDown( reinsert);
+ ++modCount;
+ return ret;
+ }
+
+ /**
+ * Heapify-Up method for 2-ary heap.
+ *
+ * @param twopos Position in 2-ary heap.
+ * @param cur Current object
+ */
+ private void heapifyUp2(int twopos, double cur) {
+ while (twopos > 0) {
+ final int parent = (twopos - 1) >>> 1;
+ double par = twoheap[parent];
+ if (cur >= par) {
+ break;
+ }
+ twoheap[twopos] = par;
+ twopos = parent;
+ }
+ twoheap[twopos] = cur;
+ }
+
+ /**
+ * Heapify-Up method for 4-ary heap.
+ *
+ * @param fourpos Position in 4-ary heap.
+ * @param cur Current object
+ */
+ private void heapifyUp4(int fourpos, double cur) {
+ while (fourpos > 0) {
+ final int parent = (fourpos - 1) >> 2;
+ double par = fourheap[parent];
+ if (cur >= par) {
+ break;
+ }
+ fourheap[fourpos] = par;
+ fourpos = parent;
+ }
+ if (fourpos == 0 && twoheap[0] > cur) {
+ fourheap[0] = twoheap[0];
+ twoheap[0] = cur;
+ } else {
+ fourheap[fourpos] = cur;
+ }
+ }
+
+ @Override
+ public double poll() {
+ final double ret = twoheap[0];
+ --size;
+ // Replacement object:
+ if (size >= TWO_HEAP_MAX_SIZE) {
+ final int last = size - TWO_HEAP_MAX_SIZE;
+ final double reinsert = fourheap[last];
+ fourheap[last] = 0.0;
+ heapifyDown(reinsert);
+ } else if (size > 0) {
+ final double reinsert = twoheap[size];
+ twoheap[size] = 0.0;
+ heapifyDown(reinsert);
+ } else {
+ twoheap[0] = 0.0;
+ }
+ ++modCount;
+ return ret;
+ }
+
+ /**
+ * Invoke heapify-down for the root object.
+ *
+ * @param reinsert Object to insert.
+ */
+ private void heapifyDown(double reinsert) {
+ if (size > TWO_HEAP_MAX_SIZE) {
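+ // Special case: the root has three children (twoheap[1], twoheap[2], fourheap[0]). NOTE(review): reinsert is not compared with the smallest of them before that child is promoted, so replaceTopElement() with a value below all three leaves a root larger than reinsert - confirm and add a guard.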
+ final int best = (twoheap[1] <= twoheap[2]) ? 1 : 2;
+ if (fourheap[0] < twoheap[best]) {
+ twoheap[0] = fourheap[0];
+ heapifyDown4(0, reinsert);
+ } else {
+ twoheap[0] = twoheap[best];
+ heapifyDown2(best, reinsert);
+ }
+ return;
+ }
+ heapifyDown2(0, reinsert);
+ }
+
+ /**
+ * Heapify-Down for 2-ary heap.
+ *
+ * @param twopos Position in 2-ary heap.
+ * @param cur Current object
+ */
+ private void heapifyDown2(int twopos, double cur) {
+ final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1;
+ while (twopos < stop) {
+ int bestchild = (twopos << 1) + 1;
+ double best = twoheap[bestchild];
+ final int right = bestchild + 1;
+ if (right < size && best > twoheap[right]) {
+ bestchild = right;
+ best = twoheap[right];
+ }
+ if (cur <= best) {
+ break;
+ }
+ twoheap[twopos] = best;
+ twopos = bestchild;
+ }
+ twoheap[twopos] = cur;
}
/**
- * Compare two objects
+ * Heapify-Down for 4-ary heap.
*
- * @param o1 First object
- * @param o2 Second object
+ * @param fourpos Position in 4-ary heap.
+ * @param cur Current object
*/
+ private void heapifyDown4(int fourpos, double cur) {
+ final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2;
+ while (fourpos < stop) {
+ final int child = (fourpos << 2) + 1;
+ double best = fourheap[child];
+ int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE;
+ if (size > minsize) {
+ double nextchild = fourheap[candidate];
+ if (best > nextchild) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+
+ minsize += 2;
+ if (size >= minsize) {
+ nextchild = fourheap[++candidate];
+ if (best > nextchild) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+
+ if (size > minsize) {
+ nextchild = fourheap[++candidate];
+ if (best > nextchild) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+ }
+ }
+ }
+ if (cur <= best) {
+ break;
+ }
+ fourheap[fourpos] = best;
+ fourpos = bestchild;
+ }
+ fourheap[fourpos] = cur;
+ }
+
+ @Override
+ public double peek() {
+ return twoheap[0];
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder buf = new StringBuilder();
+ buf.append(DoubleMinHeap.class.getSimpleName()).append(" [");
+ for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) {
+ buf.append(iter.get()).append(',');
+ }
+ buf.append(']');
+ return buf.toString();
+ }
+
@Override
- protected boolean comp(double o1, double o2) {
- return o1 > o2;
+ public UnsortedIter unsortedIter() {
+ return new UnsortedIter();
+ }
+
+ /**
+ * Unsorted iterator - in heap order. Does not poll the heap.
+ *
+ * Use this class as follows:
+ *
+ * <pre>
+ * {@code
+ * for (DoubleHeap.UnsortedIter iter = heap.unsortedIter(); iter.valid(); iter.advance()) {
+ * doSomething(iter.get());
+ * }
+ * }
+ * </pre>
+ *
+ * @author Erich Schubert
+ */
+ private class UnsortedIter implements DoubleHeap.UnsortedIter {
+ /**
+ * Iterator position.
+ */
+ protected int pos = 0;
+
+ /**
+ * Modification counter we were initialized at.
+ */
+ protected final int myModCount = modCount;
+
+ @Override
+ public boolean valid() {
+ if (modCount != myModCount) {
+ throw new ConcurrentModificationException();
+ }
+ return pos < size;
+ }
+
+ @Override
+ public void advance() {
+ pos++;
+ }
+
+ @Override
+ public double get() {
+ return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]);
+ }
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjMaxHeap.java
deleted file mode 100644
index 8417309a..00000000
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjMaxHeap.java
+++ /dev/null
@@ -1,328 +0,0 @@
-package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
-
-/*
- This file is part of ELKI:
- Environment for Developing KDD-Applications Supported by Index-Structures
-
- Copyright (C) 2012
- Ludwig-Maximilians-Universität München
- Lehr- und Forschungseinheit für Datenbanksysteme
- ELKI Development Team
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-import java.util.Arrays;
-import java.util.Comparator;
-
-import de.lmu.ifi.dbs.elki.math.MathUtil;
-
-/**
- * Basic in-memory heap structure.
- *
- * This heap is built lazily: if you first add many elements, then poll the
- * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log
- * n). This is implemented via a simple validTo counter.
- *
- * @author Erich Schubert
- *
- * @param <V> value type
- */
-public class DoubleObjMaxHeap<V> {
- /**
- * Heap storage: keys
- */
- protected double[] keys;
-
- /**
- * Heap storage: values
- */
- protected Object[] values;
-
- /**
- * Current number of objects
- */
- protected int size = 0;
-
- /**
- * Indicate up to where the heap is valid
- */
- protected int validSize = 0;
-
- /**
- * (Structural) modification counter. Used to invalidate iterators.
- */
- public transient int modCount = 0;
-
- /**
- * Default initial capacity
- */
- private static final int DEFAULT_INITIAL_CAPACITY = 11;
-
- /**
- * Default constructor: default capacity, natural ordering.
- */
- public DoubleObjMaxHeap() {
- this(DEFAULT_INITIAL_CAPACITY);
- }
-
- /**
- * Constructor with initial capacity and {@link Comparator}.
- *
- * @param size initial capacity
- */
- public DoubleObjMaxHeap(int size) {
- super();
- this.size = 0;
- this.keys = new double[size];
- this.values = new Object[size];
- }
-
- /**
- * Add a key-value pair to the heap
- *
- * @param key Key
- * @param val Value
- * @return Success code
- */
- public boolean add(double key, V val) {
- // resize when needed
- if(size + 1 > keys.length) {
- resize(size + 1);
- }
- // final int pos = size;
- this.keys[size] = key;
- this.values[size] = val;
- this.size += 1;
- heapifyUp(size - 1, key, val);
- validSize += 1;
- // We have changed - return true according to {@link Collection#put}
- modCount++;
- return true;
- }
-
- /**
- * Get the current top key
- *
- * @return Top key
- */
- public double peekKey() {
- if(size == 0) {
- throw new ArrayIndexOutOfBoundsException("Peek() on an empty heap!");
- }
- ensureValid();
- return keys[0];
- }
-
- /**
- * Get the current top value
- *
- * @return Value
- */
- @SuppressWarnings("unchecked")
- public V peekValue() {
- if(size == 0) {
- throw new ArrayIndexOutOfBoundsException("Peek() on an empty heap!");
- }
- ensureValid();
- return (V) values[0];
- }
-
- /**
- * Remove the first element
- */
- public void poll() {
- removeAt(0);
- }
-
- /**
- * Repair the heap
- */
- protected void ensureValid() {
- if(validSize != size) {
- if(size > 1) {
- // Parent of first invalid
- int nextmin = validSize > 0 ? ((validSize - 1) >>> 1) : 0;
- int curmin = MathUtil.nextAllOnesInt(nextmin); // Next line
- int nextmax = curmin - 1; // End of valid line
- int pos = (size - 2) >>> 1; // Parent of last element
- // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin+", "+nextmin);
- while(pos >= nextmin) {
- // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin);
- while(pos >= curmin) {
- if(!heapifyDown(pos, keys[pos], values[pos])) {
- final int parent = (pos - 1) >>> 1;
- if(parent < curmin) {
- nextmin = Math.min(nextmin, parent);
- nextmax = Math.max(nextmax, parent);
- }
- }
- pos--;
- }
- curmin = nextmin;
- pos = Math.min(pos, nextmax);
- nextmax = -1;
- }
- }
- validSize = size;
- }
- }
-
- /**
- * Remove the element at the given position.
- *
- * @param pos Element position.
- */
- protected void removeAt(int pos) {
- if(pos < 0 || pos >= size) {
- return;
- }
- // Replacement object:
- final double reinkey = keys[size - 1];
- final Object reinval = values[size - 1];
- values[size - 1] = null;
- // Keep heap in sync
- if(validSize == size) {
- size -= 1;
- validSize -= 1;
- heapifyDown(pos, reinkey, reinval);
- }
- else {
- size -= 1;
- validSize = Math.min(pos >>> 1, validSize);
- keys[pos] = reinkey;
- values[pos] = reinval;
- }
- modCount++;
- }
-
- /**
- * Execute a "Heapify Upwards" aka "SiftUp". Used in insertions.
- *
- * @param pos insertion position
- * @param curkey Current key
- * @param curval Current value
- */
- protected void heapifyUp(int pos, double curkey, Object curval) {
- while(pos > 0) {
- final int parent = (pos - 1) >>> 1;
- double parkey = keys[parent];
-
- if(curkey <= parkey) { // Compare
- break;
- }
- keys[pos] = parkey;
- values[pos] = values[parent];
- pos = parent;
- }
- keys[pos] = curkey;
- values[pos] = curval;
- }
-
- /**
- * Execute a "Heapify Downwards" aka "SiftDown". Used in deletions.
- *
- * @param ipos re-insertion position
- * @param curkey Current key
- * @param curval Current value
- * @return true when the order was changed
- */
- protected boolean heapifyDown(final int ipos, double curkey, Object curval) {
- int pos = ipos;
- final int half = size >>> 1;
- while(pos < half) {
- // Get left child (must exist!)
- int cpos = (pos << 1) + 1;
- double chikey = keys[cpos];
- Object chival = values[cpos];
- // Test right child, if present
- final int rchild = cpos + 1;
- if(rchild < size) {
- double right = keys[rchild];
- if(chikey < right) { // Compare
- cpos = rchild;
- chikey = right;
- chival = values[rchild];
- }
- }
-
- if(curkey >= chikey) { // Compare
- break;
- }
- keys[pos] = chikey;
- values[pos] = chival;
- pos = cpos;
- }
- keys[pos] = curkey;
- values[pos] = curval;
- return (pos == ipos);
- }
-
- /**
- * Query the size
- *
- * @return Size
- */
- public int size() {
- return this.size;
- }
-
- /**
- * Test whether we need to resize to have the requested capacity.
- *
- * @param requiredSize required capacity
- */
- protected final void resize(int requiredSize) {
- // Double until 64, then increase by 50% each time.
- int newCapacity = ((keys.length < 64) ? ((keys.length + 1) << 1) : ((keys.length >> 1) * 3));
- // overflow?
- if(newCapacity < 0) {
- throw new OutOfMemoryError();
- }
- if(requiredSize > newCapacity) {
- newCapacity = requiredSize;
- }
- keys = Arrays.copyOf(keys, newCapacity);
- values = Arrays.copyOf(values, newCapacity);
- }
-
- /**
- * Delete all elements from the heap.
- */
- public void clear() {
- // clean up references in the array for memory management
- Arrays.fill(values, null);
- this.size = 0;
- this.validSize = -1;
- modCount++;
- }
-
- /**
- * Test whether the heap is still valid.
- *
- * Debug method.
- *
- * @return {@code null} when the heap is correct
- */
- protected String checkHeap() {
- ensureValid();
- for(int i = 1; i < size; i++) {
- final int parent = (i - 1) >>> 1;
- if(keys[parent] < keys[i]) { // Compare
- return "@" + parent + ": " + keys[parent] + " < @" + i + ": " + keys[i];
- }
- }
- return null;
- }
-}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjMinHeap.java
deleted file mode 100644
index 244277e8..00000000
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjMinHeap.java
+++ /dev/null
@@ -1,328 +0,0 @@
-package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
-
-/*
- This file is part of ELKI:
- Environment for Developing KDD-Applications Supported by Index-Structures
-
- Copyright (C) 2012
- Ludwig-Maximilians-Universität München
- Lehr- und Forschungseinheit für Datenbanksysteme
- ELKI Development Team
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-import java.util.Arrays;
-import java.util.Comparator;
-
-import de.lmu.ifi.dbs.elki.math.MathUtil;
-
-/**
- * Basic in-memory heap structure.
- *
- * This heap is built lazily: if you first add many elements, then poll the
- * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log
- * n). This is implemented via a simple validTo counter.
- *
- * @author Erich Schubert
- *
- * @param <V> value type
- */
-public class DoubleObjMinHeap<V> {
- /**
- * Heap storage: keys
- */
- protected double[] keys;
-
- /**
- * Heap storage: values
- */
- protected Object[] values;
-
- /**
- * Current number of objects
- */
- protected int size = 0;
-
- /**
- * Indicate up to where the heap is valid
- */
- protected int validSize = 0;
-
- /**
- * (Structural) modification counter. Used to invalidate iterators.
- */
- public transient int modCount = 0;
-
- /**
- * Default initial capacity
- */
- private static final int DEFAULT_INITIAL_CAPACITY = 11;
-
- /**
- * Default constructor: default capacity, natural ordering.
- */
- public DoubleObjMinHeap() {
- this(DEFAULT_INITIAL_CAPACITY);
- }
-
- /**
- * Constructor with initial capacity and {@link Comparator}.
- *
- * @param size initial capacity
- */
- public DoubleObjMinHeap(int size) {
- super();
- this.size = 0;
- this.keys = new double[size];
- this.values = new Object[size];
- }
-
- /**
- * Add a key-value pair to the heap
- *
- * @param key Key
- * @param val Value
- * @return Success code
- */
- public boolean add(double key, V val) {
- // resize when needed
- if(size + 1 > keys.length) {
- resize(size + 1);
- }
- // final int pos = size;
- this.keys[size] = key;
- this.values[size] = val;
- this.size += 1;
- heapifyUp(size - 1, key, val);
- validSize += 1;
- // We have changed - return true according to {@link Collection#put}
- modCount++;
- return true;
- }
-
- /**
- * Get the current top key
- *
- * @return Top key
- */
- public double peekKey() {
- if(size == 0) {
- throw new ArrayIndexOutOfBoundsException("Peek() on an empty heap!");
- }
- ensureValid();
- return keys[0];
- }
-
- /**
- * Get the current top value
- *
- * @return Value
- */
- @SuppressWarnings("unchecked")
- public V peekValue() {
- if(size == 0) {
- throw new ArrayIndexOutOfBoundsException("Peek() on an empty heap!");
- }
- ensureValid();
- return (V) values[0];
- }
-
- /**
- * Remove the first element
- */
- public void poll() {
- removeAt(0);
- }
-
- /**
- * Repair the heap
- */
- protected void ensureValid() {
- if(validSize != size) {
- if(size > 1) {
- // Parent of first invalid
- int nextmin = validSize > 0 ? ((validSize - 1) >>> 1) : 0;
- int curmin = MathUtil.nextAllOnesInt(nextmin); // Next line
- int nextmax = curmin - 1; // End of valid line
- int pos = (size - 2) >>> 1; // Parent of last element
- // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin+", "+nextmin);
- while(pos >= nextmin) {
- // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin);
- while(pos >= curmin) {
- if(!heapifyDown(pos, keys[pos], values[pos])) {
- final int parent = (pos - 1) >>> 1;
- if(parent < curmin) {
- nextmin = Math.min(nextmin, parent);
- nextmax = Math.max(nextmax, parent);
- }
- }
- pos--;
- }
- curmin = nextmin;
- pos = Math.min(pos, nextmax);
- nextmax = -1;
- }
- }
- validSize = size;
- }
- }
-
- /**
- * Remove the element at the given position.
- *
- * @param pos Element position.
- */
- protected void removeAt(int pos) {
- if(pos < 0 || pos >= size) {
- return;
- }
- // Replacement object:
- final double reinkey = keys[size - 1];
- final Object reinval = values[size - 1];
- values[size - 1] = null;
- // Keep heap in sync
- if(validSize == size) {
- size -= 1;
- validSize -= 1;
- heapifyDown(pos, reinkey, reinval);
- }
- else {
- size -= 1;
- validSize = Math.min(pos >>> 1, validSize);
- keys[pos] = reinkey;
- values[pos] = reinval;
- }
- modCount++;
- }
-
- /**
- * Execute a "Heapify Upwards" aka "SiftUp". Used in insertions.
- *
- * @param pos insertion position
- * @param curkey Current key
- * @param curval Current value
- */
- protected void heapifyUp(int pos, double curkey, Object curval) {
- while(pos > 0) {
- final int parent = (pos - 1) >>> 1;
- double parkey = keys[parent];
-
- if(curkey >= parkey) { // Compare
- break;
- }
- keys[pos] = parkey;
- values[pos] = values[parent];
- pos = parent;
- }
- keys[pos] = curkey;
- values[pos] = curval;
- }
-
- /**
- * Execute a "Heapify Downwards" aka "SiftDown". Used in deletions.
- *
- * @param ipos re-insertion position
- * @param curkey Current key
- * @param curval Current value
- * @return true when the order was changed
- */
- protected boolean heapifyDown(final int ipos, double curkey, Object curval) {
- int pos = ipos;
- final int half = size >>> 1;
- while(pos < half) {
- // Get left child (must exist!)
- int cpos = (pos << 1) + 1;
- double chikey = keys[cpos];
- Object chival = values[cpos];
- // Test right child, if present
- final int rchild = cpos + 1;
- if(rchild < size) {
- double right = keys[rchild];
- if(chikey > right) { // Compare
- cpos = rchild;
- chikey = right;
- chival = values[rchild];
- }
- }
-
- if(curkey <= chikey) { // Compare
- break;
- }
- keys[pos] = chikey;
- values[pos] = chival;
- pos = cpos;
- }
- keys[pos] = curkey;
- values[pos] = curval;
- return (pos == ipos);
- }
-
- /**
- * Query the size
- *
- * @return Size
- */
- public int size() {
- return this.size;
- }
-
- /**
- * Test whether we need to resize to have the requested capacity.
- *
- * @param requiredSize required capacity
- */
- protected final void resize(int requiredSize) {
- // Double until 64, then increase by 50% each time.
- int newCapacity = ((keys.length < 64) ? ((keys.length + 1) << 1) : ((keys.length >> 1) * 3));
- // overflow?
- if(newCapacity < 0) {
- throw new OutOfMemoryError();
- }
- if(requiredSize > newCapacity) {
- newCapacity = requiredSize;
- }
- keys = Arrays.copyOf(keys, newCapacity);
- values = Arrays.copyOf(values, newCapacity);
- }
-
- /**
- * Delete all elements from the heap.
- */
- public void clear() {
- // clean up references in the array for memory management
- Arrays.fill(values, null);
- this.size = 0;
- this.validSize = -1;
- modCount++;
- }
-
- /**
- * Test whether the heap is still valid.
- *
- * Debug method.
- *
- * @return {@code null} when the heap is correct
- */
- protected String checkHeap() {
- ensureValid();
- for(int i = 1; i < size; i++) {
- final int parent = (i - 1) >>> 1;
- if(keys[parent] > keys[i]) { // Compare
- return "@" + parent + ": " + keys[parent] + " < @" + i + ": " + keys[i];
- }
- }
- return null;
- }
-}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectHeap.java
new file mode 100644
index 00000000..db65ce81
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectHeap.java
@@ -0,0 +1,129 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter;
+
+/**
+ * Basic in-memory heap interface, for double keys and V values.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has UnsortedIter
+ * @param <V> Value type
+ */
+public interface DoubleObjectHeap<V> {
+ /**
+ * Add a key-value pair to the heap
+ *
+ * @param key Key
+ * @param val Value
+ */
+ void add(double key, V val);
+
+ /**
+ * Add a key-value pair to the heap if it improves the top.
+ *
+ * @param key Key
+ * @param val Value
+ * @param k Desired maximum size
+ */
+ void add(double key, V val, int k);
+
+ /**
+ * Combined operation that removes the top element, and inserts a new element
+ * instead.
+ *
+ * @param key Key of new element
+ * @param val Value of new element
+ */
+ void replaceTopElement(double key, V val);
+
+ /**
+ * Get the current top key
+ *
+ * @return Top key
+ */
+ double peekKey();
+
+ /**
+ * Get the current top value
+ *
+ * @return Value
+ */
+ V peekValue();
+
+ /**
+ * Remove the first element
+ */
+ void poll();
+
+ /**
+ * Clear the heap contents.
+ */
+ void clear();
+
+ /**
+ * Query the size
+ *
+ * @return Size
+ */
+ public int size();
+
+ /**
+ * Is the heap empty?
+ *
+ * @return {@code true} when the size is 0.
+ */
+ public boolean isEmpty();
+
+ /**
+ * Get an unsorted iterator to inspect the heap.
+ *
+ * @return Iterator
+ */
+ UnsortedIter<V> unsortedIter();
+
+ /**
+ * Unsorted iterator - in heap order. Does not poll the heap.
+ *
+ * @author Erich Schubert
+ * @param <V> Value type
+ */
+ public static interface UnsortedIter<V> extends Iter {
+ /**
+ * Get the current key
+ *
+ * @return Current key
+ */
+ double getKey();
+
+ /**
+ * Get the current value
+ *
+ * @return Current value
+ */
+ V getValue();
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMaxHeap.java
new file mode 100644
index 00000000..dd89573c
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMaxHeap.java
@@ -0,0 +1,482 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Arrays;
+import java.util.ConcurrentModificationException;
+
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+
+/**
+ * Advanced priority queue class, based on a binary heap (for small sizes),
+ * which will for larger heaps be accompanied by a 4-ary heap (attached below
+ * the root of the two-ary heap, making the root actually 3-ary).
+ *
+ * This code was automatically instantiated for the types: Double and Object
+ *
+ * This combination was found to work quite well in benchmarks, but YMMV.
+ *
+ * Some other observations from benchmarking:
+ * <ul>
+ * <li>Bulk loading did not improve things</li>
+ * <li>Primitive heaps are substantially faster.</li>
+ * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and
+ * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li>
+ * <li>Workload makes a huge difference. A load-once, poll-until-empty priority
+ * queue is something different than e.g. a top-k heap, which will see a lot of
+ * top element replacements.</li>
+ * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for
+ * top-k make a difference.</li>
+ * <li>Different day, different benchmark results ...</li>
+ * </ul>
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has UnsortedIter
+ * @param <V> Value type
+ */
+public class DoubleObjectMaxHeap<V> implements DoubleObjectHeap<V> {
+ /**
+ * Base heap.
+ */
+ protected double[] twoheap;
+
+ /**
+ * Base heap values.
+ */
+ protected Object[] twovals;
+
+ /**
+ * Extension heap.
+ */
+ protected double[] fourheap;
+
+ /**
+ * Extension heap values.
+ */
+ protected Object[] fourvals;
+
+ /**
+ * Current size of heap.
+ */
+ protected int size;
+
+ /**
+ * (Structural) modification counter. Used to invalidate iterators.
+ */
+ protected int modCount = 0;
+
+ /**
+ * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements.
+ */
+ private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1;
+
+ /**
+ * Initial size of the 2-ary heap.
+ */
+ private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1;
+
+ /**
+ * Initial size of 4-ary heap when initialized.
+ *
+ * 21 = 4-ary heap of height 2: 1 + 4 + 4*4
+ *
+ * 85 = 4-ary heap of height 3: 21 + 4*4*4
+ *
+ * 341 = 4-ary heap of height 4: 85 + 4*4*4*4
+ *
+ * Since we last grew by 255 (to 511), let's use 341.
+ */
+ private final static int FOUR_HEAP_INITIAL_SIZE = 341;
+
+ /**
+  * Create an empty heap with the default initial capacity.
+  *
+  * Only the binary heap is allocated here; the 4-ary extension stays
+  * {@code null} for now.
+  */
+ public DoubleObjectMaxHeap() {
+   super();
+   this.twoheap = new double[TWO_HEAP_INITIAL_SIZE];
+   this.twovals = new Object[TWO_HEAP_INITIAL_SIZE];
+   this.fourheap = null;
+   this.fourvals = null;
+   this.size = 0;
+   this.modCount = 0;
+ }
+
+ /**
+  * Create an empty heap, pre-sized for an expected number of entries.
+  *
+  * Below {@code TWO_HEAP_MAX_SIZE} only the binary heap is allocated, with a
+  * capacity derived via {@code MathUtil.nextPow2Int} (presumably the next
+  * power of two, minus one - confirm against MathUtil). From that threshold
+  * on, the binary heap starts at its default capacity and the 4-ary
+  * extension is allocated for the remainder, with at least 21 slots.
+  *
+  * @param minsize Minimum size
+  */
+ public DoubleObjectMaxHeap(int minsize) {
+   super();
+   if (minsize >= TWO_HEAP_MAX_SIZE) {
+     final int extension = Math.max(21, minsize - TWO_HEAP_MAX_SIZE);
+     this.twoheap = new double[TWO_HEAP_INITIAL_SIZE];
+     this.twovals = new Object[TWO_HEAP_INITIAL_SIZE];
+     this.fourheap = new double[extension];
+     this.fourvals = new Object[extension];
+   } else {
+     final int capacity = MathUtil.nextPow2Int(minsize + 1) - 1;
+     this.twoheap = new double[capacity];
+     this.twovals = new Object[capacity];
+     this.fourheap = null;
+     this.fourvals = null;
+   }
+   this.size = 0;
+   this.modCount = 0;
+ }
+
+ @Override
+ public void clear() {
+   // Wipe the binary heap in place, which also releases the value references.
+   Arrays.fill(twovals, null);
+   Arrays.fill(twoheap, 0.0);
+   // Drop the 4-ary extension entirely.
+   fourvals = null;
+   fourheap = null;
+   size = 0;
+   // Invalidate iterators created before the clear.
+   ++modCount;
+ }
+
+ @Override
+ public int size() {
+   // Counts the entries of the binary heap and the 4-ary extension together.
+   return this.size;
+ }
+
+ @Override
+ public boolean isEmpty() {
+   // Empty exactly when no entry is stored.
+   return this.size == 0;
+ }
+
+ @Override
+ public void add(double o, V v) {
+   if (size >= TWO_HEAP_MAX_SIZE) {
+     // The binary heap is full: the entry goes into the 4-ary extension.
+     final int slot = size - TWO_HEAP_MAX_SIZE;
+     if (fourheap == null) {
+       fourheap = new double[FOUR_HEAP_INITIAL_SIZE];
+       fourvals = new Object[FOUR_HEAP_INITIAL_SIZE];
+     } else if (slot >= fourheap.length) {
+       // Grow extension heap by half.
+       final int grown = fourheap.length + (fourheap.length >> 1);
+       fourheap = Arrays.copyOf(fourheap, grown);
+       fourvals = Arrays.copyOf(fourvals, grown);
+     }
+     ++size;
+     // Sift-up writes the entry into its final slot.
+     heapifyUp4(slot, o, v);
+   } else {
+     final int slot = size;
+     if (slot >= twoheap.length) {
+       // Grow by one layer: n slots become 2n + 1.
+       final int grown = (twoheap.length << 1) + 1;
+       twoheap = Arrays.copyOf(twoheap, grown);
+       twovals = Arrays.copyOf(twovals, grown);
+     }
+     ++size;
+     // Sift-up writes the entry into its final slot.
+     heapifyUp2(slot, o, v);
+   }
+   ++modCount;
+ }
+
+ @Override
+ public void add(double key, V val, int max) {
+   if (size >= max) {
+     // At the bound: the pair only gets in by replacing the current maximum,
+     // and only if its key does not exceed that maximum.
+     if (twoheap[0] >= key) {
+       replaceTopElement(key, val);
+     }
+     return;
+   }
+   add(key, val);
+ }
+
+ @Override
+ public void replaceTopElement(double reinsert, V val) {
+   // The old root is simply overwritten while the new entry sifts down.
+   final Object boxed = val;
+   heapifyDown(reinsert, boxed);
+   ++modCount;
+ }
+
+ /**
+  * Sift an entry up the binary heap.
+  *
+  * Parents with a smaller key are shifted one level down until the entry
+  * reaches the root or a parent that is at least as large.
+  *
+  * @param twopos Position in 2-ary heap.
+  * @param cur Current object
+  * @param val Current value
+  */
+ private void heapifyUp2(int twopos, double cur, Object val) {
+   int hole = twopos;
+   for (int up; hole > 0; hole = up) {
+     up = (hole - 1) >>> 1;
+     final double upkey = twoheap[up];
+     if (cur <= upkey) {
+       break;
+     }
+     // Pull the smaller parent down into the hole.
+     twoheap[hole] = upkey;
+     twovals[hole] = twovals[up];
+   }
+   twoheap[hole] = cur;
+   twovals[hole] = val;
+ }
+
+ /**
+ * Heapify-Up method for 4-ary heap.
+ *
+ * Shifts smaller parents down inside the 4-ary extension until the entry
+ * meets a parent that is at least as large. If the entry arrives at the
+ * extension root (position 0) it is additionally compared with the root of
+ * the binary heap, and takes its place when it is larger.
+ *
+ * @param fourpos Position in 4-ary heap.
+ * @param cur Current object
+ * @param val Current value
+ */
+ private void heapifyUp4(int fourpos, double cur, Object val) {
+ while (fourpos > 0) {
+ // Parent of position i in the array-backed 4-ary heap: (i - 1) / 4.
+ final int parent = (fourpos - 1) >> 2;
+ double par = fourheap[parent];
+ if (cur <= par) {
+ break;
+ }
+ fourheap[fourpos] = par;
+ fourvals[fourpos] = fourvals[parent];
+ fourpos = parent;
+ }
+ // The extension root hangs below twoheap[0]: hand over to the binary heap
+ // root if the new key is larger, demoting the old root into the extension.
+ if (fourpos == 0 && twoheap[0] < cur) {
+ fourheap[0] = twoheap[0];
+ fourvals[0] = twovals[0];
+ twoheap[0] = cur;
+ twovals[0] = val;
+ } else {
+ fourheap[fourpos] = cur;
+ fourvals[fourpos] = val;
+ }
+ }
+
+ @Override
+ public void poll() {
+   if (size <= 0) {
+     // Polling an empty heap is a no-op. Without this guard the size became
+     // negative, and the next add() indexed slot -1.
+     return;
+   }
+   --size;
+   // The last stored entry fills the gap left by the removed root.
+   if (size >= TWO_HEAP_MAX_SIZE) {
+     // Last entry lives in the 4-ary extension.
+     final int last = size - TWO_HEAP_MAX_SIZE;
+     final double reinsert = fourheap[last];
+     final Object reinsertv = fourvals[last];
+     // Clear the vacated slot so the value can be garbage collected.
+     fourheap[last] = 0.0;
+     fourvals[last] = null;
+     heapifyDown(reinsert, reinsertv);
+   } else if (size > 0) {
+     // Last entry lives in the binary heap.
+     final double reinsert = twoheap[size];
+     final Object reinsertv = twovals[size];
+     twoheap[size] = 0.0;
+     twovals[size] = null;
+     heapifyDown(reinsert, reinsertv);
+   } else {
+     // The root was the only entry.
+     twoheap[0] = 0.0;
+     twovals[0] = null;
+   }
+   ++modCount;
+ }
+
+ /**
+  * Fill the root slot: place the given entry at the root, or promote the
+  * largest child and sift the entry down the corresponding sub-heap.
+  *
+  * While the 4-ary extension is in use the root has three children:
+  * {@code twoheap[1]}, {@code twoheap[2]} and {@code fourheap[0]}.
+  *
+  * @param reinsert Key to insert.
+  * @param val Value to reinsert.
+  */
+ private void heapifyDown(double reinsert, Object val) {
+   if (size <= TWO_HEAP_MAX_SIZE) {
+     heapifyDown2(0, reinsert, val);
+     return;
+   }
+   // Special case: 3-ary situation at the root.
+   final int best = (twoheap[1] >= twoheap[2]) ? 1 : 2;
+   final boolean viaFour = fourheap[0] > twoheap[best];
+   final double bestKey = viaFour ? fourheap[0] : twoheap[best];
+   if (reinsert >= bestKey) {
+     // The new entry dominates all three children and stays at the root.
+     // Previously the largest child was promoted unconditionally, which
+     // broke the heap order when replaceTopElement() got a larger key.
+     twoheap[0] = reinsert;
+     twovals[0] = val;
+     return;
+   }
+   if (viaFour) {
+     twoheap[0] = fourheap[0];
+     twovals[0] = fourvals[0];
+     heapifyDown4(0, reinsert, val);
+   } else {
+     twoheap[0] = twoheap[best];
+     twovals[0] = twovals[best];
+     heapifyDown2(best, reinsert, val);
+   }
+ }
+
+ /**
+ * Heapify-Down for 2-ary heap.
+ *
+ * Moves a hole down the binary heap, pulling up the larger child each time,
+ * until the entry is at least as large as the larger child or a position
+ * without children is reached. The entry is then stored in the hole.
+ *
+ * @param twopos Position in 2-ary heap.
+ * @param cur Current object
+ * @param val Value to reinsert.
+ */
+ private void heapifyDown2(int twopos, double cur, Object val) {
+ // Positions from stop onwards have no child inside the binary heap.
+ final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1;
+ while (twopos < stop) {
+ // Start with the left child, which exists for every position below stop.
+ int bestchild = (twopos << 1) + 1;
+ double best = twoheap[bestchild];
+ final int right = bestchild + 1;
+ // The right child may be missing at the very end of the heap.
+ if (right < size && best < twoheap[right]) {
+ bestchild = right;
+ best = twoheap[right];
+ }
+ if (cur >= best) {
+ break;
+ }
+ twoheap[twopos] = best;
+ twovals[twopos] = twovals[bestchild];
+ twopos = bestchild;
+ }
+ twoheap[twopos] = cur;
+ twovals[twopos] = val;
+ }
+
+ /**
+ * Heapify-Down for 4-ary heap.
+ *
+ * Moves a hole down the 4-ary extension, pulling up the largest of up to
+ * four children each time, until the entry is at least as large as that
+ * child or a position without children is reached.
+ *
+ * @param fourpos Position in 4-ary heap.
+ * @param cur Current object
+ * @param val Value to reinsert.
+ */
+ private void heapifyDown4(int fourpos, double cur, Object val) {
+ // With f = size - TWO_HEAP_MAX_SIZE entries in the extension, position p
+ // has a first child iff 4p + 1 < f, i.e. iff p < (f + 2) / 4.
+ final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2;
+ while (fourpos < stop) {
+ final int child = (fourpos << 2) + 1;
+ double best = fourheap[child];
+ // minsize is expressed in total heap size, so it can be compared with
+ // size directly: size > minsize means position candidate is occupied.
+ int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE;
+ if (size > minsize) {
+ // Second child exists.
+ double nextchild = fourheap[candidate];
+ if (best < nextchild) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+
+ minsize += 2;
+ // minsize now corresponds to the fourth child (child + 3).
+ if (size >= minsize) {
+ // Third child exists.
+ nextchild = fourheap[++candidate];
+ if (best < nextchild) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+
+ if (size > minsize) {
+ // Fourth child exists.
+ nextchild = fourheap[++candidate];
+ if (best < nextchild) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+ }
+ }
+ }
+ if (cur >= best) {
+ break;
+ }
+ fourheap[fourpos] = best;
+ fourvals[fourpos] = fourvals[bestchild];
+ fourpos = bestchild;
+ }
+ fourheap[fourpos] = cur;
+ fourvals[fourpos] = val;
+ }
+
+ @Override
+ public double peekKey() {
+   // The maximum always sits in slot 0 of the binary heap.
+   final double top = twoheap[0];
+   return top;
+ }
+
+ @Override
+ @SuppressWarnings("unchecked")
+ public V peekValue() {
+   // Values are stored as Object; only V instances are ever inserted.
+   final Object top = twovals[0];
+   return (V) top;
+ }
+
+ @Override
+ public String toString() {
+   final StringBuilder out = new StringBuilder(DoubleObjectMaxHeap.class.getSimpleName());
+   out.append(" [");
+   // Entries are listed in storage order, each followed by a comma.
+   final UnsortedIter it = new UnsortedIter();
+   while (it.valid()) {
+     out.append(it.getKey()).append(':').append(it.getValue()).append(',');
+     it.advance();
+   }
+   return out.append(']').toString();
+ }
+
+ @Override
+ public UnsortedIter unsortedIter() {
+ // A fresh iterator starts at position 0 and remembers the current modCount.
+ return new UnsortedIter();
+ }
+
+ /**
+ * Unsorted iterator - in heap order. Does not poll the heap.
+ *
+ * Use this class as follows:
+ *
+ * <pre>
+ * {@code
+ * for (DoubleObjectHeap.UnsortedIter<V> iter = heap.unsortedIter(); iter.valid(); iter.advance()) {
+ * doSomething(iter.getKey(), iter.getValue());
+ * }
+ * }
+ * </pre>
+ *
+ * The iterator walks the binary heap array first, then the 4-ary extension.
+ * Modifying the heap after the iterator was created makes {@link #valid()}
+ * throw a {@link ConcurrentModificationException}.
+ *
+ * @author Erich Schubert
+ */
+ private class UnsortedIter implements DoubleObjectHeap.UnsortedIter<V> {
+ /**
+ * Iterator position.
+ */
+ protected int pos = 0;
+
+ /**
+ * Modification counter we were initialized at.
+ */
+ protected final int myModCount = modCount;
+
+ @Override
+ public boolean valid() {
+ // Only this method checks for concurrent modification.
+ if (modCount != myModCount) {
+ throw new ConcurrentModificationException();
+ }
+ return pos < size;
+ }
+
+ @Override
+ public void advance() {
+ pos++;
+ }
+
+ @Override
+ public double getKey() {
+ // Positions beyond the binary heap map into the 4-ary extension.
+ return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]);
+ }
+
+ @SuppressWarnings("unchecked")
+
+ @Override
+ public V getValue() {
+ // Positions beyond the binary heap map into the 4-ary extension.
+ return (V)((pos < TWO_HEAP_MAX_SIZE) ? twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMinHeap.java
new file mode 100644
index 00000000..905cdedb
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMinHeap.java
@@ -0,0 +1,482 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Arrays;
+import java.util.ConcurrentModificationException;
+
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+
+/**
+ * Advanced priority queue class, based on a binary heap (for small sizes),
+ * which will for larger heaps be accompanied by a 4-ary heap (attached below
+ * the root of the two-ary heap, making the root actually 3-ary).
+ *
+ * This code was automatically instantiated for the types: Double and Object
+ *
+ * This combination was found to work quite well in benchmarks, but YMMV.
+ *
+ * Some other observations from benchmarking:
+ * <ul>
+ * <li>Bulk loading did not improve things</li>
+ * <li>Primitive heaps are substantially faster.</li>
+ * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and
+ * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li>
+ * <li>Workload makes a huge difference. A load-once, poll-until-empty priority
+ * queue is something different than e.g. a top-k heap, which will see a lot of
+ * top element replacements.</li>
+ * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for
+ * top-k make a difference.</li>
+ * <li>Different day, different benchmark results ...</li>
+ * </ul>
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has UnsortedIter
+ * @param <V> Value type
+ */
+public class DoubleObjectMinHeap<V> implements DoubleObjectHeap<V> {
+ /**
+ * Base heap.
+ */
+ protected double[] twoheap;
+
+ /**
+ * Base heap values.
+ */
+ protected Object[] twovals;
+
+ /**
+ * Extension heap.
+ */
+ protected double[] fourheap;
+
+ /**
+ * Extension heap values.
+ */
+ protected Object[] fourvals;
+
+ /**
+ * Current size of heap.
+ */
+ protected int size;
+
+ /**
+ * (Structural) modification counter. Used to invalidate iterators.
+ */
+ protected int modCount = 0;
+
+ /**
+ * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements.
+ */
+ private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1;
+
+ /**
+ * Initial size of the 2-ary heap.
+ */
+ private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1;
+
+ /**
+ * Initial size of 4-ary heap when initialized.
+ *
+ * 21 = 4-ary heap of height 2: 1 + 4 + 4*4
+ *
+ * 85 = 4-ary heap of height 3: 21 + 4*4*4
+ *
+ * 341 = 4-ary heap of height 4: 85 + 4*4*4*4
+ *
+ * Since we last grew by 255 (to 511), let's use 341.
+ */
+ private final static int FOUR_HEAP_INITIAL_SIZE = 341;
+
+ /**
+  * Create an empty heap with the default initial capacity.
+  *
+  * Only the binary heap is allocated here; the 4-ary extension stays
+  * {@code null} for now.
+  */
+ public DoubleObjectMinHeap() {
+   super();
+   this.twoheap = new double[TWO_HEAP_INITIAL_SIZE];
+   this.twovals = new Object[TWO_HEAP_INITIAL_SIZE];
+   this.fourheap = null;
+   this.fourvals = null;
+   this.size = 0;
+   this.modCount = 0;
+ }
+
+ /**
+  * Create an empty heap, pre-sized for an expected number of entries.
+  *
+  * Below {@code TWO_HEAP_MAX_SIZE} only the binary heap is allocated, with a
+  * capacity derived via {@code MathUtil.nextPow2Int} (presumably the next
+  * power of two, minus one - confirm against MathUtil). From that threshold
+  * on, the binary heap starts at its default capacity and the 4-ary
+  * extension is allocated for the remainder, with at least 21 slots.
+  *
+  * @param minsize Minimum size
+  */
+ public DoubleObjectMinHeap(int minsize) {
+   super();
+   if (minsize >= TWO_HEAP_MAX_SIZE) {
+     final int extension = Math.max(21, minsize - TWO_HEAP_MAX_SIZE);
+     this.twoheap = new double[TWO_HEAP_INITIAL_SIZE];
+     this.twovals = new Object[TWO_HEAP_INITIAL_SIZE];
+     this.fourheap = new double[extension];
+     this.fourvals = new Object[extension];
+   } else {
+     final int capacity = MathUtil.nextPow2Int(minsize + 1) - 1;
+     this.twoheap = new double[capacity];
+     this.twovals = new Object[capacity];
+     this.fourheap = null;
+     this.fourvals = null;
+   }
+   this.size = 0;
+   this.modCount = 0;
+ }
+
+ @Override
+ public void clear() {
+   // Wipe the binary heap in place, which also releases the value references.
+   Arrays.fill(twovals, null);
+   Arrays.fill(twoheap, 0.0);
+   // Drop the 4-ary extension entirely.
+   fourvals = null;
+   fourheap = null;
+   size = 0;
+   // Invalidate iterators created before the clear.
+   ++modCount;
+ }
+
+ @Override
+ public int size() {
+   // Counts the entries of the binary heap and the 4-ary extension together.
+   return this.size;
+ }
+
+ @Override
+ public boolean isEmpty() {
+   // Empty exactly when no entry is stored.
+   return this.size == 0;
+ }
+
+ @Override
+ public void add(double o, V v) {
+   if (size >= TWO_HEAP_MAX_SIZE) {
+     // The binary heap is full: the entry goes into the 4-ary extension.
+     final int slot = size - TWO_HEAP_MAX_SIZE;
+     if (fourheap == null) {
+       fourheap = new double[FOUR_HEAP_INITIAL_SIZE];
+       fourvals = new Object[FOUR_HEAP_INITIAL_SIZE];
+     } else if (slot >= fourheap.length) {
+       // Grow extension heap by half.
+       final int grown = fourheap.length + (fourheap.length >> 1);
+       fourheap = Arrays.copyOf(fourheap, grown);
+       fourvals = Arrays.copyOf(fourvals, grown);
+     }
+     ++size;
+     // Sift-up writes the entry into its final slot.
+     heapifyUp4(slot, o, v);
+   } else {
+     final int slot = size;
+     if (slot >= twoheap.length) {
+       // Grow by one layer: n slots become 2n + 1.
+       final int grown = (twoheap.length << 1) + 1;
+       twoheap = Arrays.copyOf(twoheap, grown);
+       twovals = Arrays.copyOf(twovals, grown);
+     }
+     ++size;
+     // Sift-up writes the entry into its final slot.
+     heapifyUp2(slot, o, v);
+   }
+   ++modCount;
+ }
+
+ @Override
+ public void add(double key, V val, int max) {
+   if (size >= max) {
+     // At the bound: the pair only gets in by replacing the current minimum,
+     // and only if its key is not below that minimum.
+     if (twoheap[0] <= key) {
+       replaceTopElement(key, val);
+     }
+     return;
+   }
+   add(key, val);
+ }
+
+ @Override
+ public void replaceTopElement(double reinsert, V val) {
+   // The old root is simply overwritten while the new entry sifts down.
+   final Object boxed = val;
+   heapifyDown(reinsert, boxed);
+   ++modCount;
+ }
+
+ /**
+  * Sift an entry up the binary heap.
+  *
+  * Parents with a larger key are shifted one level down until the entry
+  * reaches the root or a parent that is at least as small.
+  *
+  * @param twopos Position in 2-ary heap.
+  * @param cur Current object
+  * @param val Current value
+  */
+ private void heapifyUp2(int twopos, double cur, Object val) {
+   int hole = twopos;
+   for (int up; hole > 0; hole = up) {
+     up = (hole - 1) >>> 1;
+     final double upkey = twoheap[up];
+     if (cur >= upkey) {
+       break;
+     }
+     // Pull the larger parent down into the hole.
+     twoheap[hole] = upkey;
+     twovals[hole] = twovals[up];
+   }
+   twoheap[hole] = cur;
+   twovals[hole] = val;
+ }
+
+ /**
+ * Heapify-Up method for 4-ary heap.
+ *
+ * Shifts larger parents down inside the 4-ary extension until the entry
+ * meets a parent that is at least as small. If the entry arrives at the
+ * extension root (position 0) it is additionally compared with the root of
+ * the binary heap, and takes its place when it is smaller.
+ *
+ * @param fourpos Position in 4-ary heap.
+ * @param cur Current object
+ * @param val Current value
+ */
+ private void heapifyUp4(int fourpos, double cur, Object val) {
+ while (fourpos > 0) {
+ // Parent of position i in the array-backed 4-ary heap: (i - 1) / 4.
+ final int parent = (fourpos - 1) >> 2;
+ double par = fourheap[parent];
+ if (cur >= par) {
+ break;
+ }
+ fourheap[fourpos] = par;
+ fourvals[fourpos] = fourvals[parent];
+ fourpos = parent;
+ }
+ // The extension root hangs below twoheap[0]: hand over to the binary heap
+ // root if the new key is smaller, demoting the old root into the extension.
+ if (fourpos == 0 && twoheap[0] > cur) {
+ fourheap[0] = twoheap[0];
+ fourvals[0] = twovals[0];
+ twoheap[0] = cur;
+ twovals[0] = val;
+ } else {
+ fourheap[fourpos] = cur;
+ fourvals[fourpos] = val;
+ }
+ }
+
+ @Override
+ public void poll() {
+   if (size <= 0) {
+     // Polling an empty heap is a no-op. Without this guard the size became
+     // negative, and the next add() indexed slot -1.
+     return;
+   }
+   --size;
+   // The last stored entry fills the gap left by the removed root.
+   if (size >= TWO_HEAP_MAX_SIZE) {
+     // Last entry lives in the 4-ary extension.
+     final int last = size - TWO_HEAP_MAX_SIZE;
+     final double reinsert = fourheap[last];
+     final Object reinsertv = fourvals[last];
+     // Clear the vacated slot so the value can be garbage collected.
+     fourheap[last] = 0.0;
+     fourvals[last] = null;
+     heapifyDown(reinsert, reinsertv);
+   } else if (size > 0) {
+     // Last entry lives in the binary heap.
+     final double reinsert = twoheap[size];
+     final Object reinsertv = twovals[size];
+     twoheap[size] = 0.0;
+     twovals[size] = null;
+     heapifyDown(reinsert, reinsertv);
+   } else {
+     // The root was the only entry.
+     twoheap[0] = 0.0;
+     twovals[0] = null;
+   }
+   ++modCount;
+ }
+
+ /**
+  * Fill the root slot: place the given entry at the root, or promote the
+  * smallest child and sift the entry down the corresponding sub-heap.
+  *
+  * While the 4-ary extension is in use the root has three children:
+  * {@code twoheap[1]}, {@code twoheap[2]} and {@code fourheap[0]}.
+  *
+  * @param reinsert Key to insert.
+  * @param val Value to reinsert.
+  */
+ private void heapifyDown(double reinsert, Object val) {
+   if (size <= TWO_HEAP_MAX_SIZE) {
+     heapifyDown2(0, reinsert, val);
+     return;
+   }
+   // Special case: 3-ary situation at the root.
+   final int best = (twoheap[1] <= twoheap[2]) ? 1 : 2;
+   final boolean viaFour = fourheap[0] < twoheap[best];
+   final double bestKey = viaFour ? fourheap[0] : twoheap[best];
+   if (reinsert <= bestKey) {
+     // The new entry precedes all three children and stays at the root.
+     // Previously the smallest child was promoted unconditionally, which
+     // broke the heap order when replaceTopElement() got a smaller key.
+     twoheap[0] = reinsert;
+     twovals[0] = val;
+     return;
+   }
+   if (viaFour) {
+     twoheap[0] = fourheap[0];
+     twovals[0] = fourvals[0];
+     heapifyDown4(0, reinsert, val);
+   } else {
+     twoheap[0] = twoheap[best];
+     twovals[0] = twovals[best];
+     heapifyDown2(best, reinsert, val);
+   }
+ }
+
+ /**
+ * Heapify-Down for 2-ary heap.
+ *
+ * Moves a hole down the binary heap, pulling up the smaller child each time,
+ * until the entry is at least as small as the smaller child or a position
+ * without children is reached. The entry is then stored in the hole.
+ *
+ * @param twopos Position in 2-ary heap.
+ * @param cur Current object
+ * @param val Value to reinsert.
+ */
+ private void heapifyDown2(int twopos, double cur, Object val) {
+ // Positions from stop onwards have no child inside the binary heap.
+ final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1;
+ while (twopos < stop) {
+ // Start with the left child, which exists for every position below stop.
+ int bestchild = (twopos << 1) + 1;
+ double best = twoheap[bestchild];
+ final int right = bestchild + 1;
+ // The right child may be missing at the very end of the heap.
+ if (right < size && best > twoheap[right]) {
+ bestchild = right;
+ best = twoheap[right];
+ }
+ if (cur <= best) {
+ break;
+ }
+ twoheap[twopos] = best;
+ twovals[twopos] = twovals[bestchild];
+ twopos = bestchild;
+ }
+ twoheap[twopos] = cur;
+ twovals[twopos] = val;
+ }
+
+ /**
+ * Heapify-Down for 4-ary heap.
+ *
+ * Moves a hole down the 4-ary extension, pulling up the smallest of up to
+ * four children each time, until the entry is at least as small as that
+ * child or a position without children is reached.
+ *
+ * @param fourpos Position in 4-ary heap.
+ * @param cur Current object
+ * @param val Value to reinsert.
+ */
+ private void heapifyDown4(int fourpos, double cur, Object val) {
+ // With f = size - TWO_HEAP_MAX_SIZE entries in the extension, position p
+ // has a first child iff 4p + 1 < f, i.e. iff p < (f + 2) / 4.
+ final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2;
+ while (fourpos < stop) {
+ final int child = (fourpos << 2) + 1;
+ double best = fourheap[child];
+ // minsize is expressed in total heap size, so it can be compared with
+ // size directly: size > minsize means position candidate is occupied.
+ int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE;
+ if (size > minsize) {
+ // Second child exists.
+ double nextchild = fourheap[candidate];
+ if (best > nextchild) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+
+ minsize += 2;
+ // minsize now corresponds to the fourth child (child + 3).
+ if (size >= minsize) {
+ // Third child exists.
+ nextchild = fourheap[++candidate];
+ if (best > nextchild) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+
+ if (size > minsize) {
+ // Fourth child exists.
+ nextchild = fourheap[++candidate];
+ if (best > nextchild) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+ }
+ }
+ }
+ if (cur <= best) {
+ break;
+ }
+ fourheap[fourpos] = best;
+ fourvals[fourpos] = fourvals[bestchild];
+ fourpos = bestchild;
+ }
+ fourheap[fourpos] = cur;
+ fourvals[fourpos] = val;
+ }
+
+ @Override
+ public double peekKey() {
+   // The minimum always sits in slot 0 of the binary heap.
+   final double top = twoheap[0];
+   return top;
+ }
+
+ @Override
+ @SuppressWarnings("unchecked")
+ public V peekValue() {
+   // Values are stored as Object; only V instances are ever inserted.
+   final Object top = twovals[0];
+   return (V) top;
+ }
+
+ @Override
+ public String toString() {
+   final StringBuilder out = new StringBuilder(DoubleObjectMinHeap.class.getSimpleName());
+   out.append(" [");
+   // Entries are listed in storage order, each followed by a comma.
+   final UnsortedIter it = new UnsortedIter();
+   while (it.valid()) {
+     out.append(it.getKey()).append(':').append(it.getValue()).append(',');
+     it.advance();
+   }
+   return out.append(']').toString();
+ }
+
+ @Override
+ public UnsortedIter unsortedIter() {
+ // A fresh iterator starts at position 0 and remembers the current modCount.
+ return new UnsortedIter();
+ }
+
+ /**
+ * Unsorted iterator - in heap order. Does not poll the heap.
+ *
+ * Use this class as follows:
+ *
+ * <pre>
+ * {@code
+ * for (DoubleObjectHeap.UnsortedIter<V> iter = heap.unsortedIter(); iter.valid(); iter.advance()) {
+ * doSomething(iter.getKey(), iter.getValue());
+ * }
+ * }
+ * </pre>
+ *
+ * The iterator walks the binary heap array first, then the 4-ary extension.
+ * Modifying the heap after the iterator was created makes {@link #valid()}
+ * throw a {@link ConcurrentModificationException}.
+ *
+ * @author Erich Schubert
+ */
+ private class UnsortedIter implements DoubleObjectHeap.UnsortedIter<V> {
+ /**
+ * Iterator position.
+ */
+ protected int pos = 0;
+
+ /**
+ * Modification counter we were initialized at.
+ */
+ protected final int myModCount = modCount;
+
+ @Override
+ public boolean valid() {
+ // Only this method checks for concurrent modification.
+ if (modCount != myModCount) {
+ throw new ConcurrentModificationException();
+ }
+ return pos < size;
+ }
+
+ @Override
+ public void advance() {
+ pos++;
+ }
+
+ @Override
+ public double getKey() {
+ // Positions beyond the binary heap map into the 4-ary extension.
+ return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]);
+ }
+
+ @SuppressWarnings("unchecked")
+
+ @Override
+ public V getValue() {
+ // Positions beyond the binary heap map into the 4-ary extension.
+ return (V)((pos < TWO_HEAP_MAX_SIZE) ? twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoublePriorityObject.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoublePriorityObject.java
index 92d548cb..82453885 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoublePriorityObject.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoublePriorityObject.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/Heap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/Heap.java
index 86d3ae08..2c278110 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/Heap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/Heap.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -25,11 +25,6 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
import java.util.Arrays;
import java.util.Comparator;
-import java.util.ConcurrentModificationException;
-import java.util.Iterator;
-import java.util.NoSuchElementException;
-
-import de.lmu.ifi.dbs.elki.math.MathUtil;
/**
* Basic in-memory heap structure. Closely related to a
@@ -45,11 +40,11 @@ import de.lmu.ifi.dbs.elki.math.MathUtil;
* @param <E> Element type. Should be {@link java.lang.Comparable} or a
* {@link java.util.Comparator} needs to be given.
*/
-public class Heap<E> implements Iterable<E> {
+public class Heap<E> {
/**
* Heap storage.
*/
- protected transient Object[] queue;
+ protected Object[] queue;
/**
* Current number of objects.
@@ -57,11 +52,6 @@ public class Heap<E> implements Iterable<E> {
protected int size = 0;
/**
- * Indicate up to where the heap is valid.
- */
- protected int validSize = 0;
-
- /**
* The comparator or {@code null}.
*/
protected final Comparator<Object> comparator;
@@ -69,7 +59,7 @@ public class Heap<E> implements Iterable<E> {
/**
* (Structural) modification counter. Used to invalidate iterators.
*/
- private transient int modCount = 0;
+ private int modCount = 0;
/**
* Default initial capacity.
@@ -126,10 +116,8 @@ public class Heap<E> implements Iterable<E> {
resize(size + 1);
}
// final int pos = size;
- this.queue[size] = e;
this.size += 1;
heapifyUp(size - 1, e);
- validSize += 1;
heapModified();
}
@@ -142,7 +130,6 @@ public class Heap<E> implements Iterable<E> {
*/
@SuppressWarnings("unchecked")
public E replaceTopElement(E e) {
- ensureValid();
E oldroot = (E) queue[0];
heapifyDown(0, e);
heapModified();
@@ -159,7 +146,6 @@ public class Heap<E> implements Iterable<E> {
if (size == 0) {
return null;
}
- ensureValid();
return (E) queue[0];
}
@@ -169,70 +155,10 @@ public class Heap<E> implements Iterable<E> {
* @return Top element.
*/
public E poll() {
- ensureValid();
return removeAt(0);
}
/**
- * Perform pending heap repair operations in a single bulk operation.
- */
- protected void ensureValid() {
- if (validSize != size) {
- if (size > 1) {
- // Bottom up heap update.
- if (comparator != null) {
- // Parent of first invalid
- int nextmin = validSize > 0 ? ((validSize - 1) >>> 1) : 0;
- int curmin = MathUtil.nextAllOnesInt(nextmin); // Next line
- int nextmax = curmin - 1; // End of valid line
- int pos = (size - 2) >>> 1; // Parent of last element
- // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin+", "+nextmin);
- while (pos >= nextmin) {
- // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin);
- while (pos >= curmin) {
- if (!heapifyDownComparator(pos, queue[pos])) {
- final int parent = (pos - 1) >>> 1;
- if (parent < curmin) {
- nextmin = Math.min(nextmin, parent);
- nextmax = Math.max(nextmax, parent);
- }
- }
- pos--;
- }
- curmin = nextmin;
- pos = Math.min(pos, nextmax);
- nextmax = -1;
- }
- } else {
- // Parent of first invalid
- int nextmin = validSize > 0 ? ((validSize - 1) >>> 1) : 0;
- int curmin = MathUtil.nextAllOnesInt(nextmin); // Next line
- int nextmax = curmin - 1; // End of valid line
- int pos = (size - 2) >>> 1; // Parent of last element
- // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin+", "+nextmin);
- while (pos >= nextmin) {
- // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin);
- while (pos >= curmin) {
- if (!heapifyDownComparable(pos, queue[pos])) {
- final int parent = (pos - 1) >>> 1;
- if (parent < curmin) {
- nextmin = Math.min(nextmin, parent);
- nextmax = Math.max(nextmax, parent);
- }
- }
- pos--;
- }
- curmin = nextmin;
- pos = Math.min(pos, nextmax);
- nextmax = -1;
- }
- }
- }
- validSize = size;
- }
- }
-
- /**
* Remove the element at the given position.
*
* @param pos Element position.
@@ -247,16 +173,8 @@ public class Heap<E> implements Iterable<E> {
// Replacement object:
final Object reinsert = queue[size - 1];
queue[size - 1] = null;
- // Keep heap in sync
- if (validSize == size) {
- size -= 1;
- validSize -= 1;
- heapifyDown(pos, reinsert);
- } else {
- size -= 1;
- validSize = Math.min(pos >>> 1, validSize);
- queue[pos] = reinsert;
- }
+ size--;
+ heapifyDown(pos, reinsert);
heapModified();
return ret;
}
@@ -367,7 +285,7 @@ public class Heap<E> implements Iterable<E> {
pos = cpos;
}
queue[pos] = cur;
- return (pos == ipos);
+ return (pos != ipos);
}
/**
@@ -405,7 +323,7 @@ public class Heap<E> implements Iterable<E> {
pos = min;
}
queue[pos] = cur;
- return (pos == ipos);
+ return (pos != ipos);
}
/**
@@ -453,15 +371,9 @@ public class Heap<E> implements Iterable<E> {
queue[i] = null;
}
this.size = 0;
- this.validSize = -1;
heapModified();
}
- @Override
- public Iterator<E> iterator() {
- return new Itr();
- }
-
/**
* Called at the end of each heap modification.
*/
@@ -470,52 +382,12 @@ public class Heap<E> implements Iterable<E> {
}
/**
- * Iterator over queue elements. No particular order (i.e. heap order!)
+ * Get an unordered heap iterator.
*
- * @author Erich Schubert
- *
- * @apiviz.exclude
+ * @return Iterator.
*/
- protected final class Itr implements Iterator<E> {
- /**
- * Cursor position.
- */
- private int cursor = 0;
-
- /**
- * Modification counter this iterator is valid for.
- */
- private int expectedModCount = modCount;
-
- @Override
- public boolean hasNext() {
- return cursor < size;
- }
-
- @SuppressWarnings("unchecked")
- @Override
- public E next() {
- if (expectedModCount != modCount) {
- throw new ConcurrentModificationException();
- }
- if (cursor < size) {
- return (E) queue[cursor++];
- }
- throw new NoSuchElementException();
- }
-
- @Override
- public void remove() {
- if (expectedModCount != modCount) {
- throw new ConcurrentModificationException();
- }
- if (cursor > 0) {
- cursor--;
- } else {
- throw new IllegalStateException();
- }
- expectedModCount = modCount;
- }
+ public UnorderedIter unorderedIter() {
+ return new UnorderedIter();
}
/**
@@ -526,7 +398,6 @@ public class Heap<E> implements Iterable<E> {
* @return {@code null} when the heap is correct
*/
protected String checkHeap() {
- ensureValid();
if (comparator == null) {
for (int i = 1; i < size; i++) {
final int parent = (i - 1) >>> 1;
@@ -546,4 +417,43 @@ public class Heap<E> implements Iterable<E> {
}
return null;
}
+
+ /**
+ * Heap iterator.
+ *
+ * @author Erich Schubert
+ */
+ public class UnorderedIter implements de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter {
+ /**
+ * Current iterator position.
+ */
+ int pos = 0;
+
+ /**
+ * Constructor.
+ */
+ protected UnorderedIter() {
+ super();
+ }
+
+ @Override
+ public boolean valid() {
+ return pos < size();
+ }
+
+ @Override
+ public void advance() {
+ pos++;
+ }
+
+ /**
+ * Get the current queue element.
+ *
+ * @return Element
+ */
+ @SuppressWarnings("unchecked")
+ public E get() {
+ return (E) queue[pos];
+ }
+ }
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerHeap.java
index 6203ad96..3235926b 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerHeap.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -23,53 +23,22 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.Arrays;
-
-import de.lmu.ifi.dbs.elki.math.MathUtil;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter;
/**
- * Basic in-memory heap structure.
- *
- * This heap is built lazily: if you first add many elements, then poll the
- * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log
- * n). This is implemented via a simple validTo counter.
+ * Basic in-memory heap for int values.
*
* @author Erich Schubert
+ *
+ * @apiviz.has UnsortedIter
*/
-public abstract class IntegerHeap extends AbstractHeap {
- /**
- * Heap storage: queue
- */
- protected transient int[] queue;
-
- /**
- * Constructor with initial capacity.
- *
- * @param size initial capacity
- */
- public IntegerHeap(int size) {
- super();
- this.size = 0;
- this.queue = new int[size];
- }
-
+public interface IntegerHeap {
/**
* Add a key-value pair to the heap
*
* @param key Key
*/
- public void add(int key) {
- // resize when needed
- if (size + 1 > queue.length) {
- resize(size + 1);
- }
- // final int pos = size;
- this.queue[size] = key;
- this.size += 1;
- heapifyUp(size - 1, key);
- validSize += 1;
- heapModified();
- }
+ void add(int key);
/**
* Add a key-value pair to the heap, except if the new element is larger than
@@ -78,13 +47,7 @@ public abstract class IntegerHeap extends AbstractHeap {
* @param key Key
* @param max Maximum size of heap
*/
- public void add(int key, int max) {
- if (size < max) {
- add(key);
- } else if (comp(key, peek())) {
- replaceTopElement(key);
- }
- }
+ void add(int key, int max);
/**
* Combined operation that removes the top element, and inserts a new element
@@ -93,172 +56,67 @@ public abstract class IntegerHeap extends AbstractHeap {
* @param e New element to insert
* @return Previous top element of the heap
*/
- public int replaceTopElement(int e) {
- ensureValid();
- int oldroot = queue[0];
- heapifyDown(0, e);
- heapModified();
- return oldroot;
- }
+ int replaceTopElement(int e);
/**
* Get the current top key
*
* @return Top key
*/
- public int peek() {
- if (size == 0) {
- throw new ArrayIndexOutOfBoundsException("Peek() on an empty heap!");
- }
- ensureValid();
- return queue[0];
- }
+ int peek();
/**
* Remove the first element
*
* @return Top element
*/
- public int poll() {
- return removeAt(0);
- }
+ int poll();
/**
- * Repair the heap
+ * Delete all elements from the heap.
*/
- protected void ensureValid() {
- if (validSize != size) {
- if (size > 1) {
- // Parent of first invalid
- int nextmin = validSize > 0 ? ((validSize - 1) >>> 1) : 0;
- int curmin = MathUtil.nextAllOnesInt(nextmin); // Next line
- int nextmax = curmin - 1; // End of valid line
- int pos = (size - 2) >>> 1; // Parent of last element
- // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin+", "+nextmin);
- while (pos >= nextmin) {
- // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin);
- while (pos >= curmin) {
- if (!heapifyDown(pos, queue[pos])) {
- final int parent = (pos - 1) >>> 1;
- if (parent < curmin) {
- nextmin = Math.min(nextmin, parent);
- nextmax = Math.max(nextmax, parent);
- }
- }
- pos--;
- }
- curmin = nextmin;
- pos = Math.min(pos, nextmax);
- nextmax = -1;
- }
- }
- validSize = size;
- }
- }
+ void clear();
/**
- * Remove the element at the given position.
+ * Query the size
*
- * @param pos Element position.
- * @return Removed element
+ * @return Size
*/
- protected int removeAt(int pos) {
- if (pos < 0 || pos >= size) {
- return 0;
- }
- final int top = queue[0];
- // Replacement object:
- final int reinkey = queue[size - 1];
- // Keep heap in sync
- if (validSize == size) {
- size -= 1;
- validSize -= 1;
- heapifyDown(pos, reinkey);
- } else {
- size -= 1;
- validSize = Math.min(pos >>> 1, validSize);
- queue[pos] = reinkey;
- }
- heapModified();
- return top;
- }
-
+ public int size();
+
/**
- * Execute a "Heapify Upwards" aka "SiftUp". Used in insertions.
+ * Is the heap empty?
*
- * @param pos insertion position
- * @param curkey Current key
+ * @return {@code true} when the size is 0.
*/
- protected void heapifyUp(int pos, int curkey) {
- while (pos > 0) {
- final int parent = (pos - 1) >>> 1;
- int parkey = queue[parent];
-
- if (comp(curkey, parkey)) { // Compare
- break;
- }
- queue[pos] = parkey;
- pos = parent;
- }
- queue[pos] = curkey;
- }
+ public boolean isEmpty();
/**
- * Execute a "Heapify Downwards" aka "SiftDown". Used in deletions.
+ * Get an unsorted iterator to inspect the heap.
*
- * @param ipos re-insertion position
- * @param curkey Current key
- * @return true when the order was changed
+ * @return Iterator
*/
- protected boolean heapifyDown(final int ipos, int curkey) {
- int pos = ipos;
- final int half = size >>> 1;
- while (pos < half) {
- // Get left child (must exist!)
- int cpos = (pos << 1) + 1;
- int chikey = queue[cpos];
- // Test right child, if present
- final int rchild = cpos + 1;
- if (rchild < size) {
- int right = queue[rchild];
- if (comp(chikey, right)) { // Compare
- cpos = rchild;
- chikey = right;
- }
- }
-
- if (comp(chikey, curkey)) { // Compare
- break;
- }
- queue[pos] = chikey;
- pos = cpos;
- }
- queue[pos] = curkey;
- return (pos == ipos);
- }
+ UnsortedIter unsortedIter();
/**
- * Test whether we need to resize to have the requested capacity.
+ * Unsorted iterator - in heap order. Does not poll the heap.
*
- * @param requiredSize required capacity
- */
- protected final void resize(int requiredSize) {
- queue = Arrays.copyOf(queue, desiredSize(requiredSize, queue.length));
- }
-
- /**
- * Delete all elements from the heap.
+ * <pre>
+ * {@code
+ * for (IntegerHeap.UnsortedIter iter = heap.unsortedIter(); iter.valid(); iter.advance()) {
+ * doSomething(iter.get());
+ * }
+ * }
+ * </pre>
+ *
+ * @author Erich Schubert
*/
- @Override
- public void clear() {
- super.clear();
- for (int i = 0; i < size; i++) {
- queue[i] = 0;
- }
+ public static interface UnsortedIter extends Iter {
+ /**
+ * Get the iterator's current object.
+ *
+ * @return Current object
+ */
+ int get();
}
-
- /**
- * Compare two objects
- */
- abstract protected boolean comp(int o1, int o2);
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMaxHeap.java
index 383eb727..60f61d99 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMaxHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMaxHeap.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -23,40 +23,400 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import java.util.Arrays;
+import java.util.ConcurrentModificationException;
+
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+
/**
- * Basic in-memory heap structure.
+ * Advanced priority queue class, based on a binary heap (for small sizes),
+ * which will for larger heaps be accompanied by a 4-ary heap (attached below
+ * the root of the two-ary heap, making the root actually 3-ary).
+ *
+ * This code was automatically instantiated for the type: Integer
*
- * This heap is built lazily: if you first add many elements, then poll the
- * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log
- * n). This is implemented via a simple validTo counter.
+ * This combination was found to work quite well in benchmarks, but YMMV.
+ *
+ * Some other observations from benchmarking:
+ * <ul>
+ * <li>Bulk loading did not improve things</li>
+ * <li>Primitive heaps are substantially faster.</li>
+ * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and
+ * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li>
+ * <li>Workload makes a huge difference. A load-once, poll-until-empty priority
+ * queue is something different than e.g. a top-k heap, which will see a lot of
+ * top element replacements.</li>
+ * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for
+ * top-k make a difference.</li>
+ * <li>Different day, different benchmark results ...</li>
+ * </ul>
*
* @author Erich Schubert
+ *
+ * @apiviz.has UnsortedIter
*/
-public class IntegerMaxHeap extends IntegerHeap {
+public class IntegerMaxHeap implements IntegerHeap {
+ /**
+ * Base heap.
+ */
+ protected int[] twoheap;
+
+ /**
+ * Extension heap.
+ */
+ protected int[] fourheap;
+
+ /**
+ * Current size of heap.
+ */
+ protected int size;
+
+ /**
+ * (Structural) modification counter. Used to invalidate iterators.
+ */
+ protected int modCount = 0;
+
+ /**
+ * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements.
+ */
+ private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1;
+
/**
- * Constructor with default capacity.
+ * Initial size of the 2-ary heap.
+ */
+ private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1;
+
+ /**
+ * Initial size of 4-ary heap when initialized.
+ *
+ * 21 = 4-ary heap of height 2: 1 + 4 + 4*4
+ *
+ * 85 = 4-ary heap of height 3: 21 + 4*4*4
+ *
+ * 341 = 4-ary heap of height 4: 85 + 4*4*4*4
+ *
+ * Since we last grew by 255 (to 511), let's use 341.
+ */
+ private final static int FOUR_HEAP_INITIAL_SIZE = 341;
+
+ /**
+ * Constructor, with default size.
*/
public IntegerMaxHeap() {
- super(DEFAULT_INITIAL_CAPACITY);
+ super();
+ int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE];
+
+ this.twoheap = twoheap;
+ this.fourheap = null;
+ this.size = 0;
+ this.modCount = 0;
}
/**
- * Constructor with initial capacity.
+ * Constructor, with given minimum size.
*
- * @param size initial capacity
+ * @param minsize Minimum size
*/
- public IntegerMaxHeap(int size) {
- super(size);
+ public IntegerMaxHeap(int minsize) {
+ super();
+ if (minsize < TWO_HEAP_MAX_SIZE) {
+ final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
+ int[] twoheap = new int[size];
+
+ this.twoheap = twoheap;
+ this.fourheap = null;
+ } else {
+ int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE];
+ int[] fourheap = new int[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)];
+ this.twoheap = twoheap;
+ this.fourheap = fourheap;
+ }
+ this.size = 0;
+ this.modCount = 0;
+ }
+
+ @Override
+ public void clear() {
+ size = 0;
+ ++modCount;
+ fourheap = null;
+ Arrays.fill(twoheap, 0);
+ }
+
+ @Override
+ public int size() {
+ return size;
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return (size == 0);
+ }
+
+ @Override
+ public void add(int o) {
+ final int co = o;
+ // System.err.println("Add: " + o);
+ if (size < TWO_HEAP_MAX_SIZE) {
+ if (size >= twoheap.length) {
+ // Grow by one layer.
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
+ }
+ final int twopos = size;
+ twoheap[twopos] = co;
+ ++size;
+ heapifyUp2(twopos, co);
+ ++modCount;
+ } else {
+ final int fourpos = size - TWO_HEAP_MAX_SIZE;
+ if (fourheap == null) {
+ fourheap = new int[FOUR_HEAP_INITIAL_SIZE];
+ } else if (fourpos >= fourheap.length) {
+ // Grow extension heap by half.
+ fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1));
+ }
+ fourheap[fourpos] = co;
+ ++size;
+ heapifyUp4(fourpos, co);
+ ++modCount;
+ }
+ }
+
+ @Override
+ public void add(int key, int max) {
+ if (size < max) {
+ add(key);
+ } else if (twoheap[0] >= key) {
+ replaceTopElement(key);
+ }
+ }
+
+ @Override
+ public int replaceTopElement(int reinsert) {
+ final int ret = twoheap[0];
+ heapifyDown( reinsert);
+ ++modCount;
+ return ret;
+ }
+
+ /**
+ * Heapify-Up method for 2-ary heap.
+ *
+ * @param twopos Position in 2-ary heap.
+ * @param cur Current object
+ */
+ private void heapifyUp2(int twopos, int cur) {
+ while (twopos > 0) {
+ final int parent = (twopos - 1) >>> 1;
+ int par = twoheap[parent];
+ if (cur <= par) {
+ break;
+ }
+ twoheap[twopos] = par;
+ twopos = parent;
+ }
+ twoheap[twopos] = cur;
+ }
+
+ /**
+ * Heapify-Up method for 4-ary heap.
+ *
+ * @param fourpos Position in 4-ary heap.
+ * @param cur Current object
+ */
+ private void heapifyUp4(int fourpos, int cur) {
+ while (fourpos > 0) {
+ final int parent = (fourpos - 1) >> 2;
+ int par = fourheap[parent];
+ if (cur <= par) {
+ break;
+ }
+ fourheap[fourpos] = par;
+ fourpos = parent;
+ }
+ if (fourpos == 0 && twoheap[0] < cur) {
+ fourheap[0] = twoheap[0];
+ twoheap[0] = cur;
+ } else {
+ fourheap[fourpos] = cur;
+ }
+ }
+
+ @Override
+ public int poll() {
+ final int ret = twoheap[0];
+ --size;
+ // Replacement object:
+ if (size >= TWO_HEAP_MAX_SIZE) {
+ final int last = size - TWO_HEAP_MAX_SIZE;
+ final int reinsert = fourheap[last];
+ fourheap[last] = 0;
+ heapifyDown(reinsert);
+ } else if (size > 0) {
+ final int reinsert = twoheap[size];
+ twoheap[size] = 0;
+ heapifyDown(reinsert);
+ } else {
+ twoheap[0] = 0;
+ }
+ ++modCount;
+ return ret;
+ }
+
+ /**
+ * Invoke heapify-down for the root object.
+ *
+ * @param reinsert Object to insert.
+ */
+ private void heapifyDown(int reinsert) {
+ if (size > TWO_HEAP_MAX_SIZE) {
+ // Special case: 3-ary situation.
+ final int best = (twoheap[1] >= twoheap[2]) ? 1 : 2;
+ if (fourheap[0] > twoheap[best]) {
+ twoheap[0] = fourheap[0];
+ heapifyDown4(0, reinsert);
+ } else {
+ twoheap[0] = twoheap[best];
+ heapifyDown2(best, reinsert);
+ }
+ return;
+ }
+ heapifyDown2(0, reinsert);
+ }
+
+ /**
+ * Heapify-Down for 2-ary heap.
+ *
+ * @param twopos Position in 2-ary heap.
+ * @param cur Current object
+ */
+ private void heapifyDown2(int twopos, int cur) {
+ final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1;
+ while (twopos < stop) {
+ int bestchild = (twopos << 1) + 1;
+ int best = twoheap[bestchild];
+ final int right = bestchild + 1;
+ if (right < size && best < twoheap[right]) {
+ bestchild = right;
+ best = twoheap[right];
+ }
+ if (cur >= best) {
+ break;
+ }
+ twoheap[twopos] = best;
+ twopos = bestchild;
+ }
+ twoheap[twopos] = cur;
}
/**
- * Compare two objects
+ * Heapify-Down for 4-ary heap.
*
- * @param o1 First object
- * @param o2 Second object
+ * @param fourpos Position in 4-ary heap.
+ * @param cur Current object
*/
+ private void heapifyDown4(int fourpos, int cur) {
+ final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2;
+ while (fourpos < stop) {
+ final int child = (fourpos << 2) + 1;
+ int best = fourheap[child];
+ int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE;
+ if (size > minsize) {
+ int nextchild = fourheap[candidate];
+ if (best < nextchild) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+
+ minsize += 2;
+ if (size >= minsize) {
+ nextchild = fourheap[++candidate];
+ if (best < nextchild) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+
+ if (size > minsize) {
+ nextchild = fourheap[++candidate];
+ if (best < nextchild) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+ }
+ }
+ }
+ if (cur >= best) {
+ break;
+ }
+ fourheap[fourpos] = best;
+ fourpos = bestchild;
+ }
+ fourheap[fourpos] = cur;
+ }
+
+ @Override
+ public int peek() {
+ return twoheap[0];
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder buf = new StringBuilder();
+ buf.append(IntegerMaxHeap.class.getSimpleName()).append(" [");
+ for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) {
+ buf.append(iter.get()).append(',');
+ }
+ buf.append(']');
+ return buf.toString();
+ }
+
@Override
- protected boolean comp(int o1, int o2) {
- return o1 < o2;
+ public UnsortedIter unsortedIter() {
+ return new UnsortedIter();
+ }
+
+ /**
+ * Unsorted iterator - in heap order. Does not poll the heap.
+ *
+ * Use this class as follows:
+ *
+ * <pre>
+ * {@code
+ * for (IntegerHeap.UnsortedIter iter = heap.unsortedIter(); iter.valid(); iter.advance()) {
+ * doSomething(iter.get());
+ * }
+ * }
+ * </pre>
+ *
+ * @author Erich Schubert
+ */
+ private class UnsortedIter implements IntegerHeap.UnsortedIter {
+ /**
+ * Iterator position.
+ */
+ protected int pos = 0;
+
+ /**
+ * Modification counter we were initialized at.
+ */
+ protected final int myModCount = modCount;
+
+ @Override
+ public boolean valid() {
+ if (modCount != myModCount) {
+ throw new ConcurrentModificationException();
+ }
+ return pos < size;
+ }
+
+ @Override
+ public void advance() {
+ pos++;
+ }
+
+ @Override
+ public int get() {
+ return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]);
+ }
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMinHeap.java
index f81fe275..c352ece4 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMinHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMinHeap.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -23,40 +23,400 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import java.util.Arrays;
+import java.util.ConcurrentModificationException;
+
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+
/**
- * Basic in-memory heap structure.
+ * Advanced priority queue class, based on a binary heap (for small sizes),
+ * which will for larger heaps be accompanied by a 4-ary heap (attached below
+ * the root of the two-ary heap, making the root actually 3-ary).
+ *
+ * This code was automatically instantiated for the type: Integer
*
- * This heap is built lazily: if you first add many elements, then poll the
- * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log
- * n). This is implemented via a simple validTo counter.
+ * This combination was found to work quite well in benchmarks, but YMMV.
+ *
+ * Some other observations from benchmarking:
+ * <ul>
+ * <li>Bulk loading did not improve things</li>
+ * <li>Primitive heaps are substantially faster.</li>
+ * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and
+ * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li>
+ * <li>Workload makes a huge difference. A load-once, poll-until-empty priority
+ * queue is something different than e.g. a top-k heap, which will see a lot of
+ * top element replacements.</li>
+ * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for
+ * top-k make a difference.</li>
+ * <li>Different day, different benchmark results ...</li>
+ * </ul>
*
* @author Erich Schubert
+ *
+ * @apiviz.has UnsortedIter
*/
-public class IntegerMinHeap extends IntegerHeap {
+public class IntegerMinHeap implements IntegerHeap {
+ /**
+ * Base heap.
+ */
+ protected int[] twoheap;
+
+ /**
+ * Extension heap.
+ */
+ protected int[] fourheap;
+
+ /**
+ * Current size of heap.
+ */
+ protected int size;
+
+ /**
+ * (Structural) modification counter. Used to invalidate iterators.
+ */
+ protected int modCount = 0;
+
+ /**
+ * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements.
+ */
+ private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1;
+
/**
- * Constructor with default capacity.
+ * Initial size of the 2-ary heap.
+ */
+ private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1;
+
+ /**
+ * Initial size of 4-ary heap when initialized.
+ *
+ * 21 = 4-ary heap of height 2: 1 + 4 + 4*4
+ *
+ * 85 = 4-ary heap of height 3: 21 + 4*4*4
+ *
+ * 341 = 4-ary heap of height 4: 85 + 4*4*4*4
+ *
+ * Since we last grew by 255 (to 511), let's use 341.
+ */
+ private final static int FOUR_HEAP_INITIAL_SIZE = 341;
+
+ /**
+ * Constructor, with default size.
*/
public IntegerMinHeap() {
- super(DEFAULT_INITIAL_CAPACITY);
+ super();
+ int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE];
+
+ this.twoheap = twoheap;
+ this.fourheap = null;
+ this.size = 0;
+ this.modCount = 0;
}
/**
- * Constructor with initial capacity.
+ * Constructor, with given minimum size.
*
- * @param size initial capacity
+ * @param minsize Minimum size
*/
- public IntegerMinHeap(int size) {
- super(size);
+ public IntegerMinHeap(int minsize) {
+ super();
+ if (minsize < TWO_HEAP_MAX_SIZE) {
+ final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
+ int[] twoheap = new int[size];
+
+ this.twoheap = twoheap;
+ this.fourheap = null;
+ } else {
+ int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE];
+ int[] fourheap = new int[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)];
+ this.twoheap = twoheap;
+ this.fourheap = fourheap;
+ }
+ this.size = 0;
+ this.modCount = 0;
+ }
+
+ @Override
+ public void clear() {
+ size = 0;
+ ++modCount;
+ fourheap = null;
+ Arrays.fill(twoheap, 0);
+ }
+
+ @Override
+ public int size() {
+ return size;
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return (size == 0);
+ }
+
+ @Override
+ public void add(int o) {
+ final int co = o;
+ // System.err.println("Add: " + o);
+ if (size < TWO_HEAP_MAX_SIZE) {
+ if (size >= twoheap.length) {
+ // Grow by one layer.
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
+ }
+ final int twopos = size;
+ twoheap[twopos] = co;
+ ++size;
+ heapifyUp2(twopos, co);
+ ++modCount;
+ } else {
+ final int fourpos = size - TWO_HEAP_MAX_SIZE;
+ if (fourheap == null) {
+ fourheap = new int[FOUR_HEAP_INITIAL_SIZE];
+ } else if (fourpos >= fourheap.length) {
+ // Grow extension heap by half.
+ fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1));
+ }
+ fourheap[fourpos] = co;
+ ++size;
+ heapifyUp4(fourpos, co);
+ ++modCount;
+ }
+ }
+
+ @Override
+ public void add(int key, int max) {
+ if (size < max) {
+ add(key);
+ } else if (twoheap[0] <= key) {
+ replaceTopElement(key);
+ }
+ }
+
+ @Override
+ public int replaceTopElement(int reinsert) {
+ final int ret = twoheap[0];
+ heapifyDown( reinsert);
+ ++modCount;
+ return ret;
+ }
+
+ /**
+ * Heapify-Up method for 2-ary heap.
+ *
+ * @param twopos Position in 2-ary heap.
+ * @param cur Current object
+ */
+ private void heapifyUp2(int twopos, int cur) {
+ while (twopos > 0) {
+ final int parent = (twopos - 1) >>> 1;
+ int par = twoheap[parent];
+ if (cur >= par) {
+ break;
+ }
+ twoheap[twopos] = par;
+ twopos = parent;
+ }
+ twoheap[twopos] = cur;
+ }
+
+ /**
+ * Heapify-Up method for 4-ary heap.
+ *
+ * @param fourpos Position in 4-ary heap.
+ * @param cur Current object
+ */
+ private void heapifyUp4(int fourpos, int cur) {
+ while (fourpos > 0) {
+ final int parent = (fourpos - 1) >> 2;
+ int par = fourheap[parent];
+ if (cur >= par) {
+ break;
+ }
+ fourheap[fourpos] = par;
+ fourpos = parent;
+ }
+ if (fourpos == 0 && twoheap[0] > cur) {
+ fourheap[0] = twoheap[0];
+ twoheap[0] = cur;
+ } else {
+ fourheap[fourpos] = cur;
+ }
+ }
+
+ @Override
+ public int poll() {
+ final int ret = twoheap[0];
+ --size;
+ // Replacement object:
+ if (size >= TWO_HEAP_MAX_SIZE) {
+ final int last = size - TWO_HEAP_MAX_SIZE;
+ final int reinsert = fourheap[last];
+ fourheap[last] = 0;
+ heapifyDown(reinsert);
+ } else if (size > 0) {
+ final int reinsert = twoheap[size];
+ twoheap[size] = 0;
+ heapifyDown(reinsert);
+ } else {
+ twoheap[0] = 0;
+ }
+ ++modCount;
+ return ret;
+ }
+
+ /**
+ * Invoke heapify-down for the root object.
+ *
+ * @param reinsert Object to insert.
+ */
+ private void heapifyDown(int reinsert) {
+ if (size > TWO_HEAP_MAX_SIZE) {
+ // Special case: 3-ary situation.
+ final int best = (twoheap[1] <= twoheap[2]) ? 1 : 2;
+ if (fourheap[0] < twoheap[best]) {
+ twoheap[0] = fourheap[0];
+ heapifyDown4(0, reinsert);
+ } else {
+ twoheap[0] = twoheap[best];
+ heapifyDown2(best, reinsert);
+ }
+ return;
+ }
+ heapifyDown2(0, reinsert);
+ }
+
+ /**
+ * Heapify-Down for 2-ary heap.
+ *
+ * @param twopos Position in 2-ary heap.
+ * @param cur Current object
+ */
+ private void heapifyDown2(int twopos, int cur) {
+ final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1;
+ while (twopos < stop) {
+ int bestchild = (twopos << 1) + 1;
+ int best = twoheap[bestchild];
+ final int right = bestchild + 1;
+ if (right < size && best > twoheap[right]) {
+ bestchild = right;
+ best = twoheap[right];
+ }
+ if (cur <= best) {
+ break;
+ }
+ twoheap[twopos] = best;
+ twopos = bestchild;
+ }
+ twoheap[twopos] = cur;
}
/**
- * Compare two objects
+ * Heapify-Down for 4-ary heap.
*
- * @param o1 First object
- * @param o2 Second object
+ * @param fourpos Position in 4-ary heap.
+ * @param cur Current object
*/
+ private void heapifyDown4(int fourpos, int cur) {
+ final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2;
+ while (fourpos < stop) {
+ final int child = (fourpos << 2) + 1;
+ int best = fourheap[child];
+ int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE;
+ if (size > minsize) {
+ int nextchild = fourheap[candidate];
+ if (best > nextchild) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+
+ minsize += 2;
+ if (size >= minsize) {
+ nextchild = fourheap[++candidate];
+ if (best > nextchild) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+
+ if (size > minsize) {
+ nextchild = fourheap[++candidate];
+ if (best > nextchild) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+ }
+ }
+ }
+ if (cur <= best) {
+ break;
+ }
+ fourheap[fourpos] = best;
+ fourpos = bestchild;
+ }
+ fourheap[fourpos] = cur;
+ }
+
+ @Override
+ public int peek() {
+ return twoheap[0];
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder buf = new StringBuilder();
+ buf.append(IntegerMinHeap.class.getSimpleName()).append(" [");
+ for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) {
+ buf.append(iter.get()).append(',');
+ }
+ buf.append(']');
+ return buf.toString();
+ }
+
@Override
- protected boolean comp(int o1, int o2) {
- return o1 > o2;
+ public UnsortedIter unsortedIter() {
+ return new UnsortedIter();
+ }
+
+ /**
+ * Unsorted iterator - in heap order. Does not poll the heap.
+ *
+ * Use this class as follows:
+ *
+ * <pre>
+ * {@code
+ * for (IntegerHeap.UnsortedIter iter = heap.unsortedIter(); iter.valid(); iter.advance()) {
+ * doSomething(iter.get());
+ * }
+ * }
+ * </pre>
+ *
+ * @author Erich Schubert
+ */
+ private class UnsortedIter implements IntegerHeap.UnsortedIter {
+ /**
+ * Iterator position.
+ */
+ protected int pos = 0;
+
+ /**
+ * Modification counter we were initialized at.
+ */
+ protected final int myModCount = modCount;
+
+ @Override
+ public boolean valid() {
+ if (modCount != myModCount) {
+ throw new ConcurrentModificationException();
+ }
+ return pos < size;
+ }
+
+ @Override
+ public void advance() {
+ pos++;
+ }
+
+ @Override
+ public int get() {
+ return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]);
+ }
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectHeap.java
new file mode 100644
index 00000000..01f7aea0
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectHeap.java
@@ -0,0 +1,129 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter;
+
+/**
+ * Basic in-memory heap interface, for int keys and V values.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has UnsortedIter
+ * @param <V> Value type
+ */
+public interface IntegerObjectHeap<V> {
+ /**
+ * Add a key-value pair to the heap
+ *
+ * @param key Key
+ * @param val Value
+ */
+ void add(int key, V val);
+
+ /**
+ * Add a key-value pair to the heap if it improves the top.
+ *
+ * @param key Key
+ * @param val Value
+ * @param k Desired maximum size
+ */
+ void add(int key, V val, int k);
+
+ /**
+ * Combined operation that removes the top element, and inserts a new element
+ * instead.
+ *
+ * @param key Key of new element
+ * @param val Value of new element
+ */
+ void replaceTopElement(int key, V val);
+
+ /**
+ * Get the current top key
+ *
+ * @return Top key
+ */
+ int peekKey();
+
+ /**
+ * Get the current top value
+ *
+ * @return Value
+ */
+ V peekValue();
+
+ /**
+ * Remove the first element
+ */
+ void poll();
+
+ /**
+ * Clear the heap contents.
+ */
+ void clear();
+
+ /**
+ * Query the size
+ *
+ * @return Size
+ */
+ public int size();
+
+ /**
+ * Is the heap empty?
+ *
+ * @return {@code true} when the size is 0.
+ */
+ public boolean isEmpty();
+
+ /**
+ * Get an unsorted iterator to inspect the heap.
+ *
+ * @return Iterator
+ */
+ UnsortedIter<V> unsortedIter();
+
+ /**
+ * Unsorted iterator - in heap order. Does not poll the heap.
+ *
+ * @author Erich Schubert
+ * @param <V> Value type
+ */
+ public static interface UnsortedIter<V> extends Iter {
+ /**
+ * Get the current key
+ *
+ * @return Current key
+ */
+ int getKey();
+
+ /**
+ * Get the current value
+ *
+ * @return Current value
+ */
+ V getValue();
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMaxHeap.java
new file mode 100644
index 00000000..93a4e75a
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMaxHeap.java
@@ -0,0 +1,482 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Arrays;
+import java.util.ConcurrentModificationException;
+
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+
+/**
+ * Advanced priority queue class, based on a binary heap (for small sizes),
+ * which will for larger heaps be accompanied by a 4-ary heap (attached below
+ * the root of the two-ary heap, making the root actually 3-ary).
+ *
+ * This code was automatically instantiated for the types: Integer and Object
+ *
+ * This combination was found to work quite well in benchmarks, but YMMV.
+ *
+ * Some other observations from benchmarking:
+ * <ul>
+ * <li>Bulk loading did not improve things</li>
+ * <li>Primitive heaps are substantially faster.</li>
+ * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and
+ * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li>
+ * <li>Workload makes a huge difference. A load-once, poll-until-empty priority
+ * queue is something different than e.g. a top-k heap, which will see a lot of
+ * top element replacements.</li>
+ * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for
+ * top-k make a difference.</li>
+ * <li>Different day, different benchmark results ...</li>
+ * </ul>
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has UnsortedIter
+ * @param <V> Value type
+ */
+public class IntegerObjectMaxHeap<V> implements IntegerObjectHeap<V> {
+ /**
+ * Base heap.
+ */
+ protected int[] twoheap;
+
+ /**
+ * Base heap values.
+ */
+ protected Object[] twovals;
+
+ /**
+ * Extension heap.
+ */
+ protected int[] fourheap;
+
+ /**
+ * Extension heap values.
+ */
+ protected Object[] fourvals;
+
+ /**
+ * Current size of heap.
+ */
+ protected int size;
+
+ /**
+ * (Structural) modification counter. Used to invalidate iterators.
+ */
+ protected int modCount = 0;
+
+ /**
+ * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements.
+ */
+ private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1;
+
+ /**
+ * Initial size of the 2-ary heap.
+ */
+ private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1;
+
+ /**
+ * Initial size of 4-ary heap when initialized.
+ *
+ * 21 = 4-ary heap of height 2: 1 + 4 + 4*4
+ *
+ * 85 = 4-ary heap of height 3: 21 + 4*4*4
+ *
+ * 341 = 4-ary heap of height 4: 85 + 4*4*4*4
+ *
+ * Since we last grew by 255 (to 511), let's use 341.
+ */
+ private final static int FOUR_HEAP_INITIAL_SIZE = 341;
+
+ /**
+ * Constructor, with default size: only the 2-ary base heap is allocated.
+ */
+ public IntegerObjectMaxHeap() {
+ super();
+ int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE];
+ Object[] twovals = new Object[TWO_HEAP_INITIAL_SIZE];
+
+ this.twoheap = twoheap;
+ this.twovals = twovals;
+ this.fourheap = null; // the extension heap is created by add() once it is needed
+ this.fourvals = null;
+ this.size = 0;
+ this.modCount = 0;
+ }
+
+ /**
+ * Constructor, pre-allocating storage for a given minimum size.
+ *
+ * @param minsize Minimum size
+ */
+ public IntegerObjectMaxHeap(int minsize) {
+ super();
+ if (minsize < TWO_HEAP_MAX_SIZE) { // fits into the 2-ary heap alone
+ final int size = MathUtil.nextPow2Int(minsize + 1) - 1; // presumably rounds up to a complete binary heap (2^k - 1 slots) - confirm against MathUtil
+ int[] twoheap = new int[size];
+ Object[] twovals = new Object[size];
+
+ this.twoheap = twoheap;
+ this.twovals = twovals;
+ this.fourheap = null;
+ this.fourvals = null;
+ } else {
+ int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE]; // NOTE(review): the base heap still starts at the initial size here; add() grows it layer by layer
+ Object[] twovals = new Object[TWO_HEAP_INITIAL_SIZE];
+ int[] fourheap = new int[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; // remainder goes to the 4-ary extension, at least 21 slots
+ Object[] fourvals = new Object[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)];
+ this.twoheap = twoheap;
+ this.twovals = twovals;
+ this.fourheap = fourheap;
+ this.fourvals = fourvals;
+ }
+ this.size = 0;
+ this.modCount = 0;
+ }
+
+ @Override
+ public void clear() {
+ size = 0;
+ ++modCount; // makes iterators created before this call fail in valid()
+ fourheap = null; // drop the extension heap; add() allocates a new one when needed
+ fourvals = null;
+ Arrays.fill(twoheap, 0);
+ Arrays.fill(twovals, null); // null out the stored value references
+ }
+
+ @Override
+ public int size() {
+ return size; // element count across the 2-ary heap and the 4-ary extension
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return (size == 0); // true when no element is stored
+ }
+
+ @Override
+ public void add(int o, V v) {
+ final int co = o;
+ final Object cv = (Object)v;
+ // The first TWO_HEAP_MAX_SIZE elements go to the 2-ary heap, all further ones to the 4-ary extension.
+ if (size < TWO_HEAP_MAX_SIZE) {
+ if (size >= twoheap.length) {
+ // Grow by one layer: n slots become 2n + 1.
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
+ twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1);
+ }
+ final int twopos = size; // append at the first free slot, then sift up
+ twoheap[twopos] = co;
+ twovals[twopos] = cv;
+ ++size;
+ heapifyUp2(twopos, co, cv);
+ ++modCount;
+ } else {
+ final int fourpos = size - TWO_HEAP_MAX_SIZE; // index relative to the extension heap
+ if (fourheap == null) {
+ fourheap = new int[FOUR_HEAP_INITIAL_SIZE];
+ fourvals = new Object[FOUR_HEAP_INITIAL_SIZE];
+ } else if (fourpos >= fourheap.length) {
+ // Grow extension heap by half.
+ fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1));
+ fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1));
+ }
+ fourheap[fourpos] = co;
+ fourvals[fourpos] = cv;
+ ++size;
+ heapifyUp4(fourpos, co, cv);
+ ++modCount;
+ }
+ }
+
+ @Override
+ public void add(int key, V val, int max) {
+ if (size < max) { // still room: plain insertion
+ add(key, val);
+ } else if (twoheap[0] >= key) { // full: only a key not larger than the current maximum replaces it
+ replaceTopElement(key, val);
+ } // otherwise the new pair is dropped
+ }
+
+ @Override
+ public void replaceTopElement(int reinsert, V val) {
+ heapifyDown(reinsert, (Object)val); // the old root is overwritten while the new element is sifted down; size stays the same
+ ++modCount;
+ }
+
+ /**
+ * Heapify-Up method for 2-ary heap: moves smaller parents down until the new key fits.
+ *
+ * @param twopos Position in 2-ary heap.
+ * @param cur Key of the element being moved up
+ * @param val Value stored with that key
+ */
+ private void heapifyUp2(int twopos, int cur, Object val) {
+ while (twopos > 0) {
+ final int parent = (twopos - 1) >>> 1; // parent index in a binary heap
+ int par = twoheap[parent];
+ if (cur <= par) { // parent is at least as large: max-heap order holds
+ break;
+ }
+ twoheap[twopos] = par; // pull the smaller parent down into the hole
+ twovals[twopos] = twovals[parent];
+ twopos = parent;
+ }
+ twoheap[twopos] = cur; // final position of the new element
+ twovals[twopos] = val;
+ }
+
+ /**
+ * Heapify-Up method for 4-ary heap; may finish by swapping with the root of the 2-ary heap.
+ *
+ * @param fourpos Position in 4-ary heap.
+ * @param cur Key of the element being moved up
+ * @param val Value stored with that key
+ */
+ private void heapifyUp4(int fourpos, int cur, Object val) {
+ while (fourpos > 0) {
+ final int parent = (fourpos - 1) >> 2; // parent index in a 4-ary heap
+ int par = fourheap[parent];
+ if (cur <= par) { // parent is at least as large: max-heap order holds
+ break;
+ }
+ fourheap[fourpos] = par; // pull the smaller parent down into the hole
+ fourvals[fourpos] = fourvals[parent];
+ fourpos = parent;
+ }
+ if (fourpos == 0 && twoheap[0] < cur) { // reached the 4-ary root and the new key beats the overall root
+ fourheap[0] = twoheap[0]; // old overall root moves to the top of the 4-ary heap
+ fourvals[0] = twovals[0];
+ twoheap[0] = cur;
+ twovals[0] = val;
+ } else {
+ fourheap[fourpos] = cur;
+ fourvals[fourpos] = val;
+ }
+ }
+
+ @Override
+ public void poll() {
+ --size; // NOTE(review): no emptiness check - polling an empty heap leaves size at -1
+ // Replacement object: the last stored element, taken from whichever heap holds it.
+ if (size >= TWO_HEAP_MAX_SIZE) {
+ final int last = size - TWO_HEAP_MAX_SIZE;
+ final int reinsert = fourheap[last];
+ final Object reinsertv = fourvals[last];
+ fourheap[last] = 0;
+ fourvals[last] = null; // clear the vacated slot
+ heapifyDown(reinsert, reinsertv);
+ } else if (size > 0) {
+ final int reinsert = twoheap[size];
+ final Object reinsertv = twovals[size];
+ twoheap[size] = 0;
+ twovals[size] = null; // clear the vacated slot
+ heapifyDown(reinsert, reinsertv);
+ } else { // nothing left to re-insert: just reset the root slot
+ twoheap[0] = 0;
+ twovals[0] = null;
+ }
+ ++modCount;
+ }
+
+ /**
+ * Sift the given element down from the root, across the 2-ary heap and the 4-ary extension.
+ *
+ * @param reinsert Key to insert at the root.
+ * @param val Value stored with that key.
+ */
+ private void heapifyDown(int reinsert, Object val) {
+ if (size > TWO_HEAP_MAX_SIZE) {
+ // Special case: 3-ary situation. The root has three children: twoheap[1],
+ // twoheap[2] and fourheap[0].
+ final int best = (twoheap[1] >= twoheap[2]) ? 1 : 2;
+ // Promote the 4-ary root only when it beats both the best 2-ary child and
+ // the new key. In every other case the 2-ary sift-down from the root
+ // decides, so a key larger than all three children stays at the root.
+ if (fourheap[0] > twoheap[best] && fourheap[0] > reinsert) {
+ twoheap[0] = fourheap[0];
+ twovals[0] = fourvals[0];
+ heapifyDown4(0, reinsert, val);
+ return;
+ }
+ }
+ heapifyDown2(0, reinsert, val);
+ }
+
+ /**
+ * Heapify-Down for 2-ary heap: moves larger children up until the key fits.
+ *
+ * @param twopos Position in 2-ary heap.
+ * @param cur Key of the element being moved down
+ * @param val Value to reinsert.
+ */
+ private void heapifyDown2(int twopos, int cur, Object val) {
+ final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; // positions from stop onwards have no child in the 2-ary heap
+ while (twopos < stop) {
+ int bestchild = (twopos << 1) + 1; // left child
+ int best = twoheap[bestchild];
+ final int right = bestchild + 1;
+ if (right < size && best < twoheap[right]) { // right child exists and is larger
+ bestchild = right;
+ best = twoheap[right];
+ }
+ if (cur >= best) { // no child is larger: max-heap order holds
+ break;
+ }
+ twoheap[twopos] = best; // pull the larger child up into the hole
+ twovals[twopos] = twovals[bestchild];
+ twopos = bestchild;
+ }
+ twoheap[twopos] = cur; // final position of the element
+ twovals[twopos] = val;
+ }
+
+ /**
+ * Heapify-Down for 4-ary heap: moves the largest child up until the key fits.
+ *
+ * @param fourpos Position in 4-ary heap.
+ * @param cur Key of the element being moved down
+ * @param val Value to reinsert.
+ */
+ private void heapifyDown4(int fourpos, int cur, Object val) {
+ final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; // positions from stop onwards have no child in the 4-ary heap
+ while (fourpos < stop) {
+ final int child = (fourpos << 2) + 1; // first of up to four children
+ int best = fourheap[child];
+ int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE;
+ if (size > minsize) { // second child exists
+ int nextchild = fourheap[candidate];
+ if (best < nextchild) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+
+ minsize += 2;
+ if (size >= minsize) { // third child exists
+ nextchild = fourheap[++candidate];
+ if (best < nextchild) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+
+ if (size > minsize) { // fourth child exists
+ nextchild = fourheap[++candidate];
+ if (best < nextchild) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+ }
+ }
+ }
+ if (cur >= best) { // no child is larger: max-heap order holds
+ break;
+ }
+ fourheap[fourpos] = best; // pull the largest child up into the hole
+ fourvals[fourpos] = fourvals[bestchild];
+ fourpos = bestchild;
+ }
+ fourheap[fourpos] = cur; // final position of the element
+ fourvals[fourpos] = val;
+ }
+
+ @Override
+ public int peekKey() {
+ return twoheap[0]; // NOTE(review): no emptiness check; on an empty heap this returns whatever slot 0 holds
+ }
+
+ @Override
+ @SuppressWarnings("unchecked")
+ public V peekValue() {
+ return (V)twovals[0]; // values are kept in an Object[], hence the unchecked cast
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder buf = new StringBuilder();
+ buf.append(IntegerObjectMaxHeap.class.getSimpleName()).append(" [");
+ for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) { // storage order, not sorted
+ buf.append(iter.getKey()).append(':').append(iter.getValue()).append(','); // note: also leaves a ',' after the last pair
+ }
+ buf.append(']');
+ return buf.toString();
+ }
+
+ @Override
+ public UnsortedIter unsortedIter() {
+ return new UnsortedIter(); // a fresh iterator starting at position 0
+ }
+
+ /**
+ * Unsorted iterator - in heap order. Does not poll the heap.
+ *
+ * Use this class as follows:
+ *
+ * <pre>
+ * {@code
+ * for (IntegerObjectHeap.UnsortedIter<V> iter = heap.unsortedIter(); iter.valid(); iter.advance()) {
+ * doSomething(iter.getKey(), iter.getValue());
+ * }
+ * }
+ * </pre>
+ *
+ * @author Erich Schubert
+ */
+ private class UnsortedIter implements IntegerObjectHeap.UnsortedIter<V> {
+ /**
+ * Iterator position, counted across the 2-ary heap and then the 4-ary extension.
+ */
+ protected int pos = 0;
+
+ /**
+ * Modification counter we were initialized at.
+ */
+ protected final int myModCount = modCount;
+
+ @Override
+ public boolean valid() {
+ if (modCount != myModCount) { // heap was modified after this iterator was created
+ throw new ConcurrentModificationException();
+ }
+ return pos < size;
+ }
+
+ @Override
+ public void advance() {
+ pos++;
+ }
+
+ @Override
+ public int getKey() {
+ return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); // positions past the 2-ary heap map into the extension
+ }
+
+ @SuppressWarnings("unchecked")
+
+ @Override
+ public V getValue() {
+ return (V)((pos < TWO_HEAP_MAX_SIZE) ? twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]); // same position mapping as getKey()
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMinHeap.java
new file mode 100644
index 00000000..e54c7d28
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMinHeap.java
@@ -0,0 +1,482 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Arrays;
+import java.util.ConcurrentModificationException;
+
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+
+/**
+ * Advanced priority queue class, based on a binary heap (for small sizes),
+ * which will for larger heaps be accompanied by a 4-ary heap (attached below
+ * the root of the two-ary heap, making the root actually 3-ary).
+ *
+ * This code was automatically instantiated for the types: Integer and Object
+ *
+ * This combination was found to work quite well in benchmarks, but YMMV.
+ *
+ * Some other observations from benchmarking:
+ * <ul>
+ * <li>Bulk loading did not improve things</li>
+ * <li>Primitive heaps are substantially faster.</li>
+ * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and
+ * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li>
+ * <li>Workload makes a huge difference. A load-once, poll-until-empty priority
+ * queue is something different than e.g. a top-k heap, which will see a lot of
+ * top element replacements.</li>
+ * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for
+ * top-k make a difference.</li>
+ * <li>Different day, different benchmark results ...</li>
+ * </ul>
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has UnsortedIter
+ * @param <V> Value type
+ */
+public class IntegerObjectMinHeap<V> implements IntegerObjectHeap<V> {
+ /**
+ * Base heap.
+ */
+ protected int[] twoheap;
+
+ /**
+ * Base heap values.
+ */
+ protected Object[] twovals;
+
+ /**
+ * Extension heap.
+ */
+ protected int[] fourheap;
+
+ /**
+ * Extension heap values.
+ */
+ protected Object[] fourvals;
+
+ /**
+ * Current size of heap.
+ */
+ protected int size;
+
+ /**
+ * (Structural) modification counter. Used to invalidate iterators.
+ */
+ protected int modCount = 0;
+
+ /**
+ * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements.
+ */
+ private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1;
+
+ /**
+ * Initial size of the 2-ary heap.
+ */
+ private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1;
+
+ /**
+ * Initial size of 4-ary heap when initialized.
+ *
+ * 21 = 4-ary heap of height 2: 1 + 4 + 4*4
+ *
+ * 85 = 4-ary heap of height 3: 21 + 4*4*4
+ *
+ * 341 = 4-ary heap of height 4: 85 + 4*4*4*4
+ *
+ * Since we last grew by 255 (to 511), let's use 341.
+ */
+ private final static int FOUR_HEAP_INITIAL_SIZE = 341;
+
+ /**
+ * Constructor, with default size: only the 2-ary base heap is allocated.
+ */
+ public IntegerObjectMinHeap() {
+ super();
+ int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE];
+ Object[] twovals = new Object[TWO_HEAP_INITIAL_SIZE];
+
+ this.twoheap = twoheap;
+ this.twovals = twovals;
+ this.fourheap = null; // the extension heap is created by add() once it is needed
+ this.fourvals = null;
+ this.size = 0;
+ this.modCount = 0;
+ }
+
+ /**
+ * Constructor, pre-allocating storage for a given minimum size.
+ *
+ * @param minsize Minimum size
+ */
+ public IntegerObjectMinHeap(int minsize) {
+ super();
+ if (minsize < TWO_HEAP_MAX_SIZE) { // fits into the 2-ary heap alone
+ final int size = MathUtil.nextPow2Int(minsize + 1) - 1; // presumably rounds up to a complete binary heap (2^k - 1 slots) - confirm against MathUtil
+ int[] twoheap = new int[size];
+ Object[] twovals = new Object[size];
+
+ this.twoheap = twoheap;
+ this.twovals = twovals;
+ this.fourheap = null;
+ this.fourvals = null;
+ } else {
+ int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE]; // NOTE(review): the base heap still starts at the initial size here; add() grows it layer by layer
+ Object[] twovals = new Object[TWO_HEAP_INITIAL_SIZE];
+ int[] fourheap = new int[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; // remainder goes to the 4-ary extension, at least 21 slots
+ Object[] fourvals = new Object[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)];
+ this.twoheap = twoheap;
+ this.twovals = twovals;
+ this.fourheap = fourheap;
+ this.fourvals = fourvals;
+ }
+ this.size = 0;
+ this.modCount = 0;
+ }
+
+ @Override
+ public void clear() {
+ size = 0;
+ ++modCount; // makes iterators created before this call fail in valid()
+ fourheap = null; // drop the extension heap; add() allocates a new one when needed
+ fourvals = null;
+ Arrays.fill(twoheap, 0);
+ Arrays.fill(twovals, null); // null out the stored value references
+ }
+
+ @Override
+ public int size() {
+ return size; // element count across the 2-ary heap and the 4-ary extension
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return (size == 0); // true when no element is stored
+ }
+
+ @Override
+ public void add(int o, V v) {
+ final int co = o;
+ final Object cv = (Object)v;
+ // The first TWO_HEAP_MAX_SIZE elements go to the 2-ary heap, all further ones to the 4-ary extension.
+ if (size < TWO_HEAP_MAX_SIZE) {
+ if (size >= twoheap.length) {
+ // Grow by one layer: n slots become 2n + 1.
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
+ twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1);
+ }
+ final int twopos = size; // append at the first free slot, then sift up
+ twoheap[twopos] = co;
+ twovals[twopos] = cv;
+ ++size;
+ heapifyUp2(twopos, co, cv);
+ ++modCount;
+ } else {
+ final int fourpos = size - TWO_HEAP_MAX_SIZE; // index relative to the extension heap
+ if (fourheap == null) {
+ fourheap = new int[FOUR_HEAP_INITIAL_SIZE];
+ fourvals = new Object[FOUR_HEAP_INITIAL_SIZE];
+ } else if (fourpos >= fourheap.length) {
+ // Grow extension heap by half.
+ fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1));
+ fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1));
+ }
+ fourheap[fourpos] = co;
+ fourvals[fourpos] = cv;
+ ++size;
+ heapifyUp4(fourpos, co, cv);
+ ++modCount;
+ }
+ }
+
+ @Override
+ public void add(int key, V val, int max) {
+ if (size < max) { // still room: plain insertion
+ add(key, val);
+ } else if (twoheap[0] <= key) { // full: only a key not smaller than the current minimum replaces it
+ replaceTopElement(key, val);
+ } // otherwise the new pair is dropped
+ }
+
+ @Override
+ public void replaceTopElement(int reinsert, V val) {
+ heapifyDown(reinsert, (Object)val); // the old root is overwritten while the new element is sifted down; size stays the same
+ ++modCount;
+ }
+
+ /**
+ * Heapify-Up method for 2-ary heap: moves larger parents down until the new key fits.
+ *
+ * @param twopos Position in 2-ary heap.
+ * @param cur Key of the element being moved up
+ * @param val Value stored with that key
+ */
+ private void heapifyUp2(int twopos, int cur, Object val) {
+ while (twopos > 0) {
+ final int parent = (twopos - 1) >>> 1; // parent index in a binary heap
+ int par = twoheap[parent];
+ if (cur >= par) { // parent is at least as small: min-heap order holds
+ break;
+ }
+ twoheap[twopos] = par; // pull the larger parent down into the hole
+ twovals[twopos] = twovals[parent];
+ twopos = parent;
+ }
+ twoheap[twopos] = cur; // final position of the new element
+ twovals[twopos] = val;
+ }
+
+ /**
+ * Heapify-Up method for 4-ary heap; may finish by swapping with the root of the 2-ary heap.
+ *
+ * @param fourpos Position in 4-ary heap.
+ * @param cur Key of the element being moved up
+ * @param val Value stored with that key
+ */
+ private void heapifyUp4(int fourpos, int cur, Object val) {
+ while (fourpos > 0) {
+ final int parent = (fourpos - 1) >> 2; // parent index in a 4-ary heap
+ int par = fourheap[parent];
+ if (cur >= par) { // parent is at least as small: min-heap order holds
+ break;
+ }
+ fourheap[fourpos] = par; // pull the larger parent down into the hole
+ fourvals[fourpos] = fourvals[parent];
+ fourpos = parent;
+ }
+ if (fourpos == 0 && twoheap[0] > cur) { // reached the 4-ary root and the new key beats the overall root
+ fourheap[0] = twoheap[0]; // old overall root moves to the top of the 4-ary heap
+ fourvals[0] = twovals[0];
+ twoheap[0] = cur;
+ twovals[0] = val;
+ } else {
+ fourheap[fourpos] = cur;
+ fourvals[fourpos] = val;
+ }
+ }
+
+ @Override
+ public void poll() {
+ --size; // NOTE(review): no emptiness check - polling an empty heap leaves size at -1
+ // Replacement object: the last stored element, taken from whichever heap holds it.
+ if (size >= TWO_HEAP_MAX_SIZE) {
+ final int last = size - TWO_HEAP_MAX_SIZE;
+ final int reinsert = fourheap[last];
+ final Object reinsertv = fourvals[last];
+ fourheap[last] = 0;
+ fourvals[last] = null; // clear the vacated slot
+ heapifyDown(reinsert, reinsertv);
+ } else if (size > 0) {
+ final int reinsert = twoheap[size];
+ final Object reinsertv = twovals[size];
+ twoheap[size] = 0;
+ twovals[size] = null; // clear the vacated slot
+ heapifyDown(reinsert, reinsertv);
+ } else { // nothing left to re-insert: just reset the root slot
+ twoheap[0] = 0;
+ twovals[0] = null;
+ }
+ ++modCount;
+ }
+
+ /**
+ * Sift the given element down from the root, across the 2-ary heap and the 4-ary extension.
+ *
+ * @param reinsert Key to insert at the root.
+ * @param val Value stored with that key.
+ */
+ private void heapifyDown(int reinsert, Object val) {
+ if (size > TWO_HEAP_MAX_SIZE) {
+ // Special case: 3-ary situation. The root has three children: twoheap[1],
+ // twoheap[2] and fourheap[0].
+ final int best = (twoheap[1] <= twoheap[2]) ? 1 : 2;
+ // Promote the 4-ary root only when it beats both the best 2-ary child and
+ // the new key. In every other case the 2-ary sift-down from the root
+ // decides, so a key smaller than all three children stays at the root.
+ if (fourheap[0] < twoheap[best] && fourheap[0] < reinsert) {
+ twoheap[0] = fourheap[0];
+ twovals[0] = fourvals[0];
+ heapifyDown4(0, reinsert, val);
+ return;
+ }
+ }
+ heapifyDown2(0, reinsert, val);
+ }
+
+ /**
+ * Heapify-Down for 2-ary heap: moves smaller children up until the key fits.
+ *
+ * @param twopos Position in 2-ary heap.
+ * @param cur Key of the element being moved down
+ * @param val Value to reinsert.
+ */
+ private void heapifyDown2(int twopos, int cur, Object val) {
+ final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; // positions from stop onwards have no child in the 2-ary heap
+ while (twopos < stop) {
+ int bestchild = (twopos << 1) + 1; // left child
+ int best = twoheap[bestchild];
+ final int right = bestchild + 1;
+ if (right < size && best > twoheap[right]) { // right child exists and is smaller
+ bestchild = right;
+ best = twoheap[right];
+ }
+ if (cur <= best) { // no child is smaller: min-heap order holds
+ break;
+ }
+ twoheap[twopos] = best; // pull the smaller child up into the hole
+ twovals[twopos] = twovals[bestchild];
+ twopos = bestchild;
+ }
+ twoheap[twopos] = cur; // final position of the element
+ twovals[twopos] = val;
+ }
+
+ /**
+ * Heapify-Down for 4-ary heap: moves the smallest child up until the key fits.
+ *
+ * @param fourpos Position in 4-ary heap.
+ * @param cur Key of the element being moved down
+ * @param val Value to reinsert.
+ */
+ private void heapifyDown4(int fourpos, int cur, Object val) {
+ final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; // positions from stop onwards have no child in the 4-ary heap
+ while (fourpos < stop) {
+ final int child = (fourpos << 2) + 1; // first of up to four children
+ int best = fourheap[child];
+ int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE;
+ if (size > minsize) { // second child exists
+ int nextchild = fourheap[candidate];
+ if (best > nextchild) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+
+ minsize += 2;
+ if (size >= minsize) { // third child exists
+ nextchild = fourheap[++candidate];
+ if (best > nextchild) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+
+ if (size > minsize) { // fourth child exists
+ nextchild = fourheap[++candidate];
+ if (best > nextchild) {
+ bestchild = candidate;
+ best = nextchild;
+ }
+ }
+ }
+ }
+ if (cur <= best) { // no child is smaller: min-heap order holds
+ break;
+ }
+ fourheap[fourpos] = best; // pull the smallest child up into the hole
+ fourvals[fourpos] = fourvals[bestchild];
+ fourpos = bestchild;
+ }
+ fourheap[fourpos] = cur; // final position of the element
+ fourvals[fourpos] = val;
+ }
+
+ @Override
+ public int peekKey() {
+ return twoheap[0]; // NOTE(review): no emptiness check; on an empty heap this returns whatever slot 0 holds
+ }
+
+ @Override
+ @SuppressWarnings("unchecked")
+ public V peekValue() {
+ return (V)twovals[0]; // values are kept in an Object[], hence the unchecked cast
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder buf = new StringBuilder();
+ buf.append(IntegerObjectMinHeap.class.getSimpleName()).append(" [");
+ for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) { // storage order, not sorted
+ buf.append(iter.getKey()).append(':').append(iter.getValue()).append(','); // note: also leaves a ',' after the last pair
+ }
+ buf.append(']');
+ return buf.toString();
+ }
+
+ @Override
+ public UnsortedIter unsortedIter() {
+ return new UnsortedIter(); // a fresh iterator starting at position 0
+ }
+
+ /**
+ * Unsorted iterator - in heap order. Does not poll the heap.
+ *
+ * Use this class as follows:
+ *
+ * <pre>
+ * {@code
+ * for (IntegerObjectHeap.UnsortedIter<V> iter = heap.unsortedIter(); iter.valid(); iter.advance()) {
+ * doSomething(iter.getKey(), iter.getValue());
+ * }
+ * }
+ * </pre>
+ *
+ * @author Erich Schubert
+ */
+ private class UnsortedIter implements IntegerObjectHeap.UnsortedIter<V> {
+ /**
+ * Iterator position, counted across the 2-ary heap and then the 4-ary extension.
+ */
+ protected int pos = 0;
+
+ /**
+ * Modification counter we were initialized at.
+ */
+ protected final int myModCount = modCount;
+
+ @Override
+ public boolean valid() {
+ if (modCount != myModCount) { // heap was modified after this iterator was created
+ throw new ConcurrentModificationException();
+ }
+ return pos < size;
+ }
+
+ @Override
+ public void advance() {
+ pos++;
+ }
+
+ @Override
+ public int getKey() {
+ return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); // positions past the 2-ary heap map into the extension
+ }
+
+ @SuppressWarnings("unchecked")
+
+ @Override
+ public V getValue() {
+ return (V)((pos < TWO_HEAP_MAX_SIZE) ? twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]); // same position mapping as getKey()
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerPriorityObject.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerPriorityObject.java
index 2014de65..f007b9fc 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerPriorityObject.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerPriorityObject.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ObjectHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ObjectHeap.java
index 2e20ed56..b5dbbb0e 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ObjectHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ObjectHeap.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -23,53 +23,24 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.Arrays;
-
-import de.lmu.ifi.dbs.elki.math.MathUtil;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter;
/**
- * Basic in-memory heap structure.
- *
- * This heap is built lazily: if you first add many elements, then poll the
- * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log
- * n). This is implemented via a simple validTo counter.
+ * Basic in-memory heap for K values.
*
* @author Erich Schubert
+ *
+ * @apiviz.has UnsortedIter
+ *
+ * @param <K> Key type
*/
-public abstract class ObjectHeap<K> extends AbstractHeap {
- /**
- * Heap storage: queue
- */
- protected transient Object[] queue;
-
- /**
- * Constructor with initial capacity.
- *
- * @param size initial capacity
- */
- public ObjectHeap(int size) {
- super();
- this.size = 0;
- this.queue = new Object[size];
- }
-
+public interface ObjectHeap<K> {
/**
* Add a key-value pair to the heap
*
* @param key Key
*/
- public void add(Object key) {
- // resize when needed
- if (size + 1 > queue.length) {
- resize(size + 1);
- }
- // final int pos = size;
- this.queue[size] = key;
- this.size += 1;
- heapifyUp(size - 1, key);
- validSize += 1;
- heapModified();
- }
+ void add(K key);
/**
* Add a key-value pair to the heap, except if the new element is larger than
@@ -78,13 +49,7 @@ public abstract class ObjectHeap<K> extends AbstractHeap {
* @param key Key
* @param max Maximum size of heap
*/
- public void add(Object key, int max) {
- if (size < max) {
- add(key);
- } else if (comp(key, peek())) {
- replaceTopElement(key);
- }
- }
+ void add(K key, int max);
/**
* Combined operation that removes the top element, and inserts a new element
@@ -93,175 +58,69 @@ public abstract class ObjectHeap<K> extends AbstractHeap {
* @param e New element to insert
* @return Previous top element of the heap
*/
- @SuppressWarnings("unchecked")
- public Object replaceTopElement(Object e) {
- ensureValid();
- Object oldroot = (K) queue[0];
- heapifyDown(0, e);
- heapModified();
- return oldroot;
- }
+ K replaceTopElement(K e);
/**
* Get the current top key
*
* @return Top key
*/
- @SuppressWarnings("unchecked")
- public Object peek() {
- if (size == 0) {
- throw new ArrayIndexOutOfBoundsException("Peek() on an empty heap!");
- }
- ensureValid();
- return (K) queue[0];
- }
+ K peek();
/**
* Remove the first element
*
* @return Top element
*/
- public Object poll() {
- return removeAt(0);
- }
+ K poll();
/**
- * Repair the heap
+ * Delete all elements from the heap.
*/
- protected void ensureValid() {
- if (validSize != size) {
- if (size > 1) {
- // Parent of first invalid
- int nextmin = validSize > 0 ? ((validSize - 1) >>> 1) : 0;
- int curmin = MathUtil.nextAllOnesInt(nextmin); // Next line
- int nextmax = curmin - 1; // End of valid line
- int pos = (size - 2) >>> 1; // Parent of last element
- // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin+", "+nextmin);
- while (pos >= nextmin) {
- // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin);
- while (pos >= curmin) {
- if (!heapifyDown(pos, queue[pos])) {
- final int parent = (pos - 1) >>> 1;
- if (parent < curmin) {
- nextmin = Math.min(nextmin, parent);
- nextmax = Math.max(nextmax, parent);
- }
- }
- pos--;
- }
- curmin = nextmin;
- pos = Math.min(pos, nextmax);
- nextmax = -1;
- }
- }
- validSize = size;
- }
- }
+ void clear();
/**
- * Remove the element at the given position.
+ * Query the size
*
- * @param pos Element position.
- * @return Removed element
+ * @return Size
*/
- @SuppressWarnings("unchecked")
- protected Object removeAt(int pos) {
- if (pos < 0 || pos >= size) {
- return null;
- }
- final Object top = (K) queue[0];
- // Replacement object:
- final Object reinkey = queue[size - 1];
- // Keep heap in sync
- if (validSize == size) {
- size -= 1;
- validSize -= 1;
- heapifyDown(pos, reinkey);
- } else {
- size -= 1;
- validSize = Math.min(pos >>> 1, validSize);
- queue[pos] = reinkey;
- }
- heapModified();
- return top;
- }
+ public int size();
/**
- * Execute a "Heapify Upwards" aka "SiftUp". Used in insertions.
+ * Is the heap empty?
*
- * @param pos insertion position
- * @param curkey Current key
+ * @return {@code true} when the size is 0.
*/
- protected void heapifyUp(int pos, Object curkey) {
- while (pos > 0) {
- final int parent = (pos - 1) >>> 1;
- Object parkey = queue[parent];
-
- if (comp(curkey, parkey)) { // Compare
- break;
- }
- queue[pos] = parkey;
- pos = parent;
- }
- queue[pos] = curkey;
- }
+ public boolean isEmpty();
/**
- * Execute a "Heapify Downwards" aka "SiftDown". Used in deletions.
+ * Get an unsorted iterator to inspect the heap.
*
- * @param ipos re-insertion position
- * @param curkey Current key
- * @return true when the order was changed
+ * @return Iterator
*/
- protected boolean heapifyDown(final int ipos, Object curkey) {
- int pos = ipos;
- final int half = size >>> 1;
- while (pos < half) {
- // Get left child (must exist!)
- int cpos = (pos << 1) + 1;
- Object chikey = queue[cpos];
- // Test right child, if present
- final int rchild = cpos + 1;
- if (rchild < size) {
- Object right = queue[rchild];
- if (comp(chikey, right)) { // Compare
- cpos = rchild;
- chikey = right;
- }
- }
-
- if (comp(chikey, curkey)) { // Compare
- break;
- }
- queue[pos] = chikey;
- pos = cpos;
- }
- queue[pos] = curkey;
- return (pos == ipos);
- }
+ UnsortedIter<K> unsortedIter();
/**
- * Test whether we need to resize to have the requested capacity.
+ * Unsorted iterator - in heap order. Does not poll the heap.
*
- * @param requiredSize required capacity
- */
- protected final void resize(int requiredSize) {
- queue = Arrays.copyOf(queue, desiredSize(requiredSize, queue.length));
- }
-
- /**
- * Delete all elements from the heap.
+ * <pre>
+ * {@code
+ * for (ObjectHeap.UnsortedIter<K> iter = heap.unsortedIter(); iter.valid(); iter.advance()) {
+ * doSomething(iter.get());
+ * }
+ * }
+ * </pre>
+ *
+ * @author Erich Schubert
+ *
+ * @param <K> Key type
*/
- @Override
- public void clear() {
- super.clear();
- for (int i = 0; i < size; i++) {
- queue[i] = null;
- }
+ public static interface UnsortedIter<K> extends Iter {
+ /**
+ * Get the iterators current object.
+ *
+ * @return Current object
+ */
+ K get();
}
-
- /**
- * Compare two objects
- */
- abstract protected boolean comp(Object o1, Object o2);
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedHeap.java
index 2daaafa4..32f57999 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedHeap.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -25,11 +25,8 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
import java.util.ArrayList;
import java.util.Comparator;
-import java.util.Iterator;
import java.util.List;
-import de.lmu.ifi.dbs.elki.utilities.iterator.MergedIterator;
-
/**
* A size-limited heap similar to {@link TopBoundedHeap}, discarding elements
* with the highest value. However, this variation keeps a list of tied
@@ -43,7 +40,7 @@ public class TiedTopBoundedHeap<E> extends TopBoundedHeap<E> {
/**
* List to keep ties in.
*/
- private List<E> ties = new ArrayList<E>();
+ private List<E> ties = new ArrayList<>();
/**
* Constructor with comparator.
@@ -75,12 +72,6 @@ public class TiedTopBoundedHeap<E> extends TopBoundedHeap<E> {
ties.clear();
}
- @SuppressWarnings("unchecked")
- @Override
- public Iterator<E> iterator() {
- return new MergedIterator<E>(ties.iterator(), super.iterator());
- }
-
@Override
public E peek() {
if (ties.isEmpty()) {
@@ -131,4 +122,44 @@ public class TiedTopBoundedHeap<E> extends TopBoundedHeap<E> {
ties.clear();
}
}
+
+ /**
+ * Get an unordered heap iterator.
+ *
+ * @return Iterator.
+ */
+ @Override
+ public UnorderedIter unorderedIter() {
+ return new UnorderedIter();
+ }
+
+ /**
+ * Unordered heap iterator class.
+ *
+ * @author Erich Schubert
+ *
+ */
+ public class UnorderedIter extends Heap<E>.UnorderedIter {
+ /**
+ * Constructor.
+ */
+ protected UnorderedIter() {
+ super();
+ }
+
+ @Override
+ public boolean valid() {
+ return pos < size();
+ }
+
+ @Override
+ public E get() {
+ final int ssize = TiedTopBoundedHeap.super.size();
+ if (pos < ssize) {
+ return super.get();
+ } else {
+ return ties.get(pos - ssize);
+ }
+ }
+ }
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedUpdatableHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedUpdatableHeap.java
index 8e39af1d..3905030f 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedUpdatableHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedUpdatableHeap.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -29,7 +29,6 @@ import java.util.Iterator;
import java.util.List;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
-import de.lmu.ifi.dbs.elki.utilities.iterator.MergedIterator;
/**
* A size-limited heap similar to {@link TopBoundedHeap}, discarding elements
@@ -44,7 +43,7 @@ public class TiedTopBoundedUpdatableHeap<E> extends TopBoundedUpdatableHeap<E> {
/**
* List to keep ties in.
*/
- private List<E> ties = new ArrayList<E>();
+ private List<E> ties = new ArrayList<>();
/**
* Constructor with comparator.
@@ -76,12 +75,6 @@ public class TiedTopBoundedUpdatableHeap<E> extends TopBoundedUpdatableHeap<E> {
ties.clear();
}
- @SuppressWarnings("unchecked")
- @Override
- public Iterator<E> iterator() {
- return new MergedIterator<E>(ties.iterator(), super.iterator());
- }
-
@Override
public void offerAt(int pos, E e) {
if(pos == IN_TIES) {
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedHeap.java
index 07b595f6..9adda9f3 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedHeap.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -69,7 +69,6 @@ public class TopBoundedHeap<E> extends Heap<E> {
return;
}
// Peek at the top element, return if we are worse.
- ensureValid();
final int comp;
if (comparator == null) {
@SuppressWarnings("unchecked")
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedUpdatableHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedUpdatableHeap.java
index 75f2abcf..4a591d4c 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedUpdatableHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedUpdatableHeap.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -68,7 +68,6 @@ public class TopBoundedUpdatableHeap<E> extends UpdatableHeap<E> {
super.offerAt(pos, e);
return;
}
- ensureValid();
if (compare(e, queue[0]) < 0) {
// while we did not change, this still was "successful".
return;
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/UpdatableHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/UpdatableHeap.java
index 1ab5f4df..a585d94d 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/UpdatableHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/UpdatableHeap.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -49,7 +49,7 @@ public class UpdatableHeap<O> extends Heap<O> {
/**
* Holds the indices in the heap of each element.
*/
- protected final TObjectIntMap<Object> index = new TObjectIntHashMap<Object>(100, 0.5f, NO_VALUE);
+ protected final TObjectIntMap<Object> index = new TObjectIntHashMap<>(100, 0.5f, NO_VALUE);
/**
* Simple constructor with default size.
@@ -105,43 +105,32 @@ public class UpdatableHeap<O> extends Heap<O> {
* @param e Element
*/
protected void offerAt(final int pos, O e) {
- if(pos == NO_VALUE) {
+ if (pos == NO_VALUE) {
// resize when needed
- if(size + 1 > queue.length) {
+ if (size + 1 > queue.length) {
resize(size + 1);
}
- // final int pos = size;
- this.queue[size] = e;
index.put(e, size);
- size += 1;
- // We do NOT YET update the heap. This is done lazily.
+ size++;
+ heapifyUp(size - 1, e);
heapModified();
return;
- }
- else {
+ } else {
assert (pos >= 0) : "Unexpected negative position.";
assert (queue[pos].equals(e));
// Did the value improve?
- if(comparator == null) {
+ if (comparator == null) {
@SuppressWarnings("unchecked")
Comparable<Object> c = (Comparable<Object>) e;
- if(c.compareTo(queue[pos]) >= 0) {
+ if (c.compareTo(queue[pos]) >= 0) {
return;
}
- }
- else {
- if(comparator.compare(e, queue[pos]) >= 0) {
+ } else {
+ if (comparator.compare(e, queue[pos]) >= 0) {
return;
}
}
- if(pos >= validSize) {
- queue[pos] = e;
- // validSize = Math.min(pos, validSize);
- }
- else {
- // ensureValid();
- heapifyUp(pos, e);
- }
+ heapifyUp(pos, e);
heapModified();
return;
}
@@ -149,7 +138,7 @@ public class UpdatableHeap<O> extends Heap<O> {
@Override
protected O removeAt(int pos) {
- if(pos < 0 || pos >= size) {
+ if (pos < 0 || pos >= size) {
return null;
}
@SuppressWarnings("unchecked")
@@ -158,34 +147,22 @@ public class UpdatableHeap<O> extends Heap<O> {
final Object reinsert = queue[size - 1];
queue[size - 1] = null;
// Keep heap in sync?
- if(validSize == size) {
- size -= 1;
- validSize -= 1;
- if(comparator != null) {
- if(comparator.compare(ret, reinsert) > 0) {
- heapifyUpComparator(pos, reinsert);
- }
- else {
- heapifyDownComparator(pos, reinsert);
- }
+ size--;
+ if (comparator != null) {
+ if (comparator.compare(ret, reinsert) > 0) {
+ heapifyUpComparator(pos, reinsert);
+ } else {
+ heapifyDownComparator(pos, reinsert);
}
- else {
- @SuppressWarnings("unchecked")
- Comparable<Object> comp = (Comparable<Object>) ret;
- if(comp.compareTo(reinsert) > 0) {
- heapifyUpComparable(pos, reinsert);
- }
- else {
- heapifyDownComparable(pos, reinsert);
- }
+ } else {
+ @SuppressWarnings("unchecked")
+ Comparable<Object> comp = (Comparable<Object>) ret;
+ if (comp.compareTo(reinsert) > 0) {
+ heapifyUpComparable(pos, reinsert);
+ } else {
+ heapifyDownComparable(pos, reinsert);
}
}
- else {
- size -= 1;
- validSize = Math.min(pos >>> 1, validSize);
- queue[pos] = reinsert;
- index.put(reinsert, pos);
- }
heapModified();
// Keep index up to date
index.remove(ret);
@@ -200,10 +177,9 @@ public class UpdatableHeap<O> extends Heap<O> {
*/
public O removeObject(O e) {
int pos = index.get(e);
- if(pos >= 0) {
+ if (pos >= 0) {
return removeAt(pos);
- }
- else {
+ } else {
return null;
}
}
@@ -214,7 +190,7 @@ public class UpdatableHeap<O> extends Heap<O> {
index.remove(node);
return node;
}
-
+
@Override
public O replaceTopElement(O e) {
O node = super.replaceTopElement(e);
@@ -232,11 +208,11 @@ public class UpdatableHeap<O> extends Heap<O> {
@SuppressWarnings("unchecked")
protected void heapifyUpComparable(int pos, Object elem) {
final Comparable<Object> cur = (Comparable<Object>) elem; // queue[pos];
- while(pos > 0) {
+ while (pos > 0) {
final int parent = (pos - 1) >>> 1;
Object par = queue[parent];
- if(cur.compareTo(par) >= 0) {
+ if (cur.compareTo(par) >= 0) {
break;
}
queue[pos] = par;
@@ -255,11 +231,11 @@ public class UpdatableHeap<O> extends Heap<O> {
*/
@Override
protected void heapifyUpComparator(int pos, Object cur) {
- while(pos > 0) {
+ while (pos > 0) {
final int parent = (pos - 1) >>> 1;
Object par = queue[parent];
- if(comparator.compare(cur, par) >= 0) {
+ if (comparator.compare(cur, par) >= 0) {
break;
}
queue[pos] = par;
@@ -276,21 +252,21 @@ public class UpdatableHeap<O> extends Heap<O> {
Comparable<Object> cur = (Comparable<Object>) reinsert;
int pos = ipos;
final int half = size >>> 1;
- while(pos < half) {
+ while (pos < half) {
// Get left child (must exist!)
int cpos = (pos << 1) + 1;
Object child = queue[cpos];
// Test right child, if present
final int rchild = cpos + 1;
- if(rchild < size) {
+ if (rchild < size) {
Object right = queue[rchild];
- if(((Comparable<Object>) child).compareTo(right) > 0) {
+ if (((Comparable<Object>) child).compareTo(right) > 0) {
cpos = rchild;
child = right;
}
}
- if(cur.compareTo(child) <= 0) {
+ if (cur.compareTo(child) <= 0) {
break;
}
queue[pos] = child;
@@ -299,32 +275,32 @@ public class UpdatableHeap<O> extends Heap<O> {
}
queue[pos] = cur;
index.put(cur, pos);
- return (pos == ipos);
+ return (pos != ipos);
}
@Override
protected boolean heapifyDownComparator(final int ipos, Object cur) {
int pos = ipos;
final int half = size >>> 1;
- while(pos < half) {
+ while (pos < half) {
int min = pos;
Object best = cur;
final int lchild = (pos << 1) + 1;
Object left = queue[lchild];
- if(comparator.compare(best, left) > 0) {
+ if (comparator.compare(best, left) > 0) {
min = lchild;
best = left;
}
final int rchild = lchild + 1;
- if(rchild < size) {
+ if (rchild < size) {
Object right = queue[rchild];
- if(comparator.compare(best, right) > 0) {
+ if (comparator.compare(best, right) > 0) {
min = rchild;
best = right;
}
}
- if(min == pos) {
+ if (min == pos) {
break;
}
queue[pos] = best;
@@ -333,6 +309,6 @@ public class UpdatableHeap<O> extends Heap<O> {
}
queue[pos] = cur;
index.put(cur, pos);
- return (pos == ipos);
+ return (pos != ipos);
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/package-info.java
index 3f193171..83be37f4 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/package-info.java
@@ -5,7 +5,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2012
+Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/HashMapHierarchy.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/HashMapHierarchy.java
new file mode 100644
index 00000000..c77a9329
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/HashMapHierarchy.java
@@ -0,0 +1,580 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Iterator;
+
+/**
+ * Centralized hierarchy implementation, using a HashMap of Lists.
+ *
+ * @author Erich Schubert
+ *
+ * @param <O> Object type (arbitrary!)
+ */
+public class HashMapHierarchy<O> implements ModifiableHierarchy<O> {
+ /**
+ * Reference storage.
+ */
+ final private HashMap<O, Rec<O>> graph;
+
+ /**
+ * Constructor.
+ */
+ public HashMapHierarchy() {
+ super();
+ this.graph = new HashMap<>();
+ }
+
+ @Override
+ public int size() {
+ return graph.size();
+ }
+
+ @Override
+ public void add(O parent, O child) {
+ // Add child to parent.
+ {
+ Rec<O> rec = graph.get(parent);
+ if (rec == null) {
+ rec = new Rec<>();
+ graph.put(parent, rec);
+ }
+ rec.addChild(child);
+ }
+ // Add parent to child.
+ {
+ Rec<O> rec = graph.get(child);
+ if (rec == null) {
+ rec = new Rec<>();
+ graph.put(child, rec);
+ }
+ rec.addParent(parent);
+ }
+ }
+
+ @Override
+ public void add(O entry) {
+ Rec<O> rec = graph.get(entry);
+ if (rec == null) {
+ rec = new Rec<>();
+ graph.put(entry, rec);
+ }
+ }
+
+ @Override
+ public void remove(O parent, O child) {
+ // Remove child from parent.
+ {
+ Rec<O> rec = graph.get(parent);
+ if (rec != null) {
+ rec.removeChild(child);
+ }
+ }
+ // Remove parent from child
+ {
+ Rec<O> rec = graph.get(child);
+ if (rec != null) {
+ rec.removeParent(parent);
+ }
+ }
+ }
+
+ @Override
+ public void remove(O entry) {
+ Rec<O> rec = graph.get(entry);
+ if (rec == null) {
+ return;
+ }
+ for (int i = 0; i < rec.nump; i++) {
+ graph.get(rec.parents[i]).removeChild(entry);
+ rec.parents[i] = null;
+ }
+ for (int i = 0; i < rec.numc; i++) {
+ graph.get(rec.children[i]).removeParent(entry);
+ rec.children[i] = null;
+ }
+ graph.remove(entry);
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public void removeSubtree(O entry) {
+ Rec<O> rec = graph.get(entry);
+ if (rec == null) {
+ return;
+ }
+ for (int i = 0; i < rec.nump; i++) {
+ graph.get(rec.parents[i]).removeChild(entry);
+ rec.parents[i] = null;
+ }
+ for (int i = 0; i < rec.numc; i++) {
+ final Rec<O> crec = graph.get(rec.children[i]);
+ crec.removeParent(entry);
+ if (crec.nump == 0) {
+ removeSubtree((O) rec.children[i]);
+ }
+ rec.children[i] = null;
+ }
+ }
+
+ @Override
+ public int numChildren(O obj) {
+ Rec<O> rec = graph.get(obj);
+ if (rec == null) {
+ return 0;
+ }
+ return rec.numc;
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public Iter<O> iterChildren(O obj) {
+ Rec<O> rec = graph.get(obj);
+ if (rec == null) {
+ return (Iter<O>) EMPTY_ITERATOR;
+ }
+ return rec.iterChildren();
+ }
+
+ @Override
+ public Iter<O> iterDescendants(O obj) {
+ return new ItrDesc(obj);
+ }
+
+ @Override
+ public int numParents(O obj) {
+ Rec<O> rec = graph.get(obj);
+ if (rec == null) {
+ return 0;
+ }
+ return rec.nump;
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public Iter<O> iterParents(O obj) {
+ Rec<O> rec = graph.get(obj);
+ if (rec == null) {
+ return (Iter<O>) EMPTY_ITERATOR;
+ }
+ return rec.iterParents();
+ }
+
+ @Override
+ public Iter<O> iterAncestors(O obj) {
+ return new ItrAnc(obj);
+ }
+
+ @Override
+ public Iter<O> iterAll() {
+ return new ItrAll();
+ }
+
+ /**
+ * Hierarchy pointers for an object.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ *
+ * @param <O> object type
+ */
+ private static class Rec<O> {
+ /**
+ * Number of parents, number of children.
+ */
+ int nump = 0, numc = 0;
+
+ /**
+ * Parents.
+ */
+ Object[] parents = null;
+
+ /**
+ * Children.
+ */
+ Object[] children = null;
+
+ /**
+ * Add a parent; a parent that is already present is not added again.
+ *
+ * @param parent Parent to add.
+ */
+ void addParent(O parent) {
+ if (parents == null) {
+ parents = new Object[1];
+ parents[0] = parent;
+ nump = 1;
+ } else {
+ for (int i = 0; i < nump; i++) {
+ if (parent.equals(parents[i])) {
+ return;
+ }
+ }
+ if (parents.length == nump) {
+ final int newsize = Math.max(5, (parents.length << 1) + 1); // grow to at least 5 slots, always beyond nump
+ parents = Arrays.copyOf(parents, newsize);
+ }
+ parents[nump] = parent;
+ nump++;
+ }
+ }
+
+ /**
+ * Add a child.
+ *
+ * @param child Child to add
+ */
+ void addChild(O child) {
+ if (children == null) {
+ children = new Object[5];
+ children[0] = child;
+ numc = 1;
+ } else {
+ for (int i = 0; i < numc; i++) {
+ if (child.equals(children[i])) {
+ return;
+ }
+ }
+ if (children.length == numc) {
+ children = Arrays.copyOf(children, (children.length << 1) + 1);
+ }
+ children[numc] = child;
+ numc++;
+ }
+ }
+
+ /**
+ * Remove a parent, if present; the array is released once it becomes empty.
+ *
+ * @param parent Parent to remove.
+ */
+ void removeParent(O parent) {
+ if (parents == null) {
+ return;
+ }
+ for (int i = 0; i < nump; i++) {
+ if (parent.equals(parents[i])) {
+ System.arraycopy(parents, i + 1, parents, i, nump - 1 - i); // shift the tail left by one
+ parents[nump - 1] = null; // clear the vacated last used slot (index nump may be out of bounds)
+ nump--;
+ break;
+ }
+ }
+ if (nump == 0) {
+ parents = null;
+ }
+ }
+
+ /**
+ * Remove a child, if present; the array is released once it becomes empty.
+ *
+ * @param child Child to remove.
+ */
+ void removeChild(O child) {
+ if (children == null) {
+ return;
+ }
+ for (int i = 0; i < numc; i++) {
+ if (child.equals(children[i])) {
+ System.arraycopy(children, i + 1, children, i, numc - 1 - i); // shift the tail left by one
+ children[numc - 1] = null; // clear the vacated last used slot (index numc may be out of bounds)
+ numc--;
+ break;
+ }
+ }
+ if (numc == 0) {
+ children = null;
+ }
+ }
+
+ /**
+ * Iterate over parents.
+ *
+ * @return Iterator for parents.
+ */
+ @SuppressWarnings("unchecked")
+ public Iter<O> iterParents() {
+ if (nump == 0) {
+ return (Iter<O>) EMPTY_ITERATOR;
+ }
+ return new ItrParents();
+ }
+
+ /**
+ * Iterate over children.
+ *
+ * @return Iterator for children; the shared empty iterator if there are none.
+ */
+ @SuppressWarnings("unchecked")
+ public Iter<O> iterChildren() {
+ if (numc == 0) {
+ return (Iter<O>) EMPTY_ITERATOR;
+ }
+ return new ItrChildren();
+ }
+
+ /**
+ * Parent iterator.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ class ItrParents implements Iter<O> {
+ int pos = 0;
+
+ @Override
+ public boolean valid() {
+ return pos < nump;
+ }
+
+ @Override
+ public void advance() {
+ pos++;
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public O get() {
+ return (O) parents[pos];
+ }
+ }
+
+ /**
+ * Child iterator.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ class ItrChildren implements Iter<O> {
+ int pos = 0;
+
+ @Override
+ public boolean valid() {
+ return pos < numc;
+ }
+
+ @Override
+ public void advance() {
+ pos++;
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public O get() {
+ return (O) children[pos];
+ }
+ }
+ }
+
+ /**
+ * Iterator to collect into the descendants.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ private class ItrDesc implements Iter<O> {
+ /**
+ * Iterator over children
+ */
+ final Iter<O> childiter;
+
+ /**
+ * Iterator of current child
+ */
+ Iter<O> subiter = null;
+
+ /**
+ * Starting element.
+ *
+ * @param start
+ */
+ ItrDesc(O start) {
+ childiter = iterChildren(start);
+ }
+
+ @Override
+ public boolean valid() {
+ return childiter.valid() || (subiter != null && subiter.valid());
+ }
+
+ @Override
+ public void advance() {
+ if (subiter == null) { // Not yet descended
+ assert (childiter.valid());
+ subiter = iterDescendants(childiter.get());
+ } else { // Continue with subtree
+ subiter.advance();
+ }
+ if (subiter.valid()) {
+ return;
+ }
+ // Proceed to next child.
+ childiter.advance();
+ subiter = null;
+ }
+
+ @Override
+ public O get() {
+ if (subiter != null) {
+ assert (subiter.valid());
+ return subiter.get();
+ } else {
+ assert (childiter.valid());
+ return childiter.get();
+ }
+ }
+ }
+
+ /**
+ * Iterator over all ancestors (parents, and recursively their ancestors).
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ private class ItrAnc implements Iter<O> {
+ /**
+ * Iterator over the direct parents
+ */
+ final Iter<O> parentiter;
+
+ /**
+ * Ancestor iterator of the current parent; null while at the parent itself
+ */
+ Iter<O> subiter = null;
+
+ /**
+ * Constructor.
+ *
+ * @param start Starting element, whose ancestors are iterated
+ */
+ ItrAnc(O start) {
+ parentiter = iterParents(start);
+ }
+
+ @Override
+ public boolean valid() {
+ return parentiter.valid() || (subiter != null && subiter.valid());
+ }
+
+ @Override
+ public void advance() {
+ if (subiter == null) { // Not yet ascended into the current parent
+ assert (parentiter.valid());
+ subiter = iterAncestors(parentiter.get());
+ } else { // Continue with the current parent's ancestors
+ subiter.advance();
+ }
+ if (subiter.valid()) {
+ return;
+ }
+ // Ancestors of this parent are exhausted: proceed to next parent.
+ parentiter.advance();
+ subiter = null;
+ }
+
+ @Override
+ public O get() {
+ if (subiter != null) {
+ assert (subiter.valid());
+ return subiter.get();
+ } else {
+ assert (parentiter.valid());
+ return parentiter.get();
+ }
+ }
+ }
+
+ /**
+ * Iterator over all members of the hierarchy.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ private class ItrAll implements Iter<O> {
+ /**
+ * The true iterator.
+ */
+ final Iterator<O> iter;
+
+ /**
+ * Current object.
+ */
+ O cur = null;
+
+ /**
+ * Constructor.
+ */
+ ItrAll() {
+ iter = graph.keySet().iterator();
+ advance();
+ }
+
+ @Override
+ public boolean valid() {
+ return cur != null;
+ }
+
+ @Override
+ public void advance() {
+ if (iter.hasNext()) {
+ cur = iter.next();
+ } else {
+ cur = null;
+ }
+ }
+
+ @Override
+ public O get() {
+ return cur;
+ }
+ }
+
+ /**
+ * Empty iterator.
+ */
+ private static final Iter<?> EMPTY_ITERATOR = new Iter<Object>() {
+ @Override
+ public boolean valid() {
+ return false;
+ }
+
+ @Override
+ public void advance() {
+ throw new UnsupportedOperationException("Empty iterators must not be advanced.");
+ }
+
+ @Override
+ public Object get() {
+ throw new UnsupportedOperationException("Iterator is empty.");
+ }
+ };
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/Hierarchical.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/Hierarchical.java
deleted file mode 100644
index 29909069..00000000
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/Hierarchical.java
+++ /dev/null
@@ -1,90 +0,0 @@
-package de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy;
-
-/*
- This file is part of ELKI:
- Environment for Developing KDD-Applications Supported by Index-Structures
-
- Copyright (C) 2012
- Ludwig-Maximilians-Universität München
- Lehr- und Forschungseinheit für Datenbanksysteme
- ELKI Development Team
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-import java.util.Iterator;
-import java.util.List;
-
-
-/**
- * Interface for objects with an <b>internal</b> hierarchy interface.
- *
- * Note that the object can chose to delegate the hierarchy to an external hierarchy.
- *
- * @author Erich Schubert
- *
- * @param <O> Object type in hierarchy
- */
-public interface Hierarchical<O> {
- /**
- * Test for hierarchical properties
- *
- * @return hierarchical data model.
- */
- public boolean isHierarchical();
-
- /**
- * Get number of children
- *
- * @return number of children
- */
- public int numChildren();
-
- /**
- * Get children list. Resulting list MAY be modified. Result MAY be null, if
- * the model is not hierarchical.
- *
- * @return list of children
- */
- public List<O> getChildren();
-
- /**
- * Iterate descendants (recursive children)
- *
- * @return iterator for descendants
- */
- public Iterator<O> iterDescendants();
-
- /**
- * Get number of parents
- *
- * @return number of parents
- */
- public int numParents();
-
- /**
- * Get parents list. Resulting list MAY be modified. Result MAY be null, if
- * the model is not hierarchical.
- *
- * @return list of parents
- */
- public List<O> getParents();
-
- /**
- * Iterate ancestors (recursive parents)
- *
- * @return iterator for ancestors
- */
- public Iterator<O> iterAncestors();
-} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/Hierarchy.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/Hierarchy.java
index 0a16e9b7..fec9c7b4 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/Hierarchy.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/Hierarchy.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -23,38 +23,40 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.Iterator;
-import java.util.List;
-
/**
* This interface represents an (external) hierarchy of objects. It can contain
* arbitrary objects, BUT the hierarchy has to be accessed using the hierarchy
- * object, i.e. {@code hierarchy.getChildren(object);}.
- *
- * See {@link Hierarchical} for an interface for objects with an internal
- * hierarchy (where you can use {@code object.getChildren();})
+ * object, i.e. {@code hierarchy.iterChildren(object);}.
*
* @author Erich Schubert
*
+ * @apiviz.has Iter
+ *
* @param <O> Object type
*/
public interface Hierarchy<O> {
/**
+ * Total size - number of objects contained.
+ *
+ * @return Size
+ */
+ int size();
+
+ /**
* Get number of children
*
* @param self object to get number of children for
* @return number of children
*/
- public int numChildren(O self);
+ int numChildren(O self);
/**
- * Get children list. Resulting list MAY be modified. Result MAY be null, if
- * the model is not hierarchical.
+ * Iterate over the (direct) children.
*
* @param self object to get children for
- * @return list of children
+ * @return iterator for children
*/
- public List<O> getChildren(O self);
+ Iter<O> iterChildren(O self);
/**
* Iterate descendants (recursive children)
@@ -62,7 +64,7 @@ public interface Hierarchy<O> {
* @param self object to get descendants for
* @return iterator for descendants
*/
- public Iterator<O> iterDescendants(O self);
+ Iter<O> iterDescendants(O self);
/**
* Get number of (direct) parents
@@ -70,16 +72,15 @@ public interface Hierarchy<O> {
* @param self reference object
* @return number of parents
*/
- public int numParents(O self);
+ int numParents(O self);
/**
- * Get parents list. Resulting list MAY be modified. Result MAY be null, if
- * the model is not hierarchical.
+ * Iterate over the (direct) parents.
*
* @param self object to get parents for
- * @return list of parents
+ * @return iterator of parents
*/
- public List<O> getParents(O self);
+ Iter<O> iterParents(O self);
/**
* Iterate ancestors (recursive parents)
@@ -87,5 +88,30 @@ public interface Hierarchy<O> {
* @param self object to get ancestors for
* @return iterator for ancestors
*/
- public Iterator<O> iterAncestors(O self);
-} \ No newline at end of file
+ Iter<O> iterAncestors(O self);
+
+ /**
+ * Iterate over all members.
+ *
+ * @return Iterator over all members.
+ */
+ Iter<O> iterAll();
+
+ /**
+ * Iterator interface.
+ *
+ * TODO: add a skipSubtree method?
+ *
+ * @author Erich Schubert
+ *
+ * @param <O> Object type.
+ */
+ static interface Iter<O> extends de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter {
+ /**
+ * Access the current object.
+ *
+ * @return Current object
+ */
+ O get();
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/HierarchyHashmapList.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/HierarchyHashmapList.java
deleted file mode 100644
index bd6d67bf..00000000
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/HierarchyHashmapList.java
+++ /dev/null
@@ -1,299 +0,0 @@
-package de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy;
-
-/*
- This file is part of ELKI:
- Environment for Developing KDD-Applications Supported by Index-Structures
-
- Copyright (C) 2012
- Ludwig-Maximilians-Universität München
- Lehr- und Forschungseinheit für Datenbanksysteme
- ELKI Development Team
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.LinkedList;
-import java.util.List;
-
-import de.lmu.ifi.dbs.elki.logging.LoggingUtil;
-import de.lmu.ifi.dbs.elki.utilities.iterator.EmptyIterator;
-
-/**
- * Centralized hierarchy implementation, using a HashMap of Lists.
- *
- * @author Erich Schubert
- *
- * @param <O> Object type (arbitrary!)
- */
-public class HierarchyHashmapList<O> implements ModifiableHierarchy<O> {
- /**
- * The data storage for parents
- */
- final private HashMap<O, List<O>> pmap;
-
- /**
- * The data storage for children
- */
- final private HashMap<O, List<O>> cmap;
-
- /**
- * Constructor
- */
- public HierarchyHashmapList() {
- super();
- this.pmap = new HashMap<O, List<O>>();
- this.cmap = new HashMap<O, List<O>>();
- }
-
- @Override
- public void add(O parent, O child) {
- // Add child to parent.
- {
- List<O> pchi = this.cmap.get(parent);
- if(pchi == null) {
- pchi = new LinkedList<O>();
- this.cmap.put(parent, pchi);
- }
- if(!pchi.contains(child)) {
- pchi.add(child);
- } else {
- LoggingUtil.warning("Result added twice: "+parent+" -> "+child, new Throwable());
- }
- }
- // Add child to parent
- {
- List<O> cpar = this.pmap.get(child);
- if(cpar == null) {
- cpar = new LinkedList<O>();
- this.pmap.put(child, cpar);
- }
- if(!cpar.contains(parent)) {
- cpar.add(parent);
- } else {
- LoggingUtil.warning("Result added twice: "+parent+" <- "+child, new Throwable());
- }
- }
- }
-
- @Override
- public void remove(O parent, O child) {
- // Remove child from parent.
- {
- List<O> pchi = this.cmap.get(parent);
- if(pchi != null) {
- while(pchi.remove(child)) {
- // repeat - remove all instances
- }
- if(pchi.size() == 0) {
- this.cmap.remove(parent);
- }
- }
- }
- // Remove parent from child
- {
- List<O> cpar = this.pmap.get(child);
- if(cpar != null) {
- while(cpar.remove(parent)) {
- // repeat - remove all instances
- }
- if(cpar.size() == 0) {
- this.pmap.remove(child);
- }
- }
- }
- }
-
- /**
- * Put an object along with parent and child lists.
- *
- * @param obj Object
- * @param parents Parent list
- * @param children Child list
- */
- public void put(O obj, List<O> parents, List<O> children) {
- this.pmap.put(obj, parents);
- this.cmap.put(obj, children);
- }
-
- @Override
- public int numChildren(O obj) {
- List<O> children = this.cmap.get(obj);
- if(children == null) {
- return 0;
- }
- return children.size();
- }
-
- @Override
- public List<O> getChildren(O obj) {
- List<O> children = this.cmap.get(obj);
- if(children == null) {
- return Collections.emptyList();
- }
- return children;
- }
-
- @Override
- public Iterator<O> iterDescendants(O obj) {
- return new ItrDesc(obj);
- }
-
- @Override
- public int numParents(O obj) {
- List<O> parents = this.pmap.get(obj);
- if(parents == null) {
- return 0;
- }
- return parents.size();
- }
-
- @Override
- public List<O> getParents(O obj) {
- List<O> parents = this.pmap.get(obj);
- if(parents == null) {
- return Collections.emptyList();
- }
- return parents;
- }
-
- @Override
- public Iterator<O> iterAncestors(O obj) {
- return new ItrAnc(obj);
- }
-
- /**
- * Iterator to collect into the descendants.
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
- */
- private class ItrDesc implements Iterator<O> {
- /**
- * Starting object (for cloning);
- */
- final O start;
-
- /**
- * Iterator over children
- */
- final Iterator<O> childiter;
-
- /**
- * Iterator of current child
- */
- Iterator<O> subiter;
-
- public ItrDesc(O start) {
- this.start = start;
- List<O> children = getChildren(start);
- if(children != null) {
- this.childiter = children.iterator();
- }
- else {
- this.childiter = EmptyIterator.STATIC();
- }
- this.subiter = null;
- }
-
- @Override
- public boolean hasNext() {
- if(subiter != null && subiter.hasNext()) {
- return true;
- }
- return childiter.hasNext();
- }
-
- @Override
- public O next() {
- // Try nested iterator first ...
- if(subiter != null && subiter.hasNext()) {
- return subiter.next();
- }
- // Next direct child, update subiter.
- final O child = childiter.next();
- subiter = iterDescendants(child);
- return child;
- }
-
- @Override
- public void remove() {
- throw new UnsupportedOperationException();
- }
- }
-
- /**
- * Iterator over all Ancestors.
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
- */
- private class ItrAnc implements Iterator<O> {
- /**
- * Starting object (for cloning);
- */
- final O start;
-
- /**
- * Iterator over parents
- */
- final Iterator<O> parentiter;
-
- /**
- * Iterator of current parent
- */
- Iterator<O> subiter;
-
- public ItrAnc(O start) {
- this.start = start;
- List<O> parents = getParents(start);
- if(parents != null) {
- this.parentiter = parents.iterator();
- }
- else {
- this.parentiter = EmptyIterator.STATIC();
- }
- this.subiter = null;
- }
-
- @Override
- public boolean hasNext() {
- if(subiter != null && subiter.hasNext()) {
- return true;
- }
- return parentiter.hasNext();
- }
-
- @Override
- public O next() {
- // Try nested iterator first ...
- if(subiter != null && subiter.hasNext()) {
- return subiter.next();
- }
- // Next direct parent, update subiter.
- final O parent = parentiter.next();
- subiter = iterAncestors(parent);
- return parent;
- }
-
- @Override
- public void remove() {
- throw new UnsupportedOperationException();
- }
- }
-} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/HierarchyReferenceLists.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/HierarchyReferenceLists.java
deleted file mode 100644
index 76091298..00000000
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/HierarchyReferenceLists.java
+++ /dev/null
@@ -1,232 +0,0 @@
-package de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy;
-
-/*
- This file is part of ELKI:
- Environment for Developing KDD-Applications Supported by Index-Structures
-
- Copyright (C) 2012
- Ludwig-Maximilians-Universität München
- Lehr- und Forschungseinheit für Datenbanksysteme
- ELKI Development Team
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-import java.util.Iterator;
-import java.util.List;
-
-import de.lmu.ifi.dbs.elki.utilities.iterator.EmptyIterator;
-
-/**
- * Hierarchy implementation with a per-object representation.
- *
- * @author Erich Schubert
- *
- * @apiviz.uses Hierarchical
- *
- * @param <O> Type of objects in hierarchy
- */
-public class HierarchyReferenceLists<O extends Hierarchical<O>> implements Hierarchy<O> {
- /**
- * Owner
- */
- protected O owner;
-
- /**
- * Storage for children
- */
- protected List<O> children;
-
- /**
- * Storage for parents
- */
- protected List<O> parents;
-
- /**
- * Constructor for hierarchy object.
- *
- * @param owner owning cluster.
- * @param children child clusters. May be null.
- * @param parents parent clusters. May be null.
- */
- public HierarchyReferenceLists(O owner, List<O> children, List<O> parents) {
- super();
- this.owner = owner;
- this.children = children;
- this.parents = parents;
- }
-
- @Override
- public int numChildren(O self) {
- if(owner != self) {
- throw new UnsupportedOperationException("Decentral hierarchy queried for wrong object!");
- }
- if(children == null) {
- return 0;
- }
- return children.size();
- }
-
- @Override
- public List<O> getChildren(O self) {
- if(owner != self) {
- throw new UnsupportedOperationException("Decentral hierarchy queried for wrong object!");
- }
- return children;
- }
-
- @Override
- public Iterator<O> iterDescendants(O self) {
- if(owner != self) {
- return EmptyIterator.STATIC();
- }
- if (children == null) {
- return EmptyIterator.STATIC();
- }
- return new ItrDesc(self);
- }
-
- @Override
- public int numParents(O self) {
- if(owner != self) {
- throw new UnsupportedOperationException("Decentral hierarchy queried for wrong object!");
- }
- if (parents == null) {
- return 0;
- }
- return parents.size();
- }
-
- /**
- * Return parents
- */
- @Override
- public List<O> getParents(O self) {
- if(owner != self) {
- throw new UnsupportedOperationException("Decentral hierarchy queried for wrong object!");
- }
- return parents;
- }
-
- @Override
- public Iterator<O> iterAncestors(O self) {
- if(owner != self) {
- throw new UnsupportedOperationException("Decentral hierarchy queried for wrong object!");
- }
- if (parents == null) {
- return EmptyIterator.STATIC();
- }
- return new ItrAnc(self);
- }
-
- /**
- * Iterator to collect into the descendants.
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
- */
- private class ItrDesc implements Iterator<O> {
- /**
- * Iterator over children
- */
- final Iterator<O> childiter;
-
- /**
- * Iterator of current child
- */
- Iterator<O> subiter;
-
- public ItrDesc(O start) {
- assert (start == owner);
- this.childiter = children.iterator();
- this.subiter = null;
- }
-
- @Override
- public boolean hasNext() {
- if(subiter != null && subiter.hasNext()) {
- return true;
- }
- return childiter.hasNext();
- }
-
- @Override
- public O next() {
- // Try nested iterator first ...
- if(subiter != null && subiter.hasNext()) {
- return subiter.next();
- }
- // Next direct child, update subiter.
- final O child = childiter.next();
- subiter = child.iterDescendants();
- return child;
- }
-
- @Override
- public void remove() {
- throw new UnsupportedOperationException();
- }
- }
-
- /**
- * Iterator over all Ancestors.
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
- */
- private class ItrAnc implements Iterator<O> {
- /**
- * Iterator over parents
- */
- final Iterator<O> parentiter;
-
- /**
- * Iterator of current parent
- */
- Iterator<O> subiter;
-
- public ItrAnc(O start) {
- assert (start == owner);
- this.parentiter = parents.iterator();
- this.subiter = null;
- }
-
- @Override
- public boolean hasNext() {
- if(subiter != null && subiter.hasNext()) {
- return true;
- }
- return parentiter.hasNext();
- }
-
- @Override
- public O next() {
- // Try nested iterator first ...
- if(subiter != null && subiter.hasNext()) {
- return subiter.next();
- }
- // Next direct parent, update subiter.
- final O parent = parentiter.next();
- subiter = parent.iterAncestors();
- return parent;
- }
-
- @Override
- public void remove() {
- throw new UnsupportedOperationException();
- }
- }
-} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/ModifiableHierarchy.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/ModifiableHierarchy.java
index dadc6f66..06001d6b 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/ModifiableHierarchy.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/ModifiableHierarchy.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -23,8 +23,6 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-
-
/**
* Modifiable Hierarchy.
*
@@ -39,8 +37,14 @@ public interface ModifiableHierarchy<O> extends Hierarchy<O> {
* @param parent Parent
* @param child Child
*/
- // TODO: return true when new?
- public void add(O parent, O child);
+ void add(O parent, O child);
+
+ /**
+ * Add an entry (initializes data structures).
+ *
+ * @param entry Entry
+ */
+ void add(O entry);
/**
* Remove a parent-child relationship.
@@ -48,6 +52,20 @@ public interface ModifiableHierarchy<O> extends Hierarchy<O> {
* @param parent Parent
* @param child Child
*/
- // TODO: return true when found?
- public void remove(O parent, O child);
+ void remove(O parent, O child);
+
+ /**
+ * Remove an entry and all its parent-child relationships.
+ *
+ * @param entry Entry
+ */
+ void remove(O entry);
+
+ /**
+ * Remove an entry and its whole subtree (unless the elements are reachable
+ * by a different path!)
+ *
+ * @param entry Entry
+ */
+ void removeSubtree(O entry);
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/package-info.java
index 0aba31be..965b15fc 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/package-info.java
@@ -5,7 +5,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2012
+Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractObjDynamicHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractObjDynamicHistogram.java
index 9d0dba0d..165c2c8b 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractObjDynamicHistogram.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractObjDynamicHistogram.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -127,21 +127,36 @@ public abstract class AbstractObjDynamicHistogram<T> extends AbstractObjStaticHi
// Store in cache
if (cachefill >= 0) {
if (cachefill < cacheposs.length) {
-
cacheposs[cachefill] = coord;
cachevals[cachefill] = cloneForCache(value);
- cachefill++;
+ ++cachefill;
return;
- } else {
- materialize();
- // But continue below!
}
}
- // Check if we need to resample to accomodate this bin.
- testResample(coord);
- // super class will handle histogram resizing / shifting
- T exist = get(coord);
- data[getBinNr(coord)] = aggregate(exist, value);
+ if (coord == Double.NEGATIVE_INFINITY) {
+ aggregateSpecial(value, 0);
+ } else if (coord == Double.POSITIVE_INFINITY) {
+ aggregateSpecial(value, 1);
+ } else if (Double.isNaN(coord)) {
+ aggregateSpecial(value, 2);
+ } else {
+ // super class will handle histogram resizing / shifting
+ T exist = get(coord);
+ data[getBinNr(coord)] = aggregate(exist, value);
+ }
+ }
+
+ /**
+ * Aggregate for a special value.
+ *
+ * @param value Parameter value
+ * @param bin Special bin index.
+ */
+ protected void aggregateSpecial(T value, int bin) {
+ final T exist = getSpecial(bin);
+ // Note: do not inline above accessor, as getSpecial will initialize the
+ // special variable used below!
+ special[bin] = aggregate(exist, value);
}
/**
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractObjStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractObjStaticHistogram.java
index c1882302..4a1649af 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractObjStaticHistogram.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractObjStaticHistogram.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -35,6 +35,16 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
*/
public abstract class AbstractObjStaticHistogram<T> extends AbstractStaticHistogram implements ObjHistogram<T> {
/**
+ * Data store
+ */
+ Object[] data;
+
+ /**
+ * Special value storage: infinity, NaN
+ */
+ Object[] special = null;
+
+ /**
* Constructor.
*
* @param bins Number of bins
@@ -46,15 +56,13 @@ public abstract class AbstractObjStaticHistogram<T> extends AbstractStaticHistog
if (bins >= 0) {
// -1 will be used by FlexiHistogram to delay initialization.
data = new Object[bins];
+ for (int i = 0; i < bins; i++) {
+ data[i] = makeObject();
+ }
}
}
/**
- * Data store
- */
- Object[] data;
-
- /**
* Access the value of a bin with new data.
*
* @param coord Coordinate
@@ -62,6 +70,15 @@ public abstract class AbstractObjStaticHistogram<T> extends AbstractStaticHistog
*/
@SuppressWarnings("unchecked")
public T get(double coord) {
+ if (coord == Double.NEGATIVE_INFINITY) {
+ return getSpecial(0);
+ }
+ if (coord == Double.POSITIVE_INFINITY) {
+ return getSpecial(1);
+ }
+ if (Double.isNaN(coord)) {
+ return getSpecial(2);
+ }
int bin = getBinNr(coord);
if (bin < 0) {
if (size - bin > data.length) {
@@ -103,6 +120,19 @@ public abstract class AbstractObjStaticHistogram<T> extends AbstractStaticHistog
}
/**
+ * Access a special-value bin (infinity, NaN), allocating the storage on first use.
+ *
+ * @param idx Special bin index (0: -infinity, 1: +infinity, 2: NaN).
+ */
+ @SuppressWarnings("unchecked")
+ protected T getSpecial(int idx) {
+ if (special == null) {
+ special = new Object[] { makeObject(), makeObject(), makeObject() };
+ }
+ return (T) special[idx];
+ }
+
+ /**
* Class to make a new object for the data store.
*
* @return New instance.
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractStaticHistogram.java
index 799ac009..3363e61e 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractStaticHistogram.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractStaticHistogram.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleArrayStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleArrayStaticHistogram.java
index aeba3c4b..86b53d03 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleArrayStaticHistogram.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleArrayStaticHistogram.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2012
+Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleDynamicHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleDynamicHistogram.java
index 77a1f9e4..84f97dfe 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleDynamicHistogram.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleDynamicHistogram.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleHistogram.java
index d5cee785..e4a24c95 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleHistogram.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleHistogram.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleStaticHistogram.java
index db839d10..5a634cf2 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleStaticHistogram.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleStaticHistogram.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatDynamicHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatDynamicHistogram.java
index a14ed00a..9829eaf8 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatDynamicHistogram.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatDynamicHistogram.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatHistogram.java
index f5a65bfa..7f034152 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatHistogram.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatHistogram.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatStaticHistogram.java
index b3f41994..063bd80a 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatStaticHistogram.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatStaticHistogram.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/Histogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/Histogram.java
index 75be6830..8c8d9a87 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/Histogram.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/Histogram.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -23,7 +23,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import de.lmu.ifi.dbs.elki.utilities.iterator.ArrayIter;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.ArrayIter;
/**
* Abstract API for histograms. Without specific type information, to allow this
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntArrayStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntArrayStaticHistogram.java
index 8d00604b..ff9a82aa 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntArrayStaticHistogram.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntArrayStaticHistogram.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2012
+Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntDynamicHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntDynamicHistogram.java
index 0967ebd5..b131af7d 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntDynamicHistogram.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntDynamicHistogram.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntHistogram.java
index 9ec4ec56..9bfae100 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntHistogram.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntHistogram.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntStaticHistogram.java
index d4de36d7..7b1eed94 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntStaticHistogram.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntStaticHistogram.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongArrayStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongArrayStaticHistogram.java
index efbf751f..e3580792 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongArrayStaticHistogram.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongArrayStaticHistogram.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongDynamicHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongDynamicHistogram.java
index 676a5e8f..93c4eee5 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongDynamicHistogram.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongDynamicHistogram.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongHistogram.java
index 9be15e65..16577c38 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongHistogram.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongHistogram.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongStaticHistogram.java
index 63e15599..b270908d 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongStaticHistogram.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongStaticHistogram.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/MeanVarianceStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/MeanVarianceStaticHistogram.java
index 2a464382..0f1ea0a3 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/MeanVarianceStaticHistogram.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/MeanVarianceStaticHistogram.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ObjHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ObjHistogram.java
index ac4d4e4b..bad4eec1 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ObjHistogram.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ObjHistogram.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortDynamicHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortDynamicHistogram.java
index ff94928b..a49810ee 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortDynamicHistogram.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortDynamicHistogram.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortHistogram.java
index 699df896..0b83bc4c 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortHistogram.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortHistogram.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortStaticHistogram.java
index b2809e1e..2819d966 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortStaticHistogram.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortStaticHistogram.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/package-info.java
index 65dd6446..cee1836b 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/package-info.java
@@ -13,7 +13,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/ArrayIter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/ArrayIter.java
new file mode 100644
index 00000000..7b2a96ad
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/ArrayIter.java
@@ -0,0 +1,59 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.iterator;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Array iterators can also go backwards and seek.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.excludeSubtypes
+ */
+public interface ArrayIter extends Iter {
+ /**
+ * Get current iterator offset.
+ *
+ * @return Iterator position
+ */
+ public int getOffset();
+
+ /**
+ * Moves the iterator forward or backward by the given offset.
+ *
+ * @param count offset to move forward or backwards
+ */
+ public void advance(int count);
+
+ /**
+ * Moves the iterator backward to the previous entry.
+ */
+ public void retract();
+
+ /**
+ * Moves the iterator to the given position.
+ *
+ * @param off Seek offset
+ */
+ public void seek(int off);
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/ArrayListIter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/ArrayListIter.java
new file mode 100644
index 00000000..820217ec
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/ArrayListIter.java
@@ -0,0 +1,99 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.iterator;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.List;
+
+/**
+ * ELKI style Iterator for array lists.
+ *
+ * Note: this implementation is only efficient for lists with efficient random
+ * access and seeking (i.e. ArrayLists, but not Linked Lists!)
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.excludeSubtypes
+ *
+ * @param <O> contained object type.
+ */
+public class ArrayListIter<O> implements ArrayIter {
+ /**
+ * The array list to iterate over.
+ */
+ final List<O> data;
+
+ /**
+ * Current position.
+ */
+ int pos = 0;
+
+ /**
+ * Constructor.
+ *
+ * @param data Data list.
+ */
+ public ArrayListIter(List<O> data) {
+ super();
+ this.data = data;
+ }
+
+ @Override
+ public boolean valid() {
+ return pos < data.size();
+ }
+
+ @Override
+ public void advance() {
+ pos++;
+ }
+
+ @Override
+ public int getOffset() {
+ return pos;
+ }
+
+ @Override
+ public void advance(int count) {
+ pos += count;
+ }
+
+ @Override
+ public void retract() {
+ pos--;
+ }
+
+ @Override
+ public void seek(int off) {
+ pos = off;
+ }
+
+ /**
+ * Get the current element.
+ *
+ * @return current element
+ */
+ public O get() {
+ return data.get(pos);
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/Iter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/Iter.java
new file mode 100644
index 00000000..3d111f14
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/Iter.java
@@ -0,0 +1,71 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.iterator;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Iterator interface for more than one return value.
+ *
+ * The Java standard {@link java.util.Iterator} interface has some drawbacks:
+ * <ul>
+ * <li>the only way to get the current value is to advance the iterator</li>
+ * <li>the iterator can only point to a single value</li>
+ * <li>the iterator can only return objects, not primitives</li>
+ * </ul>
+ *
+ * This iterator interface is a bit more flexible. For example on a distance
+ * list, we can have a single type of iterator that allows access to the
+ * distance, the object ID or the combination of both.
+ *
+ * In some situations, this can save the creation of many small objects, which
+ * put load on the garbage collector. This super interface does not have a "get"
+ * operation, which is to come from specialized interfaces instead.
+ *
+ * Usage example:
+ *
+ * <pre>
+ * {@code
+ * for (Iter iter = ids.iter(); iter.valid(); iter.advance()) {
+ * iter.doSomething();
+ * }
+ * }
+ * </pre>
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.landmark
+ * @apiviz.excludeSubtypes
+ */
+public interface Iter {
+ /**
+ * Returns true if the iterator currently points to a valid object.
+ *
+ * @return a <code>boolean</code> value, whether the position is valid.
+ */
+ public boolean valid();
+
+ /**
+ * Moves the iterator forward to the next entry.
+ */
+ public void advance();
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/MIter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/MIter.java
new file mode 100644
index 00000000..14e5443d
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/MIter.java
@@ -0,0 +1,54 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.iterator;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Modifiable iterator, that also supports removal.
+ *
+ * Usage example:
+ *
+ * <pre>
+ * {@code
+ * for (MIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ * if (testSomething(iter)) {
+ * iter.remove();
+ * continue; // Iterator may point to something else
+ * }
+ * }
+ * }
+ * </pre>
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.excludeSubtypes
+ */
+public interface MIter extends Iter {
+ /**
+ * Remove the object the iterator currently points to.
+ *
+ * Note that, usually, the iterator will now point to a different object, very
+ * often to the previous one (but this is not guaranteed!)
+ */
+ void remove();
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/package-info.java
new file mode 100644
index 00000000..d241fcc4
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/package-info.java
@@ -0,0 +1,40 @@
+/**
+ * <p>ELKI Iterator API.</p>
+ *
+ * <p>ELKI uses a custom iterator API instead of the usual {@link java.util.Iterator} classes (the "Java Collections API").
+ * The reason for this is largely efficiency. Benchmarking showed that the Java Iterator API can be quite expensive when dealing
+ * with primitive types, as {@link java.util.Iterator#next} is meant to always return an object.</p>
+ *
+ * <p>However, the benefits become more apparent when considering multi-valued iterators.
+ * For example, an iterator over a k nearest neighbor set in ELKI represents both an object (by its DBID)
+ * and a distance value. For double-valued distances, the distance can be retrieved using a primitive value getter
+ * (saving an extra object copy), and since the iterator can be used as a DBIDRef, it can also represent
+ * the current object without creating additional objects.</p>
+ *
+ * <p>While it may seem odd to depart from Java conventions such as the collections API,
+ * note that these iterators are very close to the standard C++ conventions, so this is nothing entirely unusual.
+ * Also, the GNU Trove libraries - used by ELKI in various places - use the same kind of iterators.</p>
+ */
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package de.lmu.ifi.dbs.elki.utilities.datastructures.iterator; \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/package-info.java
index ae8308af..a0d894a9 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/package-info.java
@@ -5,7 +5,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2012
+Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team