diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/utilities')
264 files changed, 11073 insertions, 4988 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/InspectionUtilFrequentlyScanned.java b/src/de/lmu/ifi/dbs/elki/utilities/Alias.java index 11ea1da8..839fc406 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/InspectionUtilFrequentlyScanned.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/Alias.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,13 +23,25 @@ package de.lmu.ifi.dbs.elki.utilities; along with this program. If not, see <http://www.gnu.org/licenses/>. */ +import java.lang.annotation.Documented; +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; /** - * Marker interface for classes that are frequently scanned for by the UI - * (i.e. InspectionUtil) The classes will therefore be cached. + * This interface defines alias names for classes. * * @author Erich Schubert */ -public interface InspectionUtilFrequentlyScanned { - // empty -}
\ No newline at end of file +@Documented +@Retention(RetentionPolicy.RUNTIME) +@Target({ ElementType.TYPE }) +public @interface Alias { + /** + * Alias names of the class. + * + * @return Array of alias names + */ + String[] value(); +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/Base64.java b/src/de/lmu/ifi/dbs/elki/utilities/Base64.java index bcd4c8ff..c6a22f20 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/Base64.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/Base64.java @@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.utilities; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/BitsUtil.java b/src/de/lmu/ifi/dbs/elki/utilities/BitsUtil.java index ed00dbab..d1e0587c 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/BitsUtil.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/BitsUtil.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -969,19 +969,19 @@ public final class BitsUtil { * @return Position of first set bit, 64 if no set bit was found. */ public static int numberOfLeadingZeros(long v) { - return Long.SIZE - magnitude(v); + return Long.numberOfLeadingZeros(v); } /** - * Find the number of leading zeros; 64 if all zero + * Find the number of leading zeros; 32 if all zero * - * Note: this the same as {@link Long#numberOfLeadingZeros}. + * Note: this the same as {@link Integer#numberOfLeadingZeros}. * * @param v Bitset - * @return Position of first set bit, 64 if no set bit was found. + * @return Position of first set bit, 32 if no set bit was found. 
*/ public static int numberOfLeadingZeros(int v) { - return Integer.SIZE - magnitude(v); + return Integer.numberOfLeadingZeros(v); } /** @@ -1114,34 +1114,7 @@ public final class BitsUtil { * @return position of highest bit set, or 0. */ public static int magnitude(long v) { - int log = 0, t; - if ((v & 0xffffffff00000000L) != 0) { - t = (int) (v >>>= 32); - log = 32; - } else { - t = (int) v; - } - if ((t & 0xffff0000) != 0) { - t >>>= 16; - log += 16; - } - if (t >= 256) { - t >>>= 8; - log += 8; - } - if (t >= 16) { - t >>>= 4; - log += 4; - } - if (t >= 4) { - t >>>= 2; - log += 2; - } - if (t >= 2) { - t >>>= 1; - log += 1; - } - return log + t; + return Long.SIZE - Long.numberOfLeadingZeros(v); } /** @@ -1151,28 +1124,7 @@ public final class BitsUtil { * @return position of highest bit set, or 0. */ public static int magnitude(int v) { - int log = 0; - if ((v & 0xffff0000) != 0) { - v >>>= 16; - log = 16; - } - if (v >= 256) { - v >>>= 8; - log += 8; - } - if (v >= 16) { - v >>>= 4; - log += 4; - } - if (v >= 4) { - v >>>= 2; - log += 2; - } - if (v >= 2) { - v >>>= 1; - log += 1; - } - return log + v; + return Integer.SIZE - Integer.numberOfLeadingZeros(v); } /** diff --git a/src/de/lmu/ifi/dbs/elki/utilities/ClassGenericsUtil.java b/src/de/lmu/ifi/dbs/elki/utilities/ClassGenericsUtil.java index 90b5ac62..a60bf15f 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/ClassGenericsUtil.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/ClassGenericsUtil.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -325,7 +325,7 @@ public final class ClassGenericsUtil { public static <T> ArrayList<T>[] newArrayOfEmptyArrayList(int len) { ArrayList[] result = new ArrayList[len]; for (int i = 0; i < len; i++) { - result[i] = 
new ArrayList<T>(); + result[i] = new ArrayList<>(); } return result; } @@ -344,7 +344,7 @@ public final class ClassGenericsUtil { public static <T> HashSet<T>[] newArrayOfEmptyHashSet(int len) { HashSet[] result = new HashSet[len]; for (int i = 0; i < len; i++) { - result[i] = new HashSet<T>(); + result[i] = new HashSet<>(); } return result; } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/DatabaseUtil.java b/src/de/lmu/ifi/dbs/elki/utilities/DatabaseUtil.java index 31e2431d..baa90829 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/DatabaseUtil.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/DatabaseUtil.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -58,6 +58,9 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; * @author Erich Schubert * * @apiviz.landmark + * + * @apiviz.has RelationObjectIterator + * @apiviz.has CollectionFromRelation */ public final class DatabaseUtil { /** @@ -130,7 +133,7 @@ public final class DatabaseUtil { NumberVector.Factory<NV, ?> factory = RelationUtil.getNumberVectorFactory(relation); NV min = factory.newNumberVector(mins); NV max = factory.newNumberVector(maxs); - return new Pair<NV, NV>(min, max); + return new Pair<>(min, max); } /** @@ -261,7 +264,7 @@ public final class DatabaseUtil { * database */ public static SortedSet<ClassLabel> getClassLabels(Relation<? 
extends ClassLabel> database) { - SortedSet<ClassLabel> labels = new TreeSet<ClassLabel>(); + SortedSet<ClassLabel> labels = new TreeSet<>(); for (DBIDIter it = database.iterDBIDs(); it.valid(); it.advance()) { labels.add(database.get(it)); } @@ -305,7 +308,7 @@ public final class DatabaseUtil { * @return Superclass of all objects in the database */ public static <O> Class<?> getBaseObjectClassExpensive(Relation<O> database) { - List<Class<?>> candidates = new ArrayList<Class<?>>(); + List<Class<?>> candidates = new ArrayList<>(); DBIDIter iditer = database.iterDBIDs(); // empty database?! if (!iditer.valid()) { @@ -471,7 +474,7 @@ public final class DatabaseUtil { @Override public Iterator<O> iterator() { - return new DatabaseUtil.RelationObjectIterator<O>(db); + return new DatabaseUtil.RelationObjectIterator<>(db); } @Override diff --git a/src/de/lmu/ifi/dbs/elki/utilities/ELKIServiceLoader.java b/src/de/lmu/ifi/dbs/elki/utilities/ELKIServiceLoader.java index 129c6458..d42b2834 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/ELKIServiceLoader.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/ELKIServiceLoader.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -112,20 +112,19 @@ public class ELKIServiceLoader implements Iterator<Class<?>> { try { String fullName = PREFIX + parent.getName(); configfiles = cl.getResources(fullName); - } - catch(IOException x) { + } catch (IOException x) { throw new AbortException("Could not load service configuration files.", x); } } @Override public boolean hasNext() { - if(nextclass != null) { + if (nextclass != null) { return true; } // Find next iterator - while((curiter == null) || !curiter.hasNext()) { - if(!configfiles.hasMoreElements()) { + while ((curiter == null) || 
!curiter.hasNext()) { + if (!configfiles.hasMoreElements()) { return false; } curiter = parseFile(configfiles.nextElement()); @@ -135,58 +134,55 @@ public class ELKIServiceLoader implements Iterator<Class<?>> { } private Iterator<Class<?>> parseFile(URL nextElement) { - ArrayList<Class<?>> classes = new ArrayList<Class<?>>(); + ArrayList<Class<?>> classes = new ArrayList<>(); try { BufferedReader r = new BufferedReader(new InputStreamReader(nextElement.openStream(), "utf-8")); - while(parseLine(r.readLine(), classes, nextElement)) { + while (parseLine(r.readLine(), classes, nextElement)) { // Continue } - } - catch(IOException x) { + } catch (IOException x) { throw new AbortException("Error reading configuration file", x); } return classes.iterator(); } private boolean parseLine(String line, ArrayList<Class<?>> classes, URL nextElement) { - if(line == null) { + if (line == null) { return false; } // Ignore comments, trim whitespace { int begin = 0; int end = line.indexOf(COMMENT_CHAR); - if(end < 0) { + if (end < 0) { end = line.length(); } - while(begin < end && line.charAt(begin) == ' ') { + while (begin < end && line.charAt(begin) == ' ') { begin++; } - while(end - 1 > begin && line.charAt(end - 1) == ' ') { + while (end - 1 > begin && line.charAt(end - 1) == ' ') { end--; } - if(begin > 0 || end < line.length()) { + if (begin > 0 || end < line.length()) { line = line.substring(begin, end); } } - if(line.length() <= 0) { + if (line.length() <= 0) { return true; // Empty/comment lines are okay, continue } // Try to load the class try { Class<?> cls = cl.loadClass(line); // Should not happen. Check anyway. 
- if(cls == null) { + if (cls == null) { return true; } - if(parent.isAssignableFrom(cls)) { + if (parent.isAssignableFrom(cls)) { classes.add(cls); - } - else { + } else { LOG.warning("Class " + line + " does not implement " + parent + " but listed in service file " + nextElement); } - } - catch(ClassNotFoundException e) { + } catch (ClassNotFoundException e) { LOG.warning("Class not found: " + line + "; listed in service file " + nextElement, e); } return true; @@ -203,4 +199,4 @@ public class ELKIServiceLoader implements Iterator<Class<?>> { public void remove() { throw new UnsupportedOperationException(); } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/FileUtil.java b/src/de/lmu/ifi/dbs/elki/utilities/FileUtil.java index ef01e084..5c3c78b5 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/FileUtil.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/FileUtil.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -39,7 +39,7 @@ import java.util.zip.GZIPInputStream; public final class FileUtil { /** * Fake Constructor. Use static methods. - * + * */ private FileUtil() { // Do not instantiate. @@ -68,19 +68,19 @@ public final class FileUtil { * <code>null</code> */ public static String getFilenameExtension(String name) { - if(name == null) { + if (name == null) { return null; } int index = name.lastIndexOf('.'); - if(index >= name.length() - 1) { + if (index >= name.length() - 1) { return null; } return name.substring(name.lastIndexOf('.') + 1).toLowerCase(); } /** - * Try to open a file, first trying the file system, - * then falling back to the classpath. + * Try to open a file, first trying the file system, then falling back to the + * classpath. 
* * @param filename File name in system notation * @return Input stream @@ -89,8 +89,7 @@ public final class FileUtil { public static InputStream openSystemFile(String filename) throws FileNotFoundException { try { return new FileInputStream(filename); - } - catch(FileNotFoundException e) { + } catch (FileNotFoundException e) { // try with classloader String resname = filename.replace(File.separatorChar, '/'); InputStream result = ClassLoader.getSystemResourceAsStream(resname); @@ -116,7 +115,7 @@ public final class FileUtil { PushbackInputStream pb = new PushbackInputStream(in, 16); in = pb; // read a magic from the file header - byte[] magic = {0, 0}; + byte[] magic = { 0, 0 }; pb.read(magic); pb.unread(magic); if (magic[0] == 31 && magic[1] == -117) { @@ -145,55 +144,73 @@ public final class FileUtil { public static File locateFile(String name, String basedir) { // Try exact match first. File f = new File(name); - if(f.exists()) { + if (f.exists()) { return f; } // Try with base directory - if(basedir != null) { + if (basedir != null) { f = new File(basedir, name); // logger.warning("Trying: "+f.getAbsolutePath()); - if(f.exists()) { + if (f.exists()) { return f; } } // try stripping whitespace { String name2 = name.trim(); - if(!name.equals(name2)) { + if (!name.equals(name2)) { // logger.warning("Trying without whitespace."); f = locateFile(name2, basedir); - if(f != null) { + if (f != null) { return f; } } } // try substituting path separators { - String name2 = name.replace('/',File.separatorChar); + String name2 = name.replace('/', File.separatorChar); if (!name.equals(name2)) { // logger.warning("Trying with replaced separators."); f = locateFile(name2, basedir); - if(f != null) { + if (f != null) { return f; } } - name2 = name.replace('\\',File.separatorChar); + name2 = name.replace('\\', File.separatorChar); if (!name.equals(name2)) { // logger.warning("Trying with replaced separators."); f = locateFile(name2, basedir); - if(f != null) { + if (f != null) 
{ return f; } } } // try stripping extra characters, such as quotes. - if(name.length() > 2 && name.charAt(0) == '"' && name.charAt(name.length() - 1) == '"') { + if (name.length() > 2 && name.charAt(0) == '"' && name.charAt(name.length() - 1) == '"') { // logger.warning("Trying without quotes."); f = locateFile(name.substring(1, name.length() - 1), basedir); - if(f != null) { + if (f != null) { return f; } } return null; } + + /** + * Load an input stream (e.g. a Java resource) into a String buffer. The + * stream is closed afterwards. + * + * @param is Input stream + * @return String with file/resource contents. + * @throws IOException on IO errors + */ + public static String slurp(InputStream is) throws IOException { + StringBuilder buf = new StringBuilder(); + final byte[] b = new byte[4096]; + for (int n; (n = is.read(b)) != -1;) { + buf.append(new String(b, 0, n)); + } + is.close(); + return buf.toString(); + } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/FormatUtil.java b/src/de/lmu/ifi/dbs/elki/utilities/FormatUtil.java index e4431fd5..08b7bd0d 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/FormatUtil.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/FormatUtil.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -72,7 +72,7 @@ public final class FormatUtil { static { NF.setMinimumFractionDigits(0); - NF.setMaximumFractionDigits(10); + NF.setMaximumFractionDigits(8); NF.setGroupingUsed(false); NF0.setMinimumFractionDigits(0); NF0.setMaximumFractionDigits(0); @@ -721,7 +721,7 @@ public final class FormatUtil { * @return String representation of this Matrix */ public static String format(Matrix m) { - return format(m, FormatUtil.NF8); + return format(m, FormatUtil.NF); } /** @@ -741,7 +741,7 @@ public 
final class FormatUtil { * @return String representation of this Vector */ public static String format(Vector m) { - return format(m, FormatUtil.NF8); + return format(m, FormatUtil.NF); } /** @@ -871,7 +871,7 @@ public final class FormatUtil { * @return string fragments */ public static List<String> splitAtLastBlank(String s, int width) { - List<String> chunks = new ArrayList<String>(); + List<String> chunks = new ArrayList<>(); String tmp = s; while (tmp.length() > 0) { diff --git a/src/de/lmu/ifi/dbs/elki/utilities/HandlerList.java b/src/de/lmu/ifi/dbs/elki/utilities/HandlerList.java index 8b5fcaca..fa17e04f 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/HandlerList.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/HandlerList.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -42,7 +42,7 @@ public final class HandlerList<H> { * List with registered Handlers. The list is kept in backwards order, that is * the later entrys take precedence. */ - private ArrayList<Pair<Class<?>, H>> handlers = new ArrayList<Pair<Class<?>, H>>(); + private ArrayList<Pair<Class<?>, H>> handlers = new ArrayList<>(); /** * Insert a handler to the beginning of the stack. 
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/InspectionUtil.java b/src/de/lmu/ifi/dbs/elki/utilities/InspectionUtil.java index aedc4e7c..0d147420 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/InspectionUtil.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/InspectionUtil.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -28,6 +28,7 @@ import gnu.trove.set.hash.THashSet; import java.io.File; import java.io.IOException; import java.lang.reflect.Modifier; +import java.net.URISyntaxException; import java.net.URL; import java.util.ArrayList; import java.util.Collections; @@ -36,12 +37,8 @@ import java.util.Enumeration; import java.util.Iterator; import java.util.List; import java.util.WeakHashMap; -import java.util.jar.JarEntry; -import java.util.jar.JarFile; import de.lmu.ifi.dbs.elki.logging.Logging; -import de.lmu.ifi.dbs.elki.logging.LoggingUtil; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ClassParameter; import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; /** @@ -58,6 +55,11 @@ public class InspectionUtil { private static final Logging LOG = Logging.getLogger(InspectionUtil.class); /** + * Class loader + */ + private static final ClassLoader CLASSLOADER = ClassLoader.getSystemClassLoader(); + + /** * Default package ignores. */ private static final String[] DEFAULT_IGNORES = { @@ -77,6 +79,8 @@ public class InspectionUtil { "spin.", "osxadapter.", "antlr.", "ca.odell.", "com.jgoodies.", "com.michaelbaranov.", "com.mysql.", "gnu.dtools.", "net.sf.ext.", "net.sf.jabref.", "org.antlr.", "org.gjt.", "org.java.plugin.", "org.jempbox.", "org.pdfbox.", "wsi.ra.", // GNU trove "gnu.trove.", + // Java OpenGL + "jogamp.", "com.jogamp.", "javax.media.", "jogl.util." 
// }; @@ -86,12 +90,17 @@ public class InspectionUtil { */ public static final boolean NONSTATIC_CLASSPATH; + /** + * Factory class postfix. + */ + public static final String FACTORY_POSTFIX = "$Factory"; + // Check for non-jar entries in classpath. static { String[] classpath = System.getProperty("java.class.path").split(System.getProperty("path.separator")); boolean hasnonstatic = false; - for(String path : classpath) { - if(!path.endsWith(".jar")) { + for (String path : classpath) { + if (!path.endsWith(".jar")) { hasnonstatic = true; } } @@ -101,7 +110,7 @@ public class InspectionUtil { /** * Weak hash map for class lookups */ - private static WeakHashMap<Class<?>, List<Class<?>>> CLASS_CACHE = new WeakHashMap<Class<?>, List<Class<?>>>(); + private static WeakHashMap<Class<?>, List<Class<?>>> CLASS_CACHE = new WeakHashMap<>(); /** * (Non-weak) cache for all "frequently scanned" classes. @@ -116,11 +125,11 @@ public class InspectionUtil { * @return Found implementations */ public static List<Class<?>> cachedFindAllImplementations(Class<?> c) { - if(c == null) { + if (c == null) { return Collections.emptyList(); } List<Class<?>> res = CLASS_CACHE.get(c); - if(res == null) { + if (res == null) { res = findAllImplementations(c, false); CLASS_CACHE.put(c, res); } @@ -137,38 +146,33 @@ public class InspectionUtil { * @return List of found classes. */ public static List<Class<?>> findAllImplementations(Class<?> c, boolean everything) { - ArrayList<Class<?>> list = new ArrayList<Class<?>>(); + ArrayList<Class<?>> list = new ArrayList<>(); // Add all from service files (i.e. 
jars) { Iterator<Class<?>> iter = new ELKIServiceLoader(c); - while(iter.hasNext()) { + while (iter.hasNext()) { list.add(iter.next()); } } - if(!InspectionUtil.NONSTATIC_CLASSPATH) { - if(list.size() == 0) { + if (!InspectionUtil.NONSTATIC_CLASSPATH) { + if (list.size() == 0) { LOG.warning("No implementations for " + c.getName() + " were found using index files."); } - } - else { + } else { // Duplicate checking - THashSet<Class<?>> dupes = new THashSet<Class<?>>(list); - // Scan for additional ones in class path - Iterator<Class<?>> iter; - // If possible, reuse an existing scan result - if(InspectionUtilFrequentlyScanned.class.isAssignableFrom(c)) { - iter = getFrequentScan(); - } - else { - iter = slowScan(c).iterator(); + THashSet<Class<?>> dupes = new THashSet<>(list); + // Build cache on first use: + if (MASTER_CACHE == null) { + MASTER_CACHE = slowScan(); } - while(iter.hasNext()) { + Iterator<Class<?>> iter = MASTER_CACHE.iterator(); + while (iter.hasNext()) { Class<?> cls = iter.next(); // skip abstract / private classes. - if(!everything && (Modifier.isInterface(cls.getModifiers()) || Modifier.isAbstract(cls.getModifiers()) || Modifier.isPrivate(cls.getModifiers()))) { + if (!everything && (Modifier.isInterface(cls.getModifiers()) || Modifier.isAbstract(cls.getModifiers()) || Modifier.isPrivate(cls.getModifiers()))) { continue; } - if(c.isAssignableFrom(cls) && !dupes.contains(cls)) { + if (c.isAssignableFrom(cls) && !dupes.contains(cls)) { list.add(cls); dupes.add(cls); } @@ -178,64 +182,96 @@ public class InspectionUtil { } /** - * Get (or create) the result of a scan for any "frequent scanned" class. + * Find an implementation of the given interface / super class, given a + * relative class name or alias name. * - * @return Scan result + * @param restrictionClass Restriction class + * @param value Class name, relative class name, or nickname. 
+ * @return Class found or {@code null} */ - private static Iterator<Class<?>> getFrequentScan() { - if(MASTER_CACHE == null) { - MASTER_CACHE = slowScan(InspectionUtilFrequentlyScanned.class); + @SuppressWarnings("unchecked") + public static <C> Class<? extends C> findImplementation(Class<? super C> restrictionClass, String value) { + // Try exact class factory first. + try { + return (Class<? extends C>) CLASSLOADER.loadClass(value + FACTORY_POSTFIX); + } catch (ClassNotFoundException e) { + // Ignore, retry + } + try { + return (Class<? extends C>) CLASSLOADER.loadClass(value); + } catch (ClassNotFoundException e) { + // Ignore, retry + } + final String completedName = restrictionClass.getPackage().getName() + "." + value; + // Try factory for guessed name next + try { + return (Class<? extends C>) CLASSLOADER.loadClass(completedName + FACTORY_POSTFIX); + } catch (ClassNotFoundException e) { + // Ignore, retry + } + // Last try: guessed name prefix only + try { + return (Class<? extends C>) CLASSLOADER.loadClass(completedName); + } catch (ClassNotFoundException e) { + // Ignore, retry + } + // Try aliases: + for (Class<?> c : InspectionUtil.cachedFindAllImplementations(restrictionClass)) { + if (c.isAnnotationPresent(Alias.class)) { + Alias aliases = c.getAnnotation(Alias.class); + for (String alias : aliases.value()) { + if (alias.equalsIgnoreCase(value) || alias.equalsIgnoreCase(completedName)) { + return (Class<? extends C>) c; + } + } + } } - return MASTER_CACHE.iterator(); + return null; } /** * Perform a full (slow) scan for classes. 
* - * @param cond Class to include * @return List with the scan result */ - private static List<Class<?>> slowScan(Class<?> cond) { - ArrayList<Class<?>> res = new ArrayList<Class<?>>(); + private static List<Class<?>> slowScan() { + ArrayList<Class<?>> res = new ArrayList<>(); try { - ClassLoader cl = ClassLoader.getSystemClassLoader(); - Enumeration<URL> cps = cl.getResources(""); - while(cps.hasMoreElements()) { + Enumeration<URL> cps = CLASSLOADER.getResources(""); + while (cps.hasMoreElements()) { URL u = cps.nextElement(); // Scan file sources only. - if("file".equals(u.getProtocol())) { - Iterator<String> it = new DirClassIterator(new File(u.getFile()), DEFAULT_IGNORES); - while(it.hasNext()) { + if ("file".equals(u.getProtocol())) { + File path; + try { + path = new File(u.toURI()); + } catch (URISyntaxException e) { + LOG.exception("Error in classpath: " + u, e); + continue; + } + Iterator<String> it = new DirClassIterator(path, DEFAULT_IGNORES); + while (it.hasNext()) { String classname = it.next(); try { - Class<?> cls = cl.loadClass(classname); + Class<?> cls = CLASSLOADER.loadClass(classname); // skip classes where we can't get a full name. - if(cls.getCanonicalName() == null) { - continue; - } - // Implements the right interface? - if(cond != null && !cond.isAssignableFrom(cls)) { + if (cls.getCanonicalName() == null) { continue; } res.add(cls); - } - catch(ClassNotFoundException e) { + } catch (ClassNotFoundException e) { continue; - } - catch(NoClassDefFoundError e) { + } catch (NoClassDefFoundError e) { continue; - } - catch(Exception e) { + } catch (Exception e) { continue; - } - catch(Error e) { + } catch (Error e) { continue; } } } } - } - catch(IOException e) { + } catch (IOException e) { LOG.exception(e); } Collections.sort(res, new ClassSorter()); @@ -243,86 +279,6 @@ public class InspectionUtil { } /** - * Class to iterate over a Jar file. 
- * - * Note: this is currently unused, as we now require all jar files to include - * an index in the form of service-style files in META-INF/elki/ - * - * @author Erich Schubert - * - * @apiviz.exclude - */ - static class JarClassIterator implements Iterator<String> { - private Enumeration<JarEntry> jarentries; - - private String ne; - - private String[] ignorepackages; - - /** - * Constructor from Jar file. - * - * @param path Jar file entries to iterate over. - */ - public JarClassIterator(String path, String[] ignorepackages) { - this.ignorepackages = ignorepackages; - try { - JarFile jf = new JarFile(path); - this.jarentries = jf.entries(); - this.ne = findNext(); - } - catch(IOException e) { - LoggingUtil.exception("Error opening jar file: " + path, e); - this.jarentries = null; - this.ne = null; - } - } - - @Override - public boolean hasNext() { - // Do we have a next entry? - return (ne != null); - } - - /** - * Find the next entry, since we need to skip some jar file entries. - * - * @return next entry or null - */ - private String findNext() { - nextfile: while(jarentries.hasMoreElements()) { - JarEntry je = jarentries.nextElement(); - String name = je.getName(); - if(name.endsWith(".class")) { - String classname = name.substring(0, name.length() - ".class".length()).replace('/', '.'); - for(String pkg : ignorepackages) { - if(classname.startsWith(pkg)) { - continue nextfile; - } - } - if(classname.endsWith(ClassParameter.FACTORY_POSTFIX) || !classname.contains("$")) { - return classname.replace('/', '.'); - } - } - } - return null; - } - - @Override - public String next() { - // Return the previously stored entry. - String ret = ne; - ne = findNext(); - return ret; - } - - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - } - - /** * Class to iterate over a directory tree. 
* * @author Erich Schubert @@ -332,15 +288,15 @@ public class InspectionUtil { static class DirClassIterator implements Iterator<String> { private static final String CLASS_EXT = ".class"; - private static final String FACTORY_FILE_EXT = ClassParameter.FACTORY_POSTFIX + CLASS_EXT; + private static final String FACTORY_FILE_EXT = FACTORY_POSTFIX + CLASS_EXT; private static final int CLASS_EXT_LENGTH = CLASS_EXT.length(); private String prefix; - private ArrayList<String> files = new ArrayList<String>(100); + private ArrayList<String> files = new ArrayList<>(100); - private ArrayList<Pair<File, String>> folders = new ArrayList<Pair<File, String>>(100); + private ArrayList<Pair<File, String>> folders = new ArrayList<>(100); private String[] ignorepackages; @@ -352,16 +308,16 @@ public class InspectionUtil { public DirClassIterator(File path, String[] ignorepackages) { this.ignorepackages = ignorepackages; this.prefix = path.getAbsolutePath(); - if(prefix.charAt(prefix.length() - 1) != File.separatorChar) { + if (prefix.charAt(prefix.length() - 1) != File.separatorChar) { prefix = prefix + File.separatorChar; } - this.folders.add(new Pair<File, String>(path, "")); + this.folders.add(new Pair<>(path, "")); } @Override public boolean hasNext() { - if(files.size() == 0) { + if (files.size() == 0) { findNext(); } return (files.size() > 0); @@ -371,19 +327,19 @@ public class InspectionUtil { * Find the next entry, since we need to skip some directories. 
*/ private void findNext() { - while(folders.size() > 0) { + while (folders.size() > 0) { Pair<File, String> pair = folders.remove(folders.size() - 1); // recurse into directories - if(pair.first.isDirectory()) { - nextfile: for(String localname : pair.first.list()) { + if (pair.first.isDirectory()) { + nextfile: for (String localname : pair.first.list()) { // Ignore unix-hidden files/dirs - if(localname.charAt(0) == '.') { + if (localname.charAt(0) == '.') { continue; } // Classes - if(localname.endsWith(CLASS_EXT)) { - if(localname.indexOf('$') >= 0) { - if(!localname.endsWith(FACTORY_FILE_EXT)) { + if (localname.endsWith(CLASS_EXT)) { + if (localname.indexOf('$') >= 0) { + if (!localname.endsWith(FACTORY_FILE_EXT)) { continue; } } @@ -392,14 +348,14 @@ public class InspectionUtil { } // Recurse into directories File newf = new File(pair.first, localname); - if(newf.isDirectory()) { + if (newf.isDirectory()) { String newpref = pair.second + localname + '.'; - for(String ignore : ignorepackages) { - if(ignore.equals(newpref)) { + for (String ignore : ignorepackages) { + if (ignore.equals(newpref)) { continue nextfile; } } - folders.add(new Pair<File, String>(newf, newpref)); + folders.add(new Pair<>(newf, newpref)); } } } @@ -408,10 +364,10 @@ public class InspectionUtil { @Override public String next() { - if(files.size() == 0) { + if (files.size() == 0) { findNext(); } - if(files.size() > 0) { + if (files.size() > 0) { return files.remove(files.size() - 1); } return null; @@ -433,11 +389,19 @@ public class InspectionUtil { public static class ClassSorter implements Comparator<Class<?>> { @Override public int compare(Class<?> o1, Class<?> o2) { - int pkgcmp = o1.getPackage().getName().compareTo(o2.getPackage().getName()); - if(pkgcmp != 0) { + Package p1 = o1.getPackage(); + Package p2 = o2.getPackage(); + if (p1 == null) { + return -1; + } + if (p2 == null) { + return 1; + } + int pkgcmp = p1.getName().compareTo(p2.getName()); + if (pkgcmp != 0) { return pkgcmp; 
} return o1.getCanonicalName().compareTo(o2.getCanonicalName()); } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/QueryStatistic.java b/src/de/lmu/ifi/dbs/elki/utilities/QueryStatistic.java deleted file mode 100644 index 91acad87..00000000 --- a/src/de/lmu/ifi/dbs/elki/utilities/QueryStatistic.java +++ /dev/null @@ -1,98 +0,0 @@ -package de.lmu.ifi.dbs.elki.utilities; - -/* - This file is part of ELKI: - Environment for Developing KDD-Applications Supported by Index-Structures - - Copyright (C) 2012 - Ludwig-Maximilians-Universität München - Lehr- und Forschungseinheit für Datenbanksysteme - ELKI Development Team - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - - -/** - * Provides some statistics about queries - * using a filter-refinement architecture. - * - * @author Elke Achtert - */ -public class QueryStatistic { - /** - * The number of candidate objects which need a refinement. - */ - private int candidates; - - /** - * The number of true hits which do not need a refinement. - */ - private int trueHits; - - /** - * The overall number of the result objects after refinement - * plus the true hits. - */ - private int results; - - /** - * Clears the values of this statistic. - */ - public void clear() { - this.candidates = 0; - this.trueHits = 0; - this.results = 0; - } - - /** - * Adds the specified number to the number of the result objects. 
- * - * @param results the number of the result objects to be added - */ - public void addResults(int results) { - this.results += results; - } - - /** - * Adds the specified number to the number of the candidate objects. - * - * @param candidates the number of the candidate objects to be added - */ - public void addCandidates(int candidates) { - this.candidates += candidates; - } - - /** - * Adds the specified number to the number of the true hits. - * - * @param trueHits the number of the true hits to be added - */ - public void addTrueHits(int trueHits) { - this.trueHits += trueHits; - } - - /** - * Returns the number of candidate objects and the number of the result objects after refinement. - * - * @return a string representation of this query statistic - */ - @Override - public String toString() { - return - "# candidates = " + candidates + - "\n# true hits = " + trueHits + - "\n# results = " + results; - } -} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/RandomFactory.java b/src/de/lmu/ifi/dbs/elki/utilities/RandomFactory.java index 65a92d33..9b870acb 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/RandomFactory.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/RandomFactory.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/Util.java b/src/de/lmu/ifi/dbs/elki/utilities/Util.java index 42144958..439ef171 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/Util.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/Util.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und 
Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,12 +23,10 @@ package de.lmu.ifi.dbs.elki.utilities; along with this program. If not, see <http://www.gnu.org/licenses/>. */ - import java.util.BitSet; import java.util.Comparator; import java.util.Random; - /** * This class collects various static helper methods. * @@ -39,6 +37,11 @@ import java.util.Random; */ public final class Util { /** + * Prime number used in hash code computation. + */ + private static final long HASHPRIME = 2654435761L; + + /** * Fake constructor: do not instantiate. */ private Util() { @@ -77,21 +80,55 @@ public final class Util { /** * Mix multiple hashcodes into one. * + * @param hash Single Hashcodes to "mix" + * @return Original hash code + */ + @Deprecated + public static int mixHashCodes(int hash) { + return hash; + } + + /** + * Mix multiple hashcodes into one. + * + * @param hash1 First hashcode to mix + * @param hash2 Second hashcode to mix + * @return Mixed hash code + */ + public static int mixHashCodes(int hash1, int hash2) { + return (int) (hash1 * HASHPRIME + hash2); + } + + /** + * Mix multiple hashcodes into one. + * + * @param hash1 First hashcode to mix + * @param hash2 Second hashcode to mix + * @param hash3 Third hashcode to mix + * @return Mixed hash code + */ + public static int mixHashCodes(int hash1, int hash2, int hash3) { + long result = hash1 * HASHPRIME + hash2; + return (int) (result * HASHPRIME + hash3); + } + + /** + * Mix multiple hashcodes into one. + * * @param hash Hashcodes to mix * @return Mixed hash code */ public static int mixHashCodes(int... hash) { - final long prime = 2654435761L; if (hash.length == 0) { return 0; } long result = hash[0]; for (int i = 1; i < hash.length; i++) { - result = result * prime + hash[i]; + result = result * HASHPRIME + hash[i]; } return (int) result; } - + /** * Static instance. 
*/ diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/AnyMap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/AnyMap.java deleted file mode 100644 index 763ce105..00000000 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/AnyMap.java +++ /dev/null @@ -1,94 +0,0 @@ -package de.lmu.ifi.dbs.elki.utilities.datastructures; - -/* - This file is part of ELKI: - Environment for Developing KDD-Applications Supported by Index-Structures - - Copyright (C) 2012 - Ludwig-Maximilians-Universität München - Lehr- und Forschungseinheit für Datenbanksysteme - ELKI Development Team - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -import java.util.HashMap; - -/** - * Associative storage based on a {@link HashMap} for multiple object types that - * offers a type checked {@link #get(Object, Class)} method. - * - * @author Erich Schubert - * - * @param <K> Key class type - */ -public class AnyMap<K> extends HashMap<K, Object> { - /** - * Serial version. 
- */ - private static final long serialVersionUID = 1L; - - /** - * Constructor - */ - public AnyMap() { - super(); - } - - /** - * Type checked get method - * - * @param <T> Return type - * @param key Key - * @param restriction restriction class - * @return Object that is guaranteed to be of class restriction or null - */ - public <T> T get(K key, Class<T> restriction) { - Object o = super.get(key); - if(o == null) { - return null; - } - try { - return restriction.cast(o); - } - catch(ClassCastException e) { - return null; - } - } - - /** - * (Largely) type checked get method for use with generic types - * - * @param <T> Return type - * @param key Key - * @param restriction restriction class - * @return Object that is guaranteed to be of class restriction or null - */ - @SuppressWarnings("unchecked") - public <T> T getGenerics(K key, Class<?> restriction) { - return (T) get(key, restriction); - } - - /** - * Depreciate the use of the untyped get method. - * - * @deprecated use {@link #get(Object, Class)} or - * {@link #getGenerics(Object, Class)} instead, for type safety! - */ - @Override - @Deprecated - public Object get(Object key) { - return super.get(key); - } -}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/HashMapList.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/HashMapList.java deleted file mode 100644 index 26fa4d19..00000000 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/HashMapList.java +++ /dev/null @@ -1,124 +0,0 @@ -package de.lmu.ifi.dbs.elki.utilities.datastructures; - -/* - This file is part of ELKI: - Environment for Developing KDD-Applications Supported by Index-Structures - - Copyright (C) 2012 - Ludwig-Maximilians-Universität München - Lehr- und Forschungseinheit für Datenbanksysteme - ELKI Development Team - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; - -/** - * Multi-Associative container, that stores a list of values for a particular key. - * - * @author Erich Schubert - * - * @apiviz.has List oneway - - contains - * - * @param <K> Key type - * @param <V> Value type - */ -// TODO: use MultiValueMap from apache collections instead? -public class HashMapList<K, V> extends HashMap<K, List<V>> { - /** - * Serial version - */ - private static final long serialVersionUID = 3883242025598456055L; - - /** - * Constructor. 
- */ - public HashMapList() { - super(); - } - - /** - * Constructor with initial capacity (of the hash) - * - * @param initialCapacity initial capacity - */ - public HashMapList(int initialCapacity) { - super(initialCapacity); - } - - /** - * Add a single value to the given key. - * - * @param key Key - * @param value Additional Value - */ - public synchronized void add(K key, V value) { - List<V> list = super.get(key); - if (list == null) { - list = new ArrayList<V>(1); - super.put(key, list); - } - list.add(value); - } - - /** - * Check that there is at least one value for the key. - */ - @Override - public boolean containsKey(Object key) { - List<V> list = super.get(key); - if (list == null) { - return false; - } - return list.size() > 0; - } - - /** - * Remove a single value from the map. - * - * @param key Key to remove - * @param value Value to remove. - * @return <tt>true</tt> if this list contained the specified element - */ - public synchronized boolean remove(K key, V value) { - List<V> list = super.get(key); - if (list == null) { - return false; - } - boolean success = list.remove(value); - // remove empty lists. - if (list.size() == 0) { - super.remove(key); - } - return success; - } - - /** - * Test if a given value is already present for the key. 
- * - * @param key Key - * @param value Value - * @return <tt>true</tt> if the keys list contains the specified element - */ - public boolean contains(K key, V value) { - List<V> list = super.get(key); - if (list == null) { - return false; - } - return list.contains(value); - } -} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/MaskedArrayList.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/MaskedArrayList.java deleted file mode 100644 index c24519d1..00000000 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/MaskedArrayList.java +++ /dev/null @@ -1,174 +0,0 @@ -package de.lmu.ifi.dbs.elki.utilities.datastructures; - -/* - This file is part of ELKI: - Environment for Developing KDD-Applications Supported by Index-Structures - - Copyright (C) 2012 - Ludwig-Maximilians-Universität München - Lehr- und Forschungseinheit für Datenbanksysteme - ELKI Development Team - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -import java.util.AbstractCollection; -import java.util.ArrayList; -import java.util.BitSet; -import java.util.Collection; -import java.util.Iterator; - -/** - * This class is a virtual collection based on masking an array list using a bit - * mask. 
- * - * @author Erich Schubert - * - * @apiviz.stereotype decorator - * @apiviz.composedOf java.util.ArrayList - * @apiviz.composedOf java.util.BitSet - * - * @param <T> Object type - */ -public class MaskedArrayList<T> extends AbstractCollection<T> implements Collection<T> { - /** - * Data storage - */ - protected ArrayList<T> data; - - /** - * The bitmask used for masking - */ - protected BitSet bits; - - /** - * Flag whether to iterator over set or unset values. - */ - protected boolean inverse = false; - - /** - * Constructor. - * - * @param data Data - * @param bits Bitset to use as mask - * @param inverse Flag to inverse the masking rule - */ - public MaskedArrayList(ArrayList<T> data, BitSet bits, boolean inverse) { - super(); - this.data = data; - this.bits = bits; - this.inverse = inverse; - } - - @Override - public boolean add(T e) { - throw new UnsupportedOperationException(); - } - - @Override - public Iterator<T> iterator() { - if(inverse) { - return new InvItr(); - } - else { - return new Itr(); - } - } - - @Override - public int size() { - if(inverse) { - return data.size() - bits.cardinality(); - } - else { - return bits.cardinality(); - } - } - - /** - * Iterator over set bits - * - * @author Erich Schubert - * - * @apiviz.exclude - */ - protected class Itr implements Iterator<T> { - /** - * Next position. - */ - private int pos; - - /** - * Constructor - */ - protected Itr() { - this.pos = bits.nextSetBit(0); - } - - @Override - public boolean hasNext() { - return (pos >= 0) && (pos < data.size()); - } - - @Override - public T next() { - T cur = data.get(pos); - pos = bits.nextSetBit(pos + 1); - return cur; - } - - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - } - - /** - * Iterator over unset elements. - * - * @author Erich Schubert - * - * @apiviz.exclude - */ - protected class InvItr implements Iterator<T> { - /** - * Next unset position. 
- */ - private int pos; - - /** - * Constructor - */ - protected InvItr() { - this.pos = bits.nextClearBit(0); - } - - @Override - public boolean hasNext() { - return (pos >= 0) && (pos < data.size()); - } - - @Override - public T next() { - T cur = data.get(pos); - pos = bits.nextClearBit(pos + 1); - return cur; - } - - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - } -}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/QuickSelect.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/QuickSelect.java index bfa7950d..3746ff87 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/QuickSelect.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/QuickSelect.java @@ -1,18 +1,10 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures; -import java.util.Comparator; -import java.util.List; - -import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs; -import de.lmu.ifi.dbs.elki.database.ids.DBID; -import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter; -import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; - /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -31,6 +23,14 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; along with this program. If not, see <http://www.gnu.org/licenses/>. */ +import java.util.Comparator; +import java.util.List; + +import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs; +import de.lmu.ifi.dbs.elki.database.ids.DBID; +import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; + /** * QuickSelect computes ("selects") the element at a given rank and can be used * to compute Medians and arbitrary quantiles by computing the appropriate rank. @@ -43,13 +43,297 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; * * @apiviz.uses ArrayModifiableDBIDs * @apiviz.uses List - * @apiviz.uses Comparator + * @apiviz.uses Adapter */ public class QuickSelect { /** * For small arrays, use a simpler method: */ - private static final int SMALL = 10; + private static final int SMALL = 47; + + /** + * Choose the best pivot for the given rank. 
+ * + * @param rank Rank + * @param m1 Pivot candidate + * @param m2 Pivot candidate + * @param m3 Pivot candidate + * @param m4 Pivot candidate + * @param m5 Pivot candidate + * @return Best pivot candidate + */ + private static final int bestPivot(int rank, int m1, int m2, int m3, int m4, int m5) { + if (rank < m1) { + return m1; + } + if (rank > m5) { + return m5; + } + if (rank < m2) { + return m2; + } + if (rank > m4) { + return m4; + } + return m3; + } + + /** + * QuickSelect is essentially quicksort, except that we only "sort" that half + * of the array that we are interested in. + * + * @param data Data to process + * @param start Interval start + * @param end Interval end (exclusive) + * @param rank rank position we are interested in (starting at 0) + */ + public static <T> void quickSelect(T data, Adapter<T> adapter, int start, int end, int rank) { + while (true) { + // Optimization for small arrays + // This also ensures a minimum size below + if (start + SMALL > end) { + insertionSort(data, adapter, start, end); + return; + } + + // Best of 5 pivot picking: + // Choose pivots by looking at five candidates. + final int len = end - start; + final int seventh = (len >> 3) + (len >> 6) + 1; + final int m3 = (start + end) >> 1; // middle + final int m2 = m3 - seventh; + final int m1 = m2 - seventh; + final int m4 = m3 + seventh; + final int m5 = m4 + seventh; + + // Explicit (and optimal) sorting network for 5 elements + // See Knuth for details. 
+ if (adapter.compareGreater(data, m1, m2)) { + adapter.swap(data, m1, m2); + } + if (adapter.compareGreater(data, m1, m3)) { + adapter.swap(data, m1, m3); + } + if (adapter.compareGreater(data, m2, m3)) { + adapter.swap(data, m2, m3); + } + if (adapter.compareGreater(data, m4, m5)) { + adapter.swap(data, m4, m5); + } + if (adapter.compareGreater(data, m1, m4)) { + adapter.swap(data, m1, m4); + } + if (adapter.compareGreater(data, m3, m4)) { + adapter.swap(data, m3, m4); + } + if (adapter.compareGreater(data, m2, m5)) { + adapter.swap(data, m2, m5); + } + if (adapter.compareGreater(data, m2, m3)) { + adapter.swap(data, m2, m3); + } + if (adapter.compareGreater(data, m4, m5)) { + adapter.swap(data, m4, m5); + } + + int best = bestPivot(rank, m1, m2, m3, m4, m5); + // final double pivot = data[best]; + // Move middle element out of the way. + adapter.swap(data, best, end - 1); + + // Begin partitioning + int i = start, j = end - 2; + // This is classic quicksort stuff + while (true) { + while (i <= j && adapter.compareGreater(data, end - 1, i)) { + i++; + } + while (j >= i && !adapter.compareGreater(data, end - 1, j)) { + j--; + } + if (i >= j) { + break; + } + adapter.swap(data, i, j); + } + + // Move pivot (former middle element) back into the appropriate place + adapter.swap(data, i, end - 1); + + // In contrast to quicksort, we only need to recurse into the half we are + // interested in. Instead of recursion we now use iteration. + if (rank < i) { + end = i; + } else if (rank > i) { + start = i + 1; + } else { + break; + } + } // Loop until rank==i + } + + /** + * Sort a small array using repetitive insertion sort. 
+ * + * @param data Data to sort + * @param start Interval start + * @param end Interval end + */ + private static <T> void insertionSort(T data, Adapter<T> adapter, int start, int end) { + for (int i = start + 1; i < end; i++) { + for (int j = i; j > start && adapter.compareGreater(data, j - 1, j); j--) { + adapter.swap(data, j, j - 1); + } + } + } + + /** + * Adapter class to apply QuickSelect to arbitrary data structures. + * + * @author Erich Schubert + * + * @param <T> Data structure type + */ + public static interface Adapter<T> { + /** + * Swap the two elements at positions i and j. + * + * @param data Data structure + * @param i Position i + * @param j Position j + */ + void swap(T data, int i, int j); + + /** + * Compare two elements. + * + * @param data Data structure + * @param i Position i + * @param j Position j + * @return {@code true} when the element at position i is greater than that + * at position j. + */ + boolean compareGreater(T data, int i, int j); + } + + /** + * Adapter for double arrays. + */ + public static Adapter<double[]> DOUBLE_ADAPTER = new Adapter<double[]>() { + @Override + public void swap(double[] data, int i, int j) { + double tmp = data[i]; + data[i] = data[j]; + data[j] = tmp; + } + + @Override + public boolean compareGreater(double[] data, int i, int j) { + return data[i] > data[j]; + } + }; + + /** + * Adapter for integer arrays. + */ + public static Adapter<int[]> INTEGER_ADAPTER = new Adapter<int[]>() { + @Override + public void swap(int[] data, int i, int j) { + int tmp = data[i]; + data[i] = data[j]; + data[j] = tmp; + } + + @Override + public boolean compareGreater(int[] data, int i, int j) { + return data[i] > data[j]; + } + }; + + /** + * Adapter for float arrays. 
+ */ + public static Adapter<float[]> FLOAT_ADAPTER = new Adapter<float[]>() { + @Override + public void swap(float[] data, int i, int j) { + float tmp = data[i]; + data[i] = data[j]; + data[j] = tmp; + } + + @Override + public boolean compareGreater(float[] data, int i, int j) { + return data[i] > data[j]; + } + }; + + /** + * Adapter for short arrays. + */ + public static Adapter<short[]> SHORT_ADAPTER = new Adapter<short[]>() { + @Override + public void swap(short[] data, int i, int j) { + short tmp = data[i]; + data[i] = data[j]; + data[j] = tmp; + } + + @Override + public boolean compareGreater(short[] data, int i, int j) { + return data[i] > data[j]; + } + }; + + /** + * Adapter for long arrays. + */ + public static Adapter<long[]> LONG_ADAPTER = new Adapter<long[]>() { + @Override + public void swap(long[] data, int i, int j) { + long tmp = data[i]; + data[i] = data[j]; + data[j] = tmp; + } + + @Override + public boolean compareGreater(long[] data, int i, int j) { + return data[i] > data[j]; + } + }; + + /** + * Adapter for byte arrays. + */ + public static Adapter<byte[]> BYTE_ADAPTER = new Adapter<byte[]>() { + @Override + public void swap(byte[] data, int i, int j) { + byte tmp = data[i]; + data[i] = data[j]; + data[j] = tmp; + } + + @Override + public boolean compareGreater(byte[] data, int i, int j) { + return data[i] > data[j]; + } + }; + + /** + * Adapter for char arrays. + */ + public static Adapter<char[]> CHAR_ADAPTER = new Adapter<char[]>() { + @Override + public void swap(char[] data, int i, int j) { + char tmp = data[i]; + data[i] = data[j]; + data[j] = tmp; + } + + @Override + public boolean compareGreater(char[] data, int i, int j) { + return data[i] > data[j]; + } + }; /** * QuickSelect is essentially quicksort, except that we only "sort" that half @@ -94,11 +378,10 @@ public class QuickSelect { // Integer division is "floor" since we are non-negative. 
final int left = begin + ((length - 1) >> 1); quickSelect(data, begin, end, left); - if(length % 2 == 1) { + if (length % 2 == 1) { return data[left]; - } - else { - quickSelect(data, begin, end, left + 1); + } else { + quickSelect(data, left + 1, end, left + 1); return data[left] + .5 * (data[left + 1] - data[left]); } } @@ -136,11 +419,10 @@ public class QuickSelect { final double err = dleft - ileft; quickSelect(data, begin, end, ileft); - if(err <= Double.MIN_NORMAL) { + if (err <= Double.MIN_NORMAL) { return data[ileft]; - } - else { - quickSelect(data, begin, end, ileft + 1); + } else { + quickSelect(data, ileft + 1, end, ileft + 1); // Mix: double mix = data[ileft] + (data[ileft + 1] - data[ileft]) * err; return mix; @@ -155,66 +437,94 @@ public class QuickSelect { * @param start Interval start * @param end Interval end (exclusive) * @param rank rank position we are interested in (starting at 0) + * @return Element at the given rank (starting at 0). */ - public static void quickSelect(double[] data, int start, int end, int rank) { - while(true) { + public static double quickSelect(double[] data, int start, int end, int rank) { + while (true) { // Optimization for small arrays // This also ensures a minimum size below - if(start + SMALL > end) { + if (start + SMALL > end) { insertionSort(data, start, end); - return; + return data[rank]; } - // Pick pivot from three candidates: start, middle, end - // Since we compare them, we can also just "bubble sort" them. - final int middle = (start + end) >> 1; - if(data[start] > data[middle]) { - swap(data, start, middle); + // Best of 5 pivot picking: + // Choose pivots by looking at five candidates. 
+ final int len = end - start; + final int seventh = (len >> 3) + (len >> 6) + 1; + final int m3 = (start + end) >> 1; // middle + final int m2 = m3 - seventh; + final int m1 = m2 - seventh; + final int m4 = m3 + seventh; + final int m5 = m4 + seventh; + + // Explicit (and optimal) sorting network for 5 elements + // See Knuth for details. + if (data[m1] > data[m2]) { + swap(data, m1, m2); + } + if (data[m1] > data[m3]) { + swap(data, m1, m3); + } + if (data[m2] > data[m3]) { + swap(data, m2, m3); + } + if (data[m4] > data[m5]) { + swap(data, m4, m5); + } + if (data[m1] > data[m4]) { + swap(data, m1, m4); } - if(data[start] > data[end - 1]) { - swap(data, start, end - 1); + if (data[m3] > data[m4]) { + swap(data, m3, m4); } - if(data[middle] > data[end - 1]) { - swap(data, middle, end - 1); + if (data[m2] > data[m5]) { + swap(data, m2, m5); + } + if (data[m2] > data[m3]) { + swap(data, m2, m3); + } + if (data[m4] > data[m5]) { + swap(data, m4, m5); } - // TODO: use more candidates for larger arrays? - final double pivot = data[middle]; - // Move middle element out of the way, just before end - // (Since we already know that "end" is bigger) - swap(data, middle, end - 2); + int best = bestPivot(rank, m1, m2, m3, m4, m5); + final double pivot = data[best]; + // Move middle element out of the way. + swap(data, best, end - 1); // Begin partitioning - int i = start + 1, j = end - 3; + int i = start, j = end - 2; // This is classic quicksort stuff - while(true) { - while(data[i] <= pivot && i <= j) { + while (true) { + while (i <= j && data[i] <= pivot) { i++; } - while(data[j] >= pivot && j >= i) { + while (j >= i && data[j] >= pivot) { j--; } - if(i >= j) { + if (i >= j) { break; } swap(data, i, j); + i++; + j--; } // Move pivot (former middle element) back into the appropriate place - swap(data, i, end - 2); + swap(data, i, end - 1); // In contrast to quicksort, we only need to recurse into the half we are // interested in. Instead of recursion we now use iteration. 
- if(rank < i) { + if (rank < i) { end = i; - } - else if(rank > i) { + } else if (rank > i) { start = i + 1; - } - else { + } else { break; } } // Loop until rank==i + return data[rank]; } /** @@ -225,8 +535,8 @@ public class QuickSelect { * @param end Interval end */ private static void insertionSort(double[] data, int start, int end) { - for(int i = start + 1; i < end; i++) { - for(int j = i; j > start && data[j - 1] > data[j]; j--) { + for (int i = start + 1; i < end; i++) { + for (int j = i; j > start && data[j - 1] > data[j]; j--) { swap(data, j, j - 1); } } @@ -345,61 +655,85 @@ public class QuickSelect { * @param rank rank position we are interested in (starting at 0) */ public static <T extends Comparable<? super T>> void quickSelect(T[] data, int start, int end, int rank) { - while(true) { + while (true) { // Optimization for small arrays // This also ensures a minimum size below - if(start + SMALL > end) { + if (start + SMALL > end) { insertionSort(data, start, end); return; } - // Pick pivot from three candidates: start, middle, end - // Since we compare them, we can also just "bubble sort" them. - final int middle = (start + end) >> 1; - if(data[start].compareTo(data[middle]) > 0) { - swap(data, start, middle); + // Best of 5 pivot picking: + // Choose pivots by looking at five candidates. + final int len = end - start; + final int seventh = (len >> 3) + (len >> 6) + 1; + final int m3 = (start + end) >> 1; // middle + final int m2 = m3 - seventh; + final int m1 = m2 - seventh; + final int m4 = m3 + seventh; + final int m5 = m4 + seventh; + + // Explicit (and optimal) sorting network for 5 elements + // See Knuth for details. 
+ if (data[m1].compareTo(data[m2]) > 0) { + swap(data, m1, m2); + } + if (data[m1].compareTo(data[m3]) > 0) { + swap(data, m1, m3); + } + if (data[m2].compareTo(data[m3]) > 0) { + swap(data, m2, m3); + } + if (data[m4].compareTo(data[m5]) > 0) { + swap(data, m4, m5); + } + if (data[m1].compareTo(data[m4]) > 0) { + swap(data, m1, m4); } - if(data[start].compareTo(data[end - 1]) > 0) { - swap(data, start, end - 1); + if (data[m3].compareTo(data[m4]) > 0) { + swap(data, m3, m4); } - if(data[middle].compareTo(data[end - 1]) > 0) { - swap(data, middle, end - 1); + if (data[m2].compareTo(data[m5]) > 0) { + swap(data, m2, m5); + } + if (data[m2].compareTo(data[m3]) > 0) { + swap(data, m2, m3); + } + if (data[m4].compareTo(data[m5]) > 0) { + swap(data, m4, m5); } - // TODO: use more candidates for larger arrays? - final T pivot = data[middle]; - // Move middle element out of the way, just before end - // (Since we already know that "end" is bigger) - swap(data, middle, end - 2); + int best = bestPivot(rank, m1, m2, m3, m4, m5); + final T pivot = data[best]; + // Move middle element out of the way. + swap(data, best, end - 1); // Begin partitioning - int i = start + 1, j = end - 3; + int i = start, j = end - 2; // This is classic quicksort stuff - while(true) { - while(data[i].compareTo(pivot) <= 0 && i <= j) { + while (true) { + while (i <= j && data[i].compareTo(pivot) <= 0) { i++; } - while(data[j].compareTo(pivot) >= 0 && j >= i) { + while (j >= i && data[j].compareTo(pivot) >= 0) { j--; } - if(i >= j) { + if (i >= j) { break; } swap(data, i, j); } // Move pivot (former middle element) back into the appropriate place - swap(data, i, end - 2); + swap(data, i, end - 1); // In contrast to quicksort, we only need to recurse into the half we are // interested in. Instead of recursion we now use iteration. 
- if(rank < i) { + if (rank < i) { end = i; - } - else if(rank > i) { + } else if (rank > i) { start = i + 1; - } - else { + } else { break; } } // Loop until rank==i @@ -414,8 +748,8 @@ public class QuickSelect { * @param end Interval end */ private static <T extends Comparable<? super T>> void insertionSort(T[] data, int start, int end) { - for(int i = start + 1; i < end; i++) { - for(int j = i; j > start && data[j - 1].compareTo(data[j]) > 0; j--) { + for (int i = start + 1; i < end; i++) { + for (int j = i; j > start && data[j - 1].compareTo(data[j]) > 0; j--) { swap(data, j, j - 1); } } @@ -536,61 +870,86 @@ public class QuickSelect { * @param rank rank position we are interested in (starting at 0) */ public static <T extends Comparable<? super T>> void quickSelect(List<? extends T> data, int start, int end, int rank) { - while(true) { + while (true) { // Optimization for small arrays // This also ensures a minimum size below - if(start + SMALL > end) { + if (start + SMALL > end) { insertionSort(data, start, end); return; } - // Pick pivot from three candidates: start, middle, end - // Since we compare them, we can also just "bubble sort" them. - final int middle = (start + end) >> 1; - if(data.get(start).compareTo(data.get(middle)) > 0) { - swap(data, start, middle); + // Best of 5 pivot picking: + // Choose pivots by looking at five candidates. + final int len = end - start; + final int seventh = (len >> 3) + (len >> 6) + 1; + final int m3 = (start + end) >> 1; // middle + final int m2 = m3 - seventh; + final int m1 = m2 - seventh; + final int m4 = m3 + seventh; + final int m5 = m4 + seventh; + + // Explicit (and optimal) sorting network for 5 elements + // See Knuth for details. 
+ if (data.get(m1).compareTo(data.get(m2)) > 0) { + swap(data, m1, m2); } - if(data.get(start).compareTo(data.get(end - 1)) > 0) { - swap(data, start, end - 1); + if (data.get(m1).compareTo(data.get(m3)) > 0) { + swap(data, m1, m3); } - if(data.get(middle).compareTo(data.get(end - 1)) > 0) { - swap(data, middle, end - 1); + if (data.get(m2).compareTo(data.get(m3)) > 0) { + swap(data, m2, m3); + } + if (data.get(m4).compareTo(data.get(m5)) > 0) { + swap(data, m4, m5); + } + if (data.get(m1).compareTo(data.get(m4)) > 0) { + swap(data, m1, m4); + } + if (data.get(m3).compareTo(data.get(m4)) > 0) { + swap(data, m3, m4); + } + if (data.get(m2).compareTo(data.get(m5)) > 0) { + swap(data, m2, m5); + } + if (data.get(m2).compareTo(data.get(m3)) > 0) { + swap(data, m2, m3); + } + if (data.get(m4).compareTo(data.get(m5)) > 0) { + swap(data, m4, m5); } - // TODO: use more candidates for larger arrays? - final T pivot = data.get(middle); + int best = bestPivot(rank, m1, m2, m3, m4, m5); + final T pivot = data.get(best); // Move middle element out of the way, just before end // (Since we already know that "end" is bigger) - swap(data, middle, end - 2); + swap(data, best, end - 1); // Begin partitioning - int i = start + 1, j = end - 3; + int i = start, j = end - 2; // This is classic quicksort stuff - while(true) { - while(data.get(i).compareTo(pivot) <= 0 && i <= j) { + while (true) { + while (i <= j && data.get(i).compareTo(pivot) <= 0) { i++; } - while(data.get(j).compareTo(pivot) >= 0 && j >= i) { + while (j >= i && data.get(j).compareTo(pivot) >= 0) { j--; } - if(i >= j) { + if (i >= j) { break; } swap(data, i, j); } // Move pivot (former middle element) back into the appropriate place - swap(data, i, end - 2); + swap(data, i, end - 1); // In contrast to quicksort, we only need to recurse into the half we are // interested in. Instead of recursion we now use iteration. 
- if(rank < i) { + if (rank < i) { end = i; - } - else if(rank > i) { + } else if (rank > i) { start = i + 1; - } - else { + } else { break; } } // Loop until rank==i @@ -605,8 +964,8 @@ public class QuickSelect { * @param end Interval end */ private static <T extends Comparable<? super T>> void insertionSort(List<T> data, int start, int end) { - for(int i = start + 1; i < end; i++) { - for(int j = i; j > start && data.get(j - 1).compareTo(data.get(j)) > 0; j--) { + for (int i = start + 1; i < end; i++) { + for (int j = i; j > start && data.get(j - 1).compareTo(data.get(j)) > 0; j--) { swap(data, j, j - 1); } } @@ -731,61 +1090,86 @@ public class QuickSelect { * @param rank rank position we are interested in (starting at 0) */ public static <T> void quickSelect(List<? extends T> data, Comparator<? super T> comparator, int start, int end, int rank) { - while(true) { + while (true) { // Optimization for small arrays // This also ensures a minimum size below - if(start + SMALL > end) { + if (start + SMALL > end) { insertionSort(data, comparator, start, end); return; } - // Pick pivot from three candidates: start, middle, end - // Since we compare them, we can also just "bubble sort" them. - final int middle = (start + end) >> 1; - if(comparator.compare(data.get(start), data.get(middle)) > 0) { - swap(data, start, middle); + // Best of 5 pivot picking: + // Choose pivots by looking at five candidates. + final int len = end - start; + final int seventh = (len >> 3) + (len >> 6) + 1; + final int m3 = (start + end) >> 1; // middle + final int m2 = m3 - seventh; + final int m1 = m2 - seventh; + final int m4 = m3 + seventh; + final int m5 = m4 + seventh; + + // Explicit (and optimal) sorting network for 5 elements + // See Knuth for details. 
+ if (comparator.compare(data.get(m1), data.get(m2)) > 0) { + swap(data, m1, m2); + } + if (comparator.compare(data.get(m1), data.get(m3)) > 0) { + swap(data, m1, m3); + } + if (comparator.compare(data.get(m2), data.get(m3)) > 0) { + swap(data, m2, m3); + } + if (comparator.compare(data.get(m4), data.get(m5)) > 0) { + swap(data, m4, m5); } - if(comparator.compare(data.get(start), data.get(end - 1)) > 0) { - swap(data, start, end - 1); + if (comparator.compare(data.get(m1), data.get(m4)) > 0) { + swap(data, m1, m4); } - if(comparator.compare(data.get(middle), data.get(end - 1)) > 0) { - swap(data, middle, end - 1); + if (comparator.compare(data.get(m3), data.get(m4)) > 0) { + swap(data, m3, m4); + } + if (comparator.compare(data.get(m2), data.get(m5)) > 0) { + swap(data, m2, m5); + } + if (comparator.compare(data.get(m2), data.get(m3)) > 0) { + swap(data, m2, m3); + } + if (comparator.compare(data.get(m4), data.get(m5)) > 0) { + swap(data, m4, m5); } - // TODO: use more candidates for larger arrays? 
- final T pivot = data.get(middle); + int best = bestPivot(rank, m1, m2, m3, m4, m5); + final T pivot = data.get(best); // Move middle element out of the way, just before end // (Since we already know that "end" is bigger) - swap(data, middle, end - 2); + swap(data, best, end - 1); // Begin partitioning - int i = start + 1, j = end - 3; + int i = start, j = end - 2; // This is classic quicksort stuff - while(true) { - while(comparator.compare(data.get(i), pivot) <= 0 && i <= j) { + while (true) { + while (i <= j && comparator.compare(data.get(i), pivot) <= 0) { i++; } - while(comparator.compare(data.get(j), pivot) >= 0 && j >= i) { + while (j >= i && comparator.compare(data.get(j), pivot) >= 0) { j--; } - if(i >= j) { + if (i >= j) { break; } swap(data, i, j); } // Move pivot (former middle element) back into the appropriate place - swap(data, i, end - 2); + swap(data, i, end - 1); // In contrast to quicksort, we only need to recurse into the half we are // interested in. Instead of recursion we now use iteration. - if(rank < i) { + if (rank < i) { end = i; - } - else if(rank > i) { + } else if (rank > i) { start = i + 1; - } - else { + } else { break; } } // Loop until rank==i @@ -800,8 +1184,8 @@ public class QuickSelect { * @param end Interval end */ private static <T> void insertionSort(List<T> data, Comparator<? super T> comparator, int start, int end) { - for(int i = start + 1; i < end; i++) { - for(int j = i; j > start && comparator.compare(data.get(j - 1), data.get(j)) > 0; j--) { + for (int i = start + 1; i < end; i++) { + for (int j = i; j > start && comparator.compare(data.get(j - 1), data.get(j)) > 0; j--) { swap(data, j, j - 1); } } @@ -882,7 +1266,7 @@ public class QuickSelect { * @param data Data to process * @param comparator Comparator to use * @param begin Begin of valid values - * @param end End of valid values (inclusive!) 
+ * @param end End of valid values (exclusive) * @param quant Quantile to compute * @return Value at quantile */ @@ -904,93 +1288,132 @@ public class QuickSelect { * @param data Data to process * @param comparator Comparator to use * @param start Interval start - * @param end Interval end (inclusive) + * @param end Interval end (exclusive) * @param rank rank position we are interested in (starting at 0) */ public static void quickSelect(ArrayModifiableDBIDs data, Comparator<? super DBIDRef> comparator, int start, int end, int rank) { - while(true) { + DBIDArrayIter refi = data.iter(), refj = data.iter(), pivot = data.iter(); + while (true) { // Optimization for small arrays // This also ensures a minimum size below - if(start + SMALL > end) { - insertionSort(data, comparator, start, end); + if (start + SMALL > end) { + insertionSort(data, comparator, start, end, refi, refj); return; } - // Pick pivot from three candidates: start, middle, end - // Since we compare them, we can also just "bubble sort" them. - final int middle = (start + end) >> 1; - if(comparator.compare(data.get(start), data.get(middle)) > 0) { - data.swap(start, middle); + // Best of 5 pivot picking: + // Choose pivots by looking at five candidates. + final int len = end - start; + final int seventh = (len >> 3) + (len >> 6) + 1; + final int m3 = (start + end) >> 1; // middle + final int m2 = m3 - seventh; + final int m1 = m2 - seventh; + final int m4 = m3 + seventh; + final int m5 = m4 + seventh; + + // Explicit (and optimal) sorting network for 5 elements + // See Knuth for details. 
+ if (compare(refi, m1, refj, m2, comparator) > 0) { + data.swap(m1, m2); + } + if (compare(refi, m1, refj, m3, comparator) > 0) { + data.swap(m1, m3); + } + if (compare(refi, m2, refj, m3, comparator) > 0) { + data.swap(m2, m3); + } + if (compare(refi, m4, refj, m5, comparator) > 0) { + data.swap(m4, m5); } - if(comparator.compare(data.get(start), data.get(end - 1)) > 0) { - data.swap(start, end - 1); + if (compare(refi, m1, refj, m4, comparator) > 0) { + data.swap(m1, m4); } - if(comparator.compare(data.get(middle), data.get(end - 1)) > 0) { - data.swap(middle, end - 1); + if (compare(refi, m3, refj, m4, comparator) > 0) { + data.swap(m3, m4); + } + if (compare(refi, m2, refj, m5, comparator) > 0) { + data.swap(m2, m5); + } + if (compare(refi, m2, refj, m3, comparator) > 0) { + data.swap(m2, m3); + } + if (compare(refi, m4, refj, m5, comparator) > 0) { + data.swap(m4, m5); } - // TODO: use more candidates for larger arrays? - final DBID pivot = data.get(middle); - // Move middle element out of the way, just before end - // (Since we already know that "end" is bigger) - data.swap(middle, end - 2); + int best = bestPivot(rank, m1, m2, m3, m4, m5); + // Move middle element out of the way. 
+ data.swap(best, end - 1); + pivot.seek(end - 1); // Begin partitioning - int i = start + 1, j = end - 3; - DBIDArrayIter refi = data.iter(), refj = data.iter(); + int i = start, j = end - 3; refi.seek(i); refj.seek(j); // This is classic quicksort stuff - while(true) { - while(comparator.compare(refi, pivot) <= 0 && i <= j) { + while (true) { + while (i <= j && comparator.compare(refi, pivot) <= 0) { i++; refi.advance(); } - while(comparator.compare(refj, pivot) >= 0 && j >= i) { + while (j >= i && comparator.compare(refj, pivot) >= 0) { j--; refj.retract(); } - if(i >= j) { + if (i >= j) { break; } data.swap(i, j); } // Move pivot (former middle element) back into the appropriate place - data.swap(i, end - 2); + data.swap(i, end - 1); // In contrast to quicksort, we only need to recurse into the half we are // interested in. Instead of recursion we now use iteration. - if(rank < i) { + if (rank < i) { end = i; - } - else if(rank > i) { + } else if (rank > i) { start = i + 1; - } - else { + } else { break; } } // Loop until rank==i } /** + * Compare two elements. + * + * @param i1 First scratch variable + * @param p1 Value for first + * @param i2 Second scratch variable + * @param p2 Value for second + * @param comp Comparator + * @return Comparison result + */ + private static int compare(DBIDArrayIter i1, int p1, DBIDArrayIter i2, int p2, Comparator<? super DBIDRef> comp) { + i1.seek(p1); + i2.seek(p2); + return comp.compare(i1, i2); + } + + /** * Sort a small array using repetitive insertion sort. * * @param data Data to sort * @param start Interval start * @param end Interval end */ - private static void insertionSort(ArrayModifiableDBIDs data, Comparator<? 
super DBIDRef> comparator, int start, int end) { - DBIDArrayIter iter1 = data.iter(), iter2 = data.iter(); - for(int i = start + 1; i < end; i++) { - iter1.seek(i - 1); - iter2.seek(i); - for(int j = i; j > start; j--, iter1.retract(), iter2.retract()) { - if(comparator.compare(iter1, iter2) > 0) { + private static void insertionSort(ArrayModifiableDBIDs data, Comparator<? super DBIDRef> comparator, int start, int end, DBIDArrayIter iter1, DBIDArrayIter iter2) { + for (int i = start + 1; i < end; i++) { + for (int j = i; j > start; j--) { + iter1.seek(j - 1); + iter2.seek(j); + if (comparator.compare(iter1, iter2) <= 0) { break; } data.swap(j, j - 1); } } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayAdapter.java index 969d068d..2c5eeed1 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayAdapter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayAdapter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayDBIDsAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayDBIDsAdapter.java index 5b1b92b5..da831471 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayDBIDsAdapter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayDBIDsAdapter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayLikeUtil.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayLikeUtil.java index a08feb1a..8fab6f2b 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayLikeUtil.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayLikeUtil.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike; This file is part of ELKI: Environment for Developing KDD-Applications Supported by 
Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -27,6 +27,7 @@ import java.util.List; import de.lmu.ifi.dbs.elki.data.FeatureVector; import de.lmu.ifi.dbs.elki.data.NumberVector; +import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; /** * Utility class that allows plug-in use of various "array-like" types such as @@ -42,17 +43,17 @@ public final class ArrayLikeUtil { /** * Static instance for lists. */ - private static final ListArrayAdapter<Object> LISTADAPTER = new ListArrayAdapter<Object>(); + private static final ListArrayAdapter<Object> LISTADAPTER = new ListArrayAdapter<>(); /** * Static instance for lists of numbers. */ - private static final NumberListArrayAdapter<Number> NUMBERLISTADAPTER = new NumberListArrayAdapter<Number>(); + private static final NumberListArrayAdapter<Number> NUMBERLISTADAPTER = new NumberListArrayAdapter<>(); /** * Static instance. */ - private static final IdentityArrayAdapter<?> IDENTITYADAPTER = new IdentityArrayAdapter<Object>(); + private static final IdentityArrayAdapter<?> IDENTITYADAPTER = new IdentityArrayAdapter<>(); /** * Static instance. @@ -83,7 +84,12 @@ public final class ArrayLikeUtil { * Use ArrayDBIDs as array. */ public static final ArrayDBIDsAdapter ARRAYDBIDADAPTER = new ArrayDBIDsAdapter(); - + + /** + * Adapter for vectors. + */ + public static final NumberArrayAdapter<Double, Vector> VECTORADAPTER = new VectorAdapter(); + /** * Fake constructor. Do not instantiate! 
*/ @@ -169,9 +175,9 @@ public final class ArrayLikeUtil { final int size = adapter.size(array); int index = 0; double max = adapter.getDouble(array, 0); - for(int i = 1; i < size; i++) { + for (int i = 1; i < size; i++) { double val = adapter.getDouble(array, i); - if(val > max) { + if (val > max) { max = val; index = i; } @@ -199,8 +205,11 @@ public final class ArrayLikeUtil { * @return primitive double array */ public static <A> double[] toPrimitiveDoubleArray(A array, NumberArrayAdapter<?, ? super A> adapter) { + if (adapter == DOUBLEARRAYADAPTER) { + return ((double[]) array).clone(); + } double[] ret = new double[adapter.size(array)]; - for(int i = 0; i < ret.length; i++) { + for (int i = 0; i < ret.length; i++) { ret[i] = adapter.getDouble(array, i); } return ret; @@ -234,8 +243,11 @@ public final class ArrayLikeUtil { * @return primitive float array */ public static <A> float[] toPrimitiveFloatArray(A array, NumberArrayAdapter<?, ? super A> adapter) { + if (adapter == FLOATARRAYADAPTER) { + return ((float[]) array).clone(); + } float[] ret = new float[adapter.size(array)]; - for(int i = 0; i < ret.length; i++) { + for (int i = 0; i < ret.length; i++) { ret[i] = adapter.getFloat(array, i); } return ret; @@ -260,4 +272,39 @@ public final class ArrayLikeUtil { public static <N extends Number> float[] toPrimitiveFloatArray(NumberVector<N> obj) { return toPrimitiveFloatArray(obj, numberVectorAdapter(obj)); } -}
\ No newline at end of file + + /** + * Convert a numeric array-like to an <code>int[]</code>. + * + * @param array Array-like + * @param adapter Adapter + * @return primitive int array + */ + public static <A> int[] toPrimitiveIntegerArray(A array, NumberArrayAdapter<?, ? super A> adapter) { + int[] ret = new int[adapter.size(array)]; + for (int i = 0; i < ret.length; i++) { + ret[i] = adapter.getInteger(array, i); + } + return ret; + } + + /** + * Convert a list of numbers to <code>int[]</code>. + * + * @param array List of numbers + * @return primitive int array + */ + public static int[] toPrimitiveIntegerArray(List<? extends Number> array) { + return toPrimitiveIntegerArray(array, NUMBERLISTADAPTER); + } + + /** + * Convert a number vector to <code>int[]</code>. + * + * @param obj Object to convert + * @return primitive int array + */ + public static <N extends Number> int[] toPrimitiveIntegerArray(NumberVector<N> obj) { + return toPrimitiveIntegerArray(obj, numberVectorAdapter(obj)); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/DoubleArrayAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/DoubleArrayAdapter.java index 117f3845..0e31a61a 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/DoubleArrayAdapter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/DoubleArrayAdapter.java @@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ExtendedArray.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ExtendedArray.java index 491c4f95..3af14982 100644 --- 
a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ExtendedArray.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ExtendedArray.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -91,6 +91,6 @@ public class ExtendedArray<T> implements ArrayAdapter<T, ExtendedArray<T>> { */ @SuppressWarnings("unchecked") public static <T, A> ExtendedArray<T> extend(A array, ArrayAdapter<T, A> getter, T extra) { - return new ExtendedArray<T>(array, (ArrayAdapter<T, Object>) getter, extra); + return new ExtendedArray<>(array, (ArrayAdapter<T, Object>) getter, extra); } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FeatureVectorAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FeatureVectorAdapter.java index 38b662e8..deb5aafc 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FeatureVectorAdapter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FeatureVectorAdapter.java @@ -6,7 +6,7 @@ import de.lmu.ifi.dbs.elki.data.FeatureVector; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FloatArrayAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FloatArrayAdapter.java index ae501039..831dc929 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FloatArrayAdapter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FloatArrayAdapter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/IdentityArrayAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/IdentityArrayAdapter.java index 0c6e03dd..dfde46b7 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/IdentityArrayAdapter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/IdentityArrayAdapter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike; This file is part of ELKI: Environment for Developing 
KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ListArrayAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ListArrayAdapter.java index cba1e706..729dfab8 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ListArrayAdapter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ListArrayAdapter.java @@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberArrayAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberArrayAdapter.java index 1dc823b1..5ebbcb0d 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberArrayAdapter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberArrayAdapter.java @@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberListArrayAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberListArrayAdapter.java index 89a4e3d6..a2606347 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberListArrayAdapter.java +++ 
b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberListArrayAdapter.java @@ -6,7 +6,7 @@ import java.util.List; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberVectorAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberVectorAdapter.java index fd1e6636..5e674026 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberVectorAdapter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/NumberVectorAdapter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SingleSubsetArrayAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SingleSubsetArrayAdapter.java index d7483e4d..941c6245 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SingleSubsetArrayAdapter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SingleSubsetArrayAdapter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SubsetArrayAdapter.java 
b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SubsetArrayAdapter.java index c607759f..746647cc 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SubsetArrayAdapter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SubsetArrayAdapter.java @@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SubsetNumberArrayAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SubsetNumberArrayAdapter.java index 6719b60e..c394f9b7 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SubsetNumberArrayAdapter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/SubsetNumberArrayAdapter.java @@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/TDoubleListAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/TDoubleListAdapter.java index cee393ac..a52ff15e 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/TDoubleListAdapter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/TDoubleListAdapter.java @@ -6,7 +6,7 @@ import gnu.trove.list.TDoubleList; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München 
Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/VectorAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/VectorAdapter.java new file mode 100644 index 00000000..0bb979e9 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/VectorAdapter.java @@ -0,0 +1,85 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; + +/** + * Adapter to use a feature vector as an array of features. + * + * Use the static instance from {@link ArrayLikeUtil}! + * + * @author Erich Schubert + */ +public class VectorAdapter implements NumberArrayAdapter<Double, Vector> { + /** + * Constructor. + * + * Use the static instance from {@link ArrayLikeUtil}! 
+ */ + protected VectorAdapter() { + super(); + } + + @Override + public int size(Vector array) { + return array.getDimensionality(); + } + + @Override + @Deprecated + public Double get(Vector array, int off) throws IndexOutOfBoundsException { + return array.getValue(off + 1); + } + + @Override + public double getDouble(Vector array, int off) throws IndexOutOfBoundsException { + return array.doubleValue(off); + } + + @Override + public float getFloat(Vector array, int off) throws IndexOutOfBoundsException { + return array.floatValue(off); + } + + @Override + public int getInteger(Vector array, int off) throws IndexOutOfBoundsException { + return array.intValue(off); + } + + @Override + public short getShort(Vector array, int off) throws IndexOutOfBoundsException { + return array.shortValue(off); + } + + @Override + public long getLong(Vector array, int off) throws IndexOutOfBoundsException { + return array.longValue(off); + } + + @Override + public byte getByte(Vector array, int off) throws IndexOutOfBoundsException { + return array.byteValue(off); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/package-info.java index 55627df4..33058cf4 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/IntegerArrayQuickSort.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/IntegerArrayQuickSort.java index a0c93997..eaf47738 100644 --- 
a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/IntegerArrayQuickSort.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/IntegerArrayQuickSort.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arrays; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/IntegerComparator.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/IntegerComparator.java index 51164425..0ccd47db 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/IntegerComparator.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/IntegerComparator.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.arrays; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/package-info.java index 3db78032..874a6d44 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arrays/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/AbstractHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/AbstractHeap.java deleted 
file mode 100644 index b6f098e6..00000000 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/AbstractHeap.java +++ /dev/null @@ -1,103 +0,0 @@ -package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; - -/* - This file is part of ELKI: - Environment for Developing KDD-Applications Supported by Index-Structures - - Copyright (C) 2012 - Ludwig-Maximilians-Universität München - Lehr- und Forschungseinheit für Datenbanksysteme - ELKI Development Team - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -/** - * Abstract base class for heaps. - * - * @author Erich Schubert - */ -public class AbstractHeap { - /** - * Default initial capacity - */ - public static final int DEFAULT_INITIAL_CAPACITY = 11; - - /** - * Current number of objects - */ - public int size = 0; - - /** - * Indicate up to where the heap is valid - */ - public int validSize = 0; - - /** - * (Structural) modification counter. Used to invalidate iterators. - */ - public int modCount = 0; - - /** - * Constructor. - */ - public AbstractHeap() { - super(); - } - - /** - * Query the size - * - * @return Size - */ - public int size() { - return this.size; - } - - /** - * Delete all elements from the heap. - */ - public void clear() { - this.size = 0; - this.validSize = -1; - heapModified(); - } - - /** - * Test whether we need to resize to have the requested capacity. 
- * - * @param requiredSize required capacity - * @param capacity Current capacity - * @return new capacity - */ - protected final int desiredSize(int requiredSize, int capacity) { - // Double until 64, then increase by 50% each time. - int newCapacity = ((capacity < 64) ? ((capacity + 1) * 2) : ((capacity / 2) * 3)); - // overflow? - if (newCapacity < 0) { - throw new OutOfMemoryError(); - } - if (requiredSize > newCapacity) { - newCapacity = requiredSize; - } - return newCapacity; - } - - /** - * Called at the end of each heap modification. - */ - protected void heapModified() { - modCount++; - } -} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMaxHeap.java index ab8ef1bb..222fe83a 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMaxHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMaxHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,41 +23,409 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; along with this program. If not, see <http://www.gnu.org/licenses/>. */ +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + /** - * Basic in-memory heap structure. + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). + * + * This code was automatically instantiated for the type: Comparable + * + * This combination was found to work quite well in benchmarks, but YMMV. 
* - * This heap is built lazily: if you first add many elements, then poll the - * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log - * n). This is implemented via a simple validTo counter. + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> * * @author Erich Schubert + * + * @apiviz.has UnsortedIter + * @param <K> Key type */ -public class ComparableMaxHeap<K extends Comparable<K>> extends ObjectHeap<K> { +public class ComparableMaxHeap<K extends Comparable<? super K>> implements ObjectHeap<K> { + /** + * Base heap. + */ + protected Comparable<Object>[] twoheap; + + /** + * Extension heap. + */ + protected Comparable<Object>[] fourheap; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + + /** + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. 
+ * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. + */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + /** - * Constructor with default capacity. + * Constructor, with default size. */ + @SuppressWarnings("unchecked") public ComparableMaxHeap() { - super(DEFAULT_INITIAL_CAPACITY); + super(); + Comparable<Object>[] twoheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, TWO_HEAP_INITIAL_SIZE); + + this.twoheap = twoheap; + this.fourheap = null; + this.size = 0; + this.modCount = 0; } /** - * Constructor with initial capacity. + * Constructor, with given minimum size. * - * @param size initial capacity + * @param minsize Minimum size */ - public ComparableMaxHeap(int size) { - super(size); + @SuppressWarnings("unchecked") + public ComparableMaxHeap(int minsize) { + super(); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + Comparable<Object>[] twoheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, size); + + this.twoheap = twoheap; + this.fourheap = null; + } else { + Comparable<Object>[] twoheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, TWO_HEAP_INITIAL_SIZE); + Comparable<Object>[] fourheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, minsize - TWO_HEAP_MAX_SIZE); + this.twoheap = twoheap; + this.fourheap = fourheap; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + Arrays.fill(twoheap, null); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + @SuppressWarnings("unchecked") + public void add(K o) { + final Comparable<Object> co = (Comparable<Object>)o; 
+ // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + ++size; + heapifyUp2(twopos, co); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, FOUR_HEAP_INITIAL_SIZE); + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. + fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + } + fourheap[fourpos] = co; + ++size; + heapifyUp4(fourpos, co); + ++modCount; + } + } + + @Override + public void add(K key, int max) { + if (size < max) { + add(key); + } else if (twoheap[0].compareTo(key) >= 0) { + replaceTopElement(key); + } + } + + @Override + @SuppressWarnings("unchecked") + public K replaceTopElement(K reinsert) { + final Comparable<Object> ret = twoheap[0]; + heapifyDown((Comparable<Object>) reinsert); + ++modCount; + return (K)ret; } /** - * Compare two objects + * Heapify-Up method for 2-ary heap. * - * @param o1 First object - * @param o2 Second object + * @param twopos Position in 2-ary heap. + * @param cur Current object */ + private void heapifyUp2(int twopos, Comparable<Object> cur) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + Comparable<Object> par = twoheap[parent]; + if (cur.compareTo(par) <= 0) { + break; + } + twoheap[twopos] = par; + twopos = parent; + } + twoheap[twopos] = cur; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. 
+ * @param cur Current object + */ + private void heapifyUp4(int fourpos, Comparable<Object> cur) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + Comparable<Object> par = fourheap[parent]; + if (cur.compareTo(par) <= 0) { + break; + } + fourheap[fourpos] = par; + fourpos = parent; + } + if (fourpos == 0 && twoheap[0].compareTo(cur) < 0) { + fourheap[0] = twoheap[0]; + twoheap[0] = cur; + } else { + fourheap[fourpos] = cur; + } + } + @Override @SuppressWarnings("unchecked") - protected boolean comp(Object o1, Object o2) { - return ((K) o1).compareTo((K) o2) < 0; + public K poll() { + final Comparable<Object> ret = twoheap[0]; + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final Comparable<Object> reinsert = fourheap[last]; + fourheap[last] = null; + heapifyDown(reinsert); + } else if (size > 0) { + final Comparable<Object> reinsert = twoheap[size]; + twoheap[size] = null; + heapifyDown(reinsert); + } else { + twoheap[0] = null; + } + ++modCount; + return (K)ret; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + */ + private void heapifyDown(Comparable<Object> reinsert) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (twoheap[1].compareTo(twoheap[2]) >= 0) ? 1 : 2; + if (fourheap[0].compareTo(twoheap[best]) > 0) { + twoheap[0] = fourheap[0]; + heapifyDown4(0, reinsert); + } else { + twoheap[0] = twoheap[best]; + heapifyDown2(best, reinsert); + } + return; + } + heapifyDown2(0, reinsert); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. 
+ * @param cur Current object + */ + private void heapifyDown2(int twopos, Comparable<Object> cur) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + Comparable<Object> best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && best.compareTo(twoheap[right]) < 0) { + bestchild = right; + best = twoheap[right]; + } + if (cur.compareTo(best) >= 0) { + break; + } + twoheap[twopos] = best; + twopos = bestchild; + } + twoheap[twopos] = cur; + } + + /** + * Heapify-Down for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + */ + private void heapifyDown4(int fourpos, Comparable<Object> cur) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + Comparable<Object> best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + Comparable<Object> nextchild = fourheap[candidate]; + if (best.compareTo(nextchild) < 0) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (best.compareTo(nextchild) < 0) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (best.compareTo(nextchild) < 0) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (cur.compareTo(best) >= 0) { + break; + } + fourheap[fourpos] = best; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + } + + @Override + @SuppressWarnings("unchecked") + public K peek() { + return (K)twoheap[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(ComparableMaxHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) { + buf.append(iter.get()).append(','); + } + 
buf.append(']'); + return buf.toString(); + } + + @Override + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (ObjectHeap.UnsortedIter<K> iter = heap.unsortedIter(); iter.valid(); iter.next()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements ObjectHeap.UnsortedIter<K> { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. + */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @SuppressWarnings("unchecked") + + @Override + public K get() { + return (K)((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMinHeap.java index 06d2cb32..3cc5a02f 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMinHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMinHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,41 +23,409 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + /** - * Basic in-memory heap structure. + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). + * + * This code was automatically instantiated for the type: Comparable + * + * This combination was found to work quite well in benchmarks, but YMMV. * - * This heap is built lazily: if you first add many elements, then poll the - * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log - * n). This is implemented via a simple validTo counter. + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> * * @author Erich Schubert + * + * @apiviz.has UnsortedIter + * @param <K> Key type */ -public class ComparableMinHeap<K extends Comparable<K>> extends ObjectHeap<K> { +public class ComparableMinHeap<K extends Comparable<? super K>> implements ObjectHeap<K> { + /** + * Base heap. + */ + protected Comparable<Object>[] twoheap; + + /** + * Extension heap. + */ + protected Comparable<Object>[] fourheap; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. 
Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + + /** + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. + */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + /** - * Constructor with default capacity. + * Constructor, with default size. */ + @SuppressWarnings("unchecked") public ComparableMinHeap() { - super(DEFAULT_INITIAL_CAPACITY); + super(); + Comparable<Object>[] twoheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, TWO_HEAP_INITIAL_SIZE); + + this.twoheap = twoheap; + this.fourheap = null; + this.size = 0; + this.modCount = 0; } /** - * Constructor with initial capacity. + * Constructor, with given minimum size. 
* - * @param size initial capacity + * @param minsize Minimum size */ - public ComparableMinHeap(int size) { - super(size); + @SuppressWarnings("unchecked") + public ComparableMinHeap(int minsize) { + super(); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + Comparable<Object>[] twoheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, size); + + this.twoheap = twoheap; + this.fourheap = null; + } else { + Comparable<Object>[] twoheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, TWO_HEAP_INITIAL_SIZE); + Comparable<Object>[] fourheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, minsize - TWO_HEAP_MAX_SIZE); + this.twoheap = twoheap; + this.fourheap = fourheap; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + Arrays.fill(twoheap, null); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + @SuppressWarnings("unchecked") + public void add(K o) { + final Comparable<Object> co = (Comparable<Object>)o; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + ++size; + heapifyUp2(twopos, co); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, FOUR_HEAP_INITIAL_SIZE); + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. 
+ fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + } + fourheap[fourpos] = co; + ++size; + heapifyUp4(fourpos, co); + ++modCount; + } + } + + @Override + public void add(K key, int max) { + if (size < max) { + add(key); + } else if (twoheap[0].compareTo(key) <= 0) { + replaceTopElement(key); + } + } + + @Override + @SuppressWarnings("unchecked") + public K replaceTopElement(K reinsert) { + final Comparable<Object> ret = twoheap[0]; + heapifyDown((Comparable<Object>) reinsert); + ++modCount; + return (K)ret; } /** - * Compare two objects + * Heapify-Up method for 2-ary heap. * - * @param o1 First object - * @param o2 Second object + * @param twopos Position in 2-ary heap. + * @param cur Current object */ + private void heapifyUp2(int twopos, Comparable<Object> cur) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + Comparable<Object> par = twoheap[parent]; + if (cur.compareTo(par) >= 0) { + break; + } + twoheap[twopos] = par; + twopos = parent; + } + twoheap[twopos] = cur; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. 
+ * @param cur Current object + */ + private void heapifyUp4(int fourpos, Comparable<Object> cur) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + Comparable<Object> par = fourheap[parent]; + if (cur.compareTo(par) >= 0) { + break; + } + fourheap[fourpos] = par; + fourpos = parent; + } + if (fourpos == 0 && twoheap[0].compareTo(cur) > 0) { + fourheap[0] = twoheap[0]; + twoheap[0] = cur; + } else { + fourheap[fourpos] = cur; + } + } + @Override @SuppressWarnings("unchecked") - protected boolean comp(Object o1, Object o2) { - return ((K) o1).compareTo((K) o2) > 0; + public K poll() { + final Comparable<Object> ret = twoheap[0]; + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final Comparable<Object> reinsert = fourheap[last]; + fourheap[last] = null; + heapifyDown(reinsert); + } else if (size > 0) { + final Comparable<Object> reinsert = twoheap[size]; + twoheap[size] = null; + heapifyDown(reinsert); + } else { + twoheap[0] = null; + } + ++modCount; + return (K)ret; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + */ + private void heapifyDown(Comparable<Object> reinsert) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (twoheap[1].compareTo(twoheap[2]) <= 0) ? 1 : 2; + if (fourheap[0].compareTo(twoheap[best]) < 0) { + twoheap[0] = fourheap[0]; + heapifyDown4(0, reinsert); + } else { + twoheap[0] = twoheap[best]; + heapifyDown2(best, reinsert); + } + return; + } + heapifyDown2(0, reinsert); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. 
+ * @param cur Current object + */ + private void heapifyDown2(int twopos, Comparable<Object> cur) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + Comparable<Object> best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && best.compareTo(twoheap[right]) > 0) { + bestchild = right; + best = twoheap[right]; + } + if (cur.compareTo(best) <= 0) { + break; + } + twoheap[twopos] = best; + twopos = bestchild; + } + twoheap[twopos] = cur; + } + + /** + * Heapify-Down for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + */ + private void heapifyDown4(int fourpos, Comparable<Object> cur) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + Comparable<Object> best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + Comparable<Object> nextchild = fourheap[candidate]; + if (best.compareTo(nextchild) > 0) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (best.compareTo(nextchild) > 0) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (best.compareTo(nextchild) > 0) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (cur.compareTo(best) <= 0) { + break; + } + fourheap[fourpos] = best; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + } + + @Override + @SuppressWarnings("unchecked") + public K peek() { + return (K)twoheap[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(ComparableMinHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) { + buf.append(iter.get()).append(','); + } + 
buf.append(']'); + return buf.toString(); + } + + @Override + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (ObjectHeap.UnsortedIter<K> iter = heap.unsortedIter(); iter.valid(); iter.next()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements ObjectHeap.UnsortedIter<K> { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. + */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @SuppressWarnings("unchecked") + + @Override + public K get() { + return (K)((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMaxHeap.java new file mode 100644 index 00000000..7b660d31 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMaxHeap.java @@ -0,0 +1,440 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). + * + * This code was automatically instantiated for the type: Comparator + * + * This combination was found to work quite well in benchmarks, but YMMV. + * + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> + * + * @author Erich Schubert + * + * @apiviz.has UnsortedIter + * @param <K> Key type + */ +public class ComparatorMaxHeap<K> implements ObjectHeap<K> { + /** + * Base heap. + */ + protected Object[] twoheap; + + /** + * Extension heap. + */ + protected Object[] fourheap; + + /** + * Current size of heap. 
+ */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + + /** + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. + */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + + + /** + * Comparator + */ + protected java.util.Comparator<Object> comparator; + + /** + * Constructor, with default size. + * @param comparator Comparator + */ + @SuppressWarnings("unchecked") + public ComparatorMaxHeap(java.util.Comparator<? super K> comparator) { + super(); + this.comparator = (java.util.Comparator<Object>) java.util.Comparator.class.cast(comparator); + Object[] twoheap = new Object[TWO_HEAP_INITIAL_SIZE]; + + this.twoheap = twoheap; + this.fourheap = null; + this.size = 0; + this.modCount = 0; + } + + /** + * Constructor, with given minimum size. + * + * @param minsize Minimum size + * @param comparator Comparator + */ + @SuppressWarnings("unchecked") + public ComparatorMaxHeap(int minsize, java.util.Comparator<? 
super K> comparator) { + super(); + this.comparator = (java.util.Comparator<Object>) java.util.Comparator.class.cast(comparator); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + Object[] twoheap = new Object[size]; + + this.twoheap = twoheap; + this.fourheap = null; + } else { + Object[] twoheap = new Object[TWO_HEAP_INITIAL_SIZE]; + Object[] fourheap = new Object[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + this.twoheap = twoheap; + this.fourheap = fourheap; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + Arrays.fill(twoheap, null); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + public void add(K o) { + final Object co = o; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + ++size; + heapifyUp2(twopos, co); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = new Object[FOUR_HEAP_INITIAL_SIZE]; + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. + fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + } + fourheap[fourpos] = co; + ++size; + heapifyUp4(fourpos, co); + ++modCount; + } + } + + @Override + public void add(K key, int max) { + if (size < max) { + add(key); + } else if (comparator.compare(twoheap[0], key) >= 0) { + replaceTopElement(key); + } + } + + @Override + @SuppressWarnings("unchecked") + public K replaceTopElement(K reinsert) { + final Object ret = twoheap[0]; + heapifyDown( reinsert); + ++modCount; + return (K)ret; + } + + /** + * Heapify-Up method for 2-ary heap. + * + * @param twopos Position in 2-ary heap. 
+ * @param cur Current object + */ + private void heapifyUp2(int twopos, Object cur) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + Object par = twoheap[parent]; + if (comparator.compare(cur, par) <= 0) { + break; + } + twoheap[twopos] = par; + twopos = parent; + } + twoheap[twopos] = cur; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + */ + private void heapifyUp4(int fourpos, Object cur) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + Object par = fourheap[parent]; + if (comparator.compare(cur, par) <= 0) { + break; + } + fourheap[fourpos] = par; + fourpos = parent; + } + if (fourpos == 0 && comparator.compare(twoheap[0], cur) < 0) { + fourheap[0] = twoheap[0]; + twoheap[0] = cur; + } else { + fourheap[fourpos] = cur; + } + } + + @Override + @SuppressWarnings("unchecked") + public K poll() { + final Object ret = twoheap[0]; + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final Object reinsert = fourheap[last]; + fourheap[last] = null; + heapifyDown(reinsert); + } else if (size > 0) { + final Object reinsert = twoheap[size]; + twoheap[size] = null; + heapifyDown(reinsert); + } else { + twoheap[0] = null; + } + ++modCount; + return (K)ret; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + */ + private void heapifyDown(Object reinsert) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (comparator.compare(twoheap[1], twoheap[2]) >= 0) ? 1 : 2; + if (comparator.compare(fourheap[0], twoheap[best]) > 0) { + twoheap[0] = fourheap[0]; + heapifyDown4(0, reinsert); + } else { + twoheap[0] = twoheap[best]; + heapifyDown2(best, reinsert); + } + return; + } + heapifyDown2(0, reinsert); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. 
+ * @param cur Current object + */ + private void heapifyDown2(int twopos, Object cur) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + Object best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && comparator.compare(best, twoheap[right]) < 0) { + bestchild = right; + best = twoheap[right]; + } + if (comparator.compare(cur, best) >= 0) { + break; + } + twoheap[twopos] = best; + twopos = bestchild; + } + twoheap[twopos] = cur; + } + + /** + * Heapify-Down for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + */ + private void heapifyDown4(int fourpos, Object cur) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + Object best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + Object nextchild = fourheap[candidate]; + if (comparator.compare(best, nextchild) < 0) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (comparator.compare(best, nextchild) < 0) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (comparator.compare(best, nextchild) < 0) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (comparator.compare(cur, best) >= 0) { + break; + } + fourheap[fourpos] = best; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + } + + @Override + @SuppressWarnings("unchecked") + public K peek() { + return (K)twoheap[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(ComparatorMaxHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) { + buf.append(iter.get()).append(','); + } + 
+ buf.append(']'); + return buf.toString(); + } + + @Override + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (ObjectHeap.UnsortedIter<K> iter = heap.unsortedIter(); iter.valid(); iter.advance()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements ObjectHeap.UnsortedIter<K> { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. + */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @SuppressWarnings("unchecked") + + @Override + public K get() { + return (K)((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMinHeap.java new file mode 100644 index 00000000..e12c5f64 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMinHeap.java @@ -0,0 +1,440 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). + * + * This code was automatically instantiated for the type: Comparator + * + * This combination was found to work quite well in benchmarks, but YMMV. + * + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> + * + * @author Erich Schubert + * + * @apiviz.has UnsortedIter + * @param <K> Key type + */ +public class ComparatorMinHeap<K> implements ObjectHeap<K> { + /** + * Base heap. + */ + protected Object[] twoheap; + + /** + * Extension heap. + */ + protected Object[] fourheap; + + /** + * Current size of heap. 
+ */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + + /** + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. + */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + + + /** + * Comparator + */ + protected java.util.Comparator<Object> comparator; + + /** + * Constructor, with default size. + * @param comparator Comparator + */ + @SuppressWarnings("unchecked") + public ComparatorMinHeap(java.util.Comparator<? super K> comparator) { + super(); + this.comparator = (java.util.Comparator<Object>) java.util.Comparator.class.cast(comparator); + Object[] twoheap = new Object[TWO_HEAP_INITIAL_SIZE]; + + this.twoheap = twoheap; + this.fourheap = null; + this.size = 0; + this.modCount = 0; + } + + /** + * Constructor, with given minimum size. + * + * @param minsize Minimum size + * @param comparator Comparator + */ + @SuppressWarnings("unchecked") + public ComparatorMinHeap(int minsize, java.util.Comparator<? 
super K> comparator) { + super(); + this.comparator = (java.util.Comparator<Object>) java.util.Comparator.class.cast(comparator); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + Object[] twoheap = new Object[size]; + + this.twoheap = twoheap; + this.fourheap = null; + } else { + Object[] twoheap = new Object[TWO_HEAP_INITIAL_SIZE]; + Object[] fourheap = new Object[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + this.twoheap = twoheap; + this.fourheap = fourheap; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + Arrays.fill(twoheap, null); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + public void add(K o) { + final Object co = o; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + ++size; + heapifyUp2(twopos, co); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = new Object[FOUR_HEAP_INITIAL_SIZE]; + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. + fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + } + fourheap[fourpos] = co; + ++size; + heapifyUp4(fourpos, co); + ++modCount; + } + } + + @Override + public void add(K key, int max) { + if (size < max) { + add(key); + } else if (comparator.compare(twoheap[0], key) <= 0) { + replaceTopElement(key); + } + } + + @Override + @SuppressWarnings("unchecked") + public K replaceTopElement(K reinsert) { + final Object ret = twoheap[0]; + heapifyDown( reinsert); + ++modCount; + return (K)ret; + } + + /** + * Heapify-Up method for 2-ary heap. + * + * @param twopos Position in 2-ary heap. 
+ * @param cur Current object + */ + private void heapifyUp2(int twopos, Object cur) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + Object par = twoheap[parent]; + if (comparator.compare(cur, par) >= 0) { + break; + } + twoheap[twopos] = par; + twopos = parent; + } + twoheap[twopos] = cur; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + */ + private void heapifyUp4(int fourpos, Object cur) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + Object par = fourheap[parent]; + if (comparator.compare(cur, par) >= 0) { + break; + } + fourheap[fourpos] = par; + fourpos = parent; + } + if (fourpos == 0 && comparator.compare(twoheap[0], cur) > 0) { + fourheap[0] = twoheap[0]; + twoheap[0] = cur; + } else { + fourheap[fourpos] = cur; + } + } + + @Override + @SuppressWarnings("unchecked") + public K poll() { + final Object ret = twoheap[0]; + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final Object reinsert = fourheap[last]; + fourheap[last] = null; + heapifyDown(reinsert); + } else if (size > 0) { + final Object reinsert = twoheap[size]; + twoheap[size] = null; + heapifyDown(reinsert); + } else { + twoheap[0] = null; + } + ++modCount; + return (K)ret; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + */ + private void heapifyDown(Object reinsert) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (comparator.compare(twoheap[1], twoheap[2]) <= 0) ? 1 : 2; + if (comparator.compare(fourheap[0], twoheap[best]) < 0) { + twoheap[0] = fourheap[0]; + heapifyDown4(0, reinsert); + } else { + twoheap[0] = twoheap[best]; + heapifyDown2(best, reinsert); + } + return; + } + heapifyDown2(0, reinsert); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. 
+ * @param cur Current object + */ + private void heapifyDown2(int twopos, Object cur) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + Object best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && comparator.compare(best, twoheap[right]) > 0) { + bestchild = right; + best = twoheap[right]; + } + if (comparator.compare(cur, best) <= 0) { + break; + } + twoheap[twopos] = best; + twopos = bestchild; + } + twoheap[twopos] = cur; + } + + /** + * Heapify-Down for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + */ + private void heapifyDown4(int fourpos, Object cur) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + Object best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + Object nextchild = fourheap[candidate]; + if (comparator.compare(best, nextchild) > 0) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (comparator.compare(best, nextchild) > 0) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (comparator.compare(best, nextchild) > 0) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (comparator.compare(cur, best) <= 0) { + break; + } + fourheap[fourpos] = best; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + } + + @Override + @SuppressWarnings("unchecked") + public K peek() { + return (K)twoheap[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(ComparatorMinHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) { + buf.append(iter.get()).append(','); + } + 
+ buf.append(']'); + return buf.toString(); + } + + @Override + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (ObjectHeap.UnsortedIter<K> iter = heap.unsortedIter(); iter.valid(); iter.advance()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements ObjectHeap.UnsortedIter<K> { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. + */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @SuppressWarnings("unchecked") + + @Override + public K get() { + return (K)((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleHeap.java index f9f928bd..acf77d86 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,53 +23,22 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ -import java.util.Arrays; - -import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter; /** - * Basic in-memory heap structure. - * - * This heap is built lazily: if you first add many elements, then poll the - * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log - * n). This is implemented via a simple validTo counter. + * Basic in-memory heap for double values. * * @author Erich Schubert + * + * @apiviz.has UnsortedIter */ -public abstract class DoubleHeap extends AbstractHeap { - /** - * Heap storage: queue - */ - protected transient double[] queue; - - /** - * Constructor with initial capacity. - * - * @param size initial capacity - */ - public DoubleHeap(int size) { - super(); - this.size = 0; - this.queue = new double[size]; - } - +public interface DoubleHeap { /** * Add a key-value pair to the heap * * @param key Key */ - public void add(double key) { - // resize when needed - if (size + 1 > queue.length) { - resize(size + 1); - } - // final int pos = size; - this.queue[size] = key; - this.size += 1; - heapifyUp(size - 1, key); - validSize += 1; - heapModified(); - } + void add(double key); /** * Add a key-value pair to the heap, except if the new element is larger than @@ -78,13 +47,7 @@ public abstract class DoubleHeap extends AbstractHeap { * @param key Key * @param max Maximum size of heap */ - public void add(double key, int max) { - if (size < max) { - add(key); - } else if (comp(key, peek())) { - replaceTopElement(key); - } - } + void add(double key, int max); /** * Combined operation that removes the top element, and inserts a new element @@ -93,172 +56,67 @@ public abstract class DoubleHeap extends AbstractHeap { * @param e New element to insert * @return Previous top element of the heap */ - public double replaceTopElement(double e) { - ensureValid(); - double oldroot = queue[0]; - heapifyDown(0, e); - heapModified(); - return oldroot; - } + double 
replaceTopElement(double e); /** * Get the current top key * * @return Top key */ - public double peek() { - if (size == 0) { - throw new ArrayIndexOutOfBoundsException("Peek() on an empty heap!"); - } - ensureValid(); - return queue[0]; - } + double peek(); /** * Remove the first element * * @return Top element */ - public double poll() { - return removeAt(0); - } + double poll(); /** - * Repair the heap + * Delete all elements from the heap. */ - protected void ensureValid() { - if (validSize != size) { - if (size > 1) { - // Parent of first invalid - int nextmin = validSize > 0 ? ((validSize - 1) >>> 1) : 0; - int curmin = MathUtil.nextAllOnesInt(nextmin); // Next line - int nextmax = curmin - 1; // End of valid line - int pos = (size - 2) >>> 1; // Parent of last element - // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin+", "+nextmin); - while (pos >= nextmin) { - // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin); - while (pos >= curmin) { - if (!heapifyDown(pos, queue[pos])) { - final int parent = (pos - 1) >>> 1; - if (parent < curmin) { - nextmin = Math.min(nextmin, parent); - nextmax = Math.max(nextmax, parent); - } - } - pos--; - } - curmin = nextmin; - pos = Math.min(pos, nextmax); - nextmax = -1; - } - } - validSize = size; - } - } + void clear(); /** - * Remove the element at the given position. + * Query the size * - * @param pos Element position. - * @return Removed element + * @return Size */ - protected double removeAt(int pos) { - if (pos < 0 || pos >= size) { - return 0.0; - } - final double top = queue[0]; - // Replacement object: - final double reinkey = queue[size - 1]; - // Keep heap in sync - if (validSize == size) { - size -= 1; - validSize -= 1; - heapifyDown(pos, reinkey); - } else { - size -= 1; - validSize = Math.min(pos >>> 1, validSize); - queue[pos] = reinkey; - } - heapModified(); - return top; - } - + public int size(); + /** - * Execute a "Heapify Upwards" aka "SiftUp". Used in insertions. 
+ * Is the heap empty? * - * @param pos insertion position - * @param curkey Current key + * @return {@code true} when the size is 0. */ - protected void heapifyUp(int pos, double curkey) { - while (pos > 0) { - final int parent = (pos - 1) >>> 1; - double parkey = queue[parent]; - - if (comp(curkey, parkey)) { // Compare - break; - } - queue[pos] = parkey; - pos = parent; - } - queue[pos] = curkey; - } + public boolean isEmpty(); /** - * Execute a "Heapify Downwards" aka "SiftDown". Used in deletions. + * Get an unsorted iterator to inspect the heap. * - * @param ipos re-insertion position - * @param curkey Current key - * @return true when the order was changed + * @return Iterator */ - protected boolean heapifyDown(final int ipos, double curkey) { - int pos = ipos; - final int half = size >>> 1; - while (pos < half) { - // Get left child (must exist!) - int cpos = (pos << 1) + 1; - double chikey = queue[cpos]; - // Test right child, if present - final int rchild = cpos + 1; - if (rchild < size) { - double right = queue[rchild]; - if (comp(chikey, right)) { // Compare - cpos = rchild; - chikey = right; - } - } - - if (comp(chikey, curkey)) { // Compare - break; - } - queue[pos] = chikey; - pos = cpos; - } - queue[pos] = curkey; - return (pos == ipos); - } + UnsortedIter unsortedIter(); /** - * Test whether we need to resize to have the requested capacity. + * Unsorted iterator - in heap order. Does not poll the heap. * - * @param requiredSize required capacity - */ - protected final void resize(int requiredSize) { - queue = Arrays.copyOf(queue, desiredSize(requiredSize, queue.length)); - } - - /** - * Delete all elements from the heap. 
+ * <pre> + * {@code + * for (DoubleHeap.UnsortedIter iter = heap.unsortedIter(); iter.valid(); iter.advance()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert */ - @Override - public void clear() { - super.clear(); - for (int i = 0; i < size; i++) { - queue[i] = 0.0; - } + public static interface UnsortedIter extends Iter { + /** + * Get the iterator's current object. + * + * @return Current object + */ + double get(); } - - /** - * Compare two objects - */ - abstract protected boolean comp(double o1, double o2); } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerHeap.java new file mode 100644 index 00000000..c3bf85f4 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerHeap.java @@ -0,0 +1,127 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter; + +/** + * Basic in-memory heap interface, for double keys and int values. 
+ * + * @author Erich Schubert + * + * @apiviz.has UnsortedIter + */ +public interface DoubleIntegerHeap { + /** + * Add a key-value pair to the heap + * + * @param key Key + * @param val Value + */ + void add(double key, int val); + + /** + * Add a key-value pair to the heap if it improves the top. + * + * @param key Key + * @param val Value + * @param k Desired maximum size + */ + void add(double key, int val, int k); + + /** + * Combined operation that removes the top element, and inserts a new element + * instead. + * + * @param key Key of new element + * @param val Value of new element + */ + void replaceTopElement(double key, int val); + + /** + * Get the current top key + * + * @return Top key + */ + double peekKey(); + + /** + * Get the current top value + * + * @return Value + */ + int peekValue(); + + /** + * Remove the first element + */ + void poll(); + + /** + * Clear the heap contents. + */ + void clear(); + + /** + * Query the size + * + * @return Size + */ + public int size(); + + /** + * Is the heap empty? + * + * @return {@code true} when the size is 0. + */ + public boolean isEmpty(); + + /** + * Get an unsorted iterator to inspect the heap. + * + * @return Iterator + */ + UnsortedIter unsortedIter(); + + /** + * Unsorted iterator - in heap order. Does not poll the heap. 
+ * + * @author Erich Schubert + */ + public static interface UnsortedIter extends Iter { + /** + * Get the current key + * + * @return Current key + */ + double getKey(); + + /** + * Get the current value + * + * @return Current value + */ + int getValue(); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMaxHeap.java new file mode 100644 index 00000000..34f1e889 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMaxHeap.java @@ -0,0 +1,478 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). 
+ * + * This code was automatically instantiated for the types: Double and Integer + * + * This combination was found to work quite well in benchmarks, but YMMV. + * + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> + * + * @author Erich Schubert + * + * @apiviz.has UnsortedIter + */ +public class DoubleIntegerMaxHeap implements DoubleIntegerHeap { + /** + * Base heap. + */ + protected double[] twoheap; + + /** + * Base heap values. + */ + protected int[] twovals; + + /** + * Extension heap. + */ + protected double[] fourheap; + + /** + * Extension heapvalues. + */ + protected int[] fourvals; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + + /** + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. 
+ */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + + /** + * Constructor, with default size. + */ + public DoubleIntegerMaxHeap() { + super(); + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + int[] twovals = new int[TWO_HEAP_INITIAL_SIZE]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + this.size = 0; + this.modCount = 0; + } + + /** + * Constructor, with given minimum size. + * + * @param minsize Minimum size + */ + public DoubleIntegerMaxHeap(int minsize) { + super(); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + double[] twoheap = new double[size]; + int[] twovals = new int[size]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + } else { + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + int[] twovals = new int[TWO_HEAP_INITIAL_SIZE]; + double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + int[] fourvals = new int[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = fourheap; + this.fourvals = fourvals; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + fourvals = null; + Arrays.fill(twoheap, 0.0); + Arrays.fill(twovals, 0); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + public void add(double o, int v) { + final double co = o; + final int cv = v; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. 
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + twovals[twopos] = cv; + ++size; + heapifyUp2(twopos, co, cv); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = new double[FOUR_HEAP_INITIAL_SIZE]; + fourvals = new int[FOUR_HEAP_INITIAL_SIZE]; + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. + fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1)); + } + fourheap[fourpos] = co; + fourvals[fourpos] = cv; + ++size; + heapifyUp4(fourpos, co, cv); + ++modCount; + } + } + + @Override + public void add(double key, int val, int max) { + if (size < max) { + add(key, val); + } else if (twoheap[0] >= key) { + replaceTopElement(key, val); + } + } + + @Override + public void replaceTopElement(double reinsert, int val) { + heapifyDown(reinsert, val); + ++modCount; + } + + /** + * Heapify-Up method for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Current value + */ + private void heapifyUp2(int twopos, double cur, int val) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + double par = twoheap[parent]; + if (cur <= par) { + break; + } + twoheap[twopos] = par; + twovals[twopos] = twovals[parent]; + twopos = parent; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. 
+ * @param cur Current object + * @param val Current value + */ + private void heapifyUp4(int fourpos, double cur, int val) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + double par = fourheap[parent]; + if (cur <= par) { + break; + } + fourheap[fourpos] = par; + fourvals[fourpos] = fourvals[parent]; + fourpos = parent; + } + if (fourpos == 0 && twoheap[0] < cur) { + fourheap[0] = twoheap[0]; + fourvals[0] = twovals[0]; + twoheap[0] = cur; + twovals[0] = val; + } else { + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + } + + @Override + public void poll() { + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final double reinsert = fourheap[last]; + final int reinsertv = fourvals[last]; + fourheap[last] = 0.0; + fourvals[last] = 0; + heapifyDown(reinsert, reinsertv); + } else if (size > 0) { + final double reinsert = twoheap[size]; + final int reinsertv = twovals[size]; + twoheap[size] = 0.0; + twovals[size] = 0; + heapifyDown(reinsert, reinsertv); + } else { + twoheap[0] = 0.0; + twovals[0] = 0; + } + ++modCount; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + * @param val Value to reinsert. + */ + private void heapifyDown(double reinsert, int val) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (twoheap[1] >= twoheap[2]) ? 1 : 2; + if (fourheap[0] > twoheap[best]) { + twoheap[0] = fourheap[0]; + twovals[0] = fourvals[0]; + heapifyDown4(0, reinsert, val); + } else { + twoheap[0] = twoheap[best]; + twovals[0] = twovals[best]; + heapifyDown2(best, reinsert, val); + } + return; + } + heapifyDown2(0, reinsert, val); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Value to reinsert. 
+ */ + private void heapifyDown2(int twopos, double cur, int val) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + double best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && best < twoheap[right]) { + bestchild = right; + best = twoheap[right]; + } + if (cur >= best) { + break; + } + twoheap[twopos] = best; + twovals[twopos] = twovals[bestchild]; + twopos = bestchild; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Down for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + * @param val Value to reinsert. + */ + private void heapifyDown4(int fourpos, double cur, int val) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + double best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + double nextchild = fourheap[candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (cur >= best) { + break; + } + fourheap[fourpos] = best; + fourvals[fourpos] = fourvals[bestchild]; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + + @Override + public double peekKey() { + return twoheap[0]; + } + + @Override + public int peekValue() { + return twovals[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(DoubleIntegerMaxHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new UnsortedIter(); iter.valid(); 
iter.advance()) { + buf.append(iter.getKey()).append(':').append(iter.getValue()).append(','); + } + buf.append(']'); + return buf.toString(); + } + + @Override + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (DoubleIntegerHeap.UnsortedIter iter = heap.unsortedIter(); iter.valid(); iter.advance()) { + * doSomething(iter.getKey(), iter.getValue()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements DoubleIntegerHeap.UnsortedIter { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. + */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @Override + public double getKey() { + return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } + + @Override + public int getValue() { + return ((pos < TWO_HEAP_MAX_SIZE) ? 
twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMinHeap.java new file mode 100644 index 00000000..ca6192ad --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMinHeap.java @@ -0,0 +1,478 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). + * + * This code was automatically instantiated for the types: Double and Integer + * + * This combination was found to work quite well in benchmarks, but YMMV. 
+ * + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> + * + * @author Erich Schubert + * + * @apiviz.has UnsortedIter + */ +public class DoubleIntegerMinHeap implements DoubleIntegerHeap { + /** + * Base heap. + */ + protected double[] twoheap; + + /** + * Base heap values. + */ + protected int[] twovals; + + /** + * Extension heap. + */ + protected double[] fourheap; + + /** + * Extension heap values. + */ + protected int[] fourvals; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + + /** + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since the 2-ary heap last grew by 256 (from 255 to 511), let's use 341. + */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + + /** + * Constructor, with default size. 
+ */ + public DoubleIntegerMinHeap() { + super(); + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + int[] twovals = new int[TWO_HEAP_INITIAL_SIZE]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + this.size = 0; + this.modCount = 0; + } + + /** + * Constructor, with given minimum size. + * + * @param minsize Minimum size + */ + public DoubleIntegerMinHeap(int minsize) { + super(); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + double[] twoheap = new double[size]; + int[] twovals = new int[size]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + } else { + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + int[] twovals = new int[TWO_HEAP_INITIAL_SIZE]; + double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + int[] fourvals = new int[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = fourheap; + this.fourvals = fourvals; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + fourvals = null; + Arrays.fill(twoheap, 0.0); + Arrays.fill(twovals, 0); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + public void add(double o, int v) { + final double co = o; + final int cv = v; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. 
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + twovals[twopos] = cv; + ++size; + heapifyUp2(twopos, co, cv); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = new double[FOUR_HEAP_INITIAL_SIZE]; + fourvals = new int[FOUR_HEAP_INITIAL_SIZE]; + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. + fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1)); + } + fourheap[fourpos] = co; + fourvals[fourpos] = cv; + ++size; + heapifyUp4(fourpos, co, cv); + ++modCount; + } + } + + @Override + public void add(double key, int val, int max) { + if (size < max) { + add(key, val); + } else if (twoheap[0] <= key) { + replaceTopElement(key, val); + } + } + + @Override + public void replaceTopElement(double reinsert, int val) { + heapifyDown(reinsert, val); + ++modCount; + } + + /** + * Heapify-Up method for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Current value + */ + private void heapifyUp2(int twopos, double cur, int val) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + double par = twoheap[parent]; + if (cur >= par) { + break; + } + twoheap[twopos] = par; + twovals[twopos] = twovals[parent]; + twopos = parent; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. 
+ * @param cur Current object + * @param val Current value + */ + private void heapifyUp4(int fourpos, double cur, int val) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + double par = fourheap[parent]; + if (cur >= par) { + break; + } + fourheap[fourpos] = par; + fourvals[fourpos] = fourvals[parent]; + fourpos = parent; + } + if (fourpos == 0 && twoheap[0] > cur) { + fourheap[0] = twoheap[0]; + fourvals[0] = twovals[0]; + twoheap[0] = cur; + twovals[0] = val; + } else { + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + } + + @Override + public void poll() { + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final double reinsert = fourheap[last]; + final int reinsertv = fourvals[last]; + fourheap[last] = 0.0; + fourvals[last] = 0; + heapifyDown(reinsert, reinsertv); + } else if (size > 0) { + final double reinsert = twoheap[size]; + final int reinsertv = twovals[size]; + twoheap[size] = 0.0; + twovals[size] = 0; + heapifyDown(reinsert, reinsertv); + } else { + twoheap[0] = 0.0; + twovals[0] = 0; + } + ++modCount; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + * @param val Value to reinsert. + */ + private void heapifyDown(double reinsert, int val) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (twoheap[1] <= twoheap[2]) ? 1 : 2; + if (fourheap[0] < twoheap[best]) { + twoheap[0] = fourheap[0]; + twovals[0] = fourvals[0]; + heapifyDown4(0, reinsert, val); + } else { + twoheap[0] = twoheap[best]; + twovals[0] = twovals[best]; + heapifyDown2(best, reinsert, val); + } + return; + } + heapifyDown2(0, reinsert, val); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Value to reinsert. 
+ */ + private void heapifyDown2(int twopos, double cur, int val) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + double best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && best > twoheap[right]) { + bestchild = right; + best = twoheap[right]; + } + if (cur <= best) { + break; + } + twoheap[twopos] = best; + twovals[twopos] = twovals[bestchild]; + twopos = bestchild; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Down for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + * @param val Value to reinsert. + */ + private void heapifyDown4(int fourpos, double cur, int val) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + double best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + double nextchild = fourheap[candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (cur <= best) { + break; + } + fourheap[fourpos] = best; + fourvals[fourpos] = fourvals[bestchild]; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + + @Override + public double peekKey() { + return twoheap[0]; + } + + @Override + public int peekValue() { + return twovals[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(DoubleIntegerMinHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new UnsortedIter(); iter.valid(); 
iter.advance()) { + buf.append(iter.getKey()).append(':').append(iter.getValue()).append(','); + } + buf.append(']'); + return buf.toString(); + } + + @Override + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (DoubleIntegerHeap.UnsortedIter iter = heap.unsortedIter(); iter.valid(); iter.advance()) { + * doSomething(iter.getKey(), iter.getValue()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements DoubleIntegerHeap.UnsortedIter { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. + */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @Override + public double getKey() { + return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } + + @Override + public int getValue() { + return ((pos < TWO_HEAP_MAX_SIZE) ? 
twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongHeap.java new file mode 100644 index 00000000..b93adafa --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongHeap.java @@ -0,0 +1,127 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter; + +/** + * Basic in-memory heap interface, for double keys and long values. + * + * @author Erich Schubert + * + * @apiviz.has UnsortedIter + */ +public interface DoubleLongHeap { + /** + * Add a key-value pair to the heap + * + * @param key Key + * @param val Value + */ + void add(double key, long val); + + /** + * Add a key-value pair to the heap if it improves the top. 
+   *
+   * While fewer than {@code k} entries are stored the pair is simply added;
+   * afterwards it can only replace the current top entry (see the implementing
+   * classes for the exact comparison used).
+   *
+   * @param key Key
+   * @param val Value
+   * @param k Desired maximum size
+   */
+  void add(double key, long val, int k);
+
+  /**
+   * Combined operation that removes the top element, and inserts a new element
+   * instead.
+   *
+   * @param key Key of new element
+   * @param val Value of new element
+   */
+  void replaceTopElement(double key, long val);
+
+  /**
+   * Get the key of the current top element.
+   *
+   * @return Top key
+   */
+  double peekKey();
+
+  /**
+   * Get the value stored with the current top element.
+   *
+   * @return Value
+   */
+  long peekValue();
+
+  /**
+   * Remove the first (top) element. Nothing is returned; read it with
+   * {@link #peekKey()} and {@link #peekValue()} before polling if needed.
+   */
+  void poll();
+
+  /**
+   * Clear the heap contents.
+   */
+  void clear();
+
+  /**
+   * Query the number of stored elements.
+   *
+   * @return Size
+   */
+  public int size();
+
+  /**
+   * Is the heap empty?
+   *
+   * @return {@code true} when the size is 0.
+   */
+  public boolean isEmpty();
+
+  /**
+   * Get an unsorted iterator to inspect the heap.
+   *
+   * @return Iterator
+   */
+  UnsortedIter unsortedIter();
+
+  /**
+   * Unsorted iterator - in heap order. Does not poll the heap.
+ * + * @author Erich Schubert + */ + public static interface UnsortedIter extends Iter { + /** + * Get the current key + * + * @return Current key + */ + double getKey(); + + /** + * Get the current value + * + * @return Current value + */ + long getValue(); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMaxHeap.java new file mode 100644 index 00000000..6d15656c --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMaxHeap.java @@ -0,0 +1,478 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). 
+ * + * This code was automatically instantiated for the types: Double and Long + * + * This combination was found to work quite well in benchmarks, but YMMV. + * + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> + * + * @author Erich Schubert + * + * @apiviz.has UnsortedIter + */ +public class DoubleLongMaxHeap implements DoubleLongHeap { + /** + * Base heap. + */ + protected double[] twoheap; + + /** + * Base heap values. + */ + protected long[] twovals; + + /** + * Extension heap. + */ + protected double[] fourheap; + + /** + * Extension heap values. + */ + protected long[] fourvals; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + + /** + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since the 2-ary heap last grew by 256 (from 255 to 511), let's use 341. 
+ */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + + /** + * Constructor, with default size. + */ + public DoubleLongMaxHeap() { + super(); + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + long[] twovals = new long[TWO_HEAP_INITIAL_SIZE]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + this.size = 0; + this.modCount = 0; + } + + /** + * Constructor, with given minimum size. + * + * @param minsize Minimum size + */ + public DoubleLongMaxHeap(int minsize) { + super(); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + double[] twoheap = new double[size]; + long[] twovals = new long[size]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + } else { + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + long[] twovals = new long[TWO_HEAP_INITIAL_SIZE]; + double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + long[] fourvals = new long[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = fourheap; + this.fourvals = fourvals; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + fourvals = null; + Arrays.fill(twoheap, 0.0); + Arrays.fill(twovals, 0); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + public void add(double o, long v) { + final double co = o; + final long cv = v; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. 
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + twovals[twopos] = cv; + ++size; + heapifyUp2(twopos, co, cv); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = new double[FOUR_HEAP_INITIAL_SIZE]; + fourvals = new long[FOUR_HEAP_INITIAL_SIZE]; + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. + fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1)); + } + fourheap[fourpos] = co; + fourvals[fourpos] = cv; + ++size; + heapifyUp4(fourpos, co, cv); + ++modCount; + } + } + + @Override + public void add(double key, long val, int max) { + if (size < max) { + add(key, val); + } else if (twoheap[0] >= key) { + replaceTopElement(key, val); + } + } + + @Override + public void replaceTopElement(double reinsert, long val) { + heapifyDown(reinsert, val); + ++modCount; + } + + /** + * Heapify-Up method for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Current value + */ + private void heapifyUp2(int twopos, double cur, long val) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + double par = twoheap[parent]; + if (cur <= par) { + break; + } + twoheap[twopos] = par; + twovals[twopos] = twovals[parent]; + twopos = parent; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. 
+ * @param cur Current object + * @param val Current value + */ + private void heapifyUp4(int fourpos, double cur, long val) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + double par = fourheap[parent]; + if (cur <= par) { + break; + } + fourheap[fourpos] = par; + fourvals[fourpos] = fourvals[parent]; + fourpos = parent; + } + if (fourpos == 0 && twoheap[0] < cur) { + fourheap[0] = twoheap[0]; + fourvals[0] = twovals[0]; + twoheap[0] = cur; + twovals[0] = val; + } else { + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + } + + @Override + public void poll() { + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final double reinsert = fourheap[last]; + final long reinsertv = fourvals[last]; + fourheap[last] = 0.0; + fourvals[last] = 0; + heapifyDown(reinsert, reinsertv); + } else if (size > 0) { + final double reinsert = twoheap[size]; + final long reinsertv = twovals[size]; + twoheap[size] = 0.0; + twovals[size] = 0; + heapifyDown(reinsert, reinsertv); + } else { + twoheap[0] = 0.0; + twovals[0] = 0; + } + ++modCount; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + * @param val Value to reinsert. + */ + private void heapifyDown(double reinsert, long val) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (twoheap[1] >= twoheap[2]) ? 1 : 2; + if (fourheap[0] > twoheap[best]) { + twoheap[0] = fourheap[0]; + twovals[0] = fourvals[0]; + heapifyDown4(0, reinsert, val); + } else { + twoheap[0] = twoheap[best]; + twovals[0] = twovals[best]; + heapifyDown2(best, reinsert, val); + } + return; + } + heapifyDown2(0, reinsert, val); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Value to reinsert. 
+ */ + private void heapifyDown2(int twopos, double cur, long val) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + double best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && best < twoheap[right]) { + bestchild = right; + best = twoheap[right]; + } + if (cur >= best) { + break; + } + twoheap[twopos] = best; + twovals[twopos] = twovals[bestchild]; + twopos = bestchild; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Down for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + * @param val Value to reinsert. + */ + private void heapifyDown4(int fourpos, double cur, long val) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + double best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + double nextchild = fourheap[candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (cur >= best) { + break; + } + fourheap[fourpos] = best; + fourvals[fourpos] = fourvals[bestchild]; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + + @Override + public double peekKey() { + return twoheap[0]; + } + + @Override + public long peekValue() { + return twovals[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(DoubleLongMaxHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new UnsortedIter(); iter.valid(); 
iter.advance()) { + buf.append(iter.getKey()).append(':').append(iter.getValue()).append(','); + } + buf.append(']'); + return buf.toString(); + } + + @Override + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (DoubleLongHeap.UnsortedIter iter = heap.unsortedIter(); iter.valid(); iter.advance()) { + * doSomething(iter.getKey(), iter.getValue()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements DoubleLongHeap.UnsortedIter { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. + */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @Override + public double getKey() { + return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } + + @Override + public long getValue() { + return ((pos < TWO_HEAP_MAX_SIZE) ? 
twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMinHeap.java new file mode 100644 index 00000000..d38eb6e3 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMinHeap.java @@ -0,0 +1,478 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). + * + * This code was automatically instantiated for the types: Double and Long + * + * This combination was found to work quite well in benchmarks, but YMMV. 
+ * + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> + * + * @author Erich Schubert + * + * @apiviz.has UnsortedIter + */ +public class DoubleLongMinHeap implements DoubleLongHeap { + /** + * Base heap. + */ + protected double[] twoheap; + + /** + * Base heap values. + */ + protected long[] twovals; + + /** + * Extension heap. + */ + protected double[] fourheap; + + /** + * Extension heap values. + */ + protected long[] fourvals; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + + /** + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. + */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + + /** + * Constructor, with default size. 
+ */ + public DoubleLongMinHeap() { + super(); + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + long[] twovals = new long[TWO_HEAP_INITIAL_SIZE]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + this.size = 0; + this.modCount = 0; + } + + /** + * Constructor, with given minimum size. + * + * @param minsize Minimum size + */ + public DoubleLongMinHeap(int minsize) { + super(); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + double[] twoheap = new double[size]; + long[] twovals = new long[size]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + } else { + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + long[] twovals = new long[TWO_HEAP_INITIAL_SIZE]; + double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + long[] fourvals = new long[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = fourheap; + this.fourvals = fourvals; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + fourvals = null; + Arrays.fill(twoheap, 0.0); + Arrays.fill(twovals, 0); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + public void add(double o, long v) { + final double co = o; + final long cv = v; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. 
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + twovals[twopos] = cv; + ++size; + heapifyUp2(twopos, co, cv); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = new double[FOUR_HEAP_INITIAL_SIZE]; + fourvals = new long[FOUR_HEAP_INITIAL_SIZE]; + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. + fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1)); + } + fourheap[fourpos] = co; + fourvals[fourpos] = cv; + ++size; + heapifyUp4(fourpos, co, cv); + ++modCount; + } + } + + @Override + public void add(double key, long val, int max) { + if (size < max) { + add(key, val); + } else if (twoheap[0] <= key) { + replaceTopElement(key, val); + } + } + + @Override + public void replaceTopElement(double reinsert, long val) { + heapifyDown(reinsert, val); + ++modCount; + } + + /** + * Heapify-Up method for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Current value + */ + private void heapifyUp2(int twopos, double cur, long val) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + double par = twoheap[parent]; + if (cur >= par) { + break; + } + twoheap[twopos] = par; + twovals[twopos] = twovals[parent]; + twopos = parent; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. 
+ * @param cur Current object + * @param val Current value + */ + private void heapifyUp4(int fourpos, double cur, long val) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + double par = fourheap[parent]; + if (cur >= par) { + break; + } + fourheap[fourpos] = par; + fourvals[fourpos] = fourvals[parent]; + fourpos = parent; + } + if (fourpos == 0 && twoheap[0] > cur) { + fourheap[0] = twoheap[0]; + fourvals[0] = twovals[0]; + twoheap[0] = cur; + twovals[0] = val; + } else { + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + } + + @Override + public void poll() { + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final double reinsert = fourheap[last]; + final long reinsertv = fourvals[last]; + fourheap[last] = 0.0; + fourvals[last] = 0; + heapifyDown(reinsert, reinsertv); + } else if (size > 0) { + final double reinsert = twoheap[size]; + final long reinsertv = twovals[size]; + twoheap[size] = 0.0; + twovals[size] = 0; + heapifyDown(reinsert, reinsertv); + } else { + twoheap[0] = 0.0; + twovals[0] = 0; + } + ++modCount; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + * @param val Value to reinsert. + */ + private void heapifyDown(double reinsert, long val) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (twoheap[1] <= twoheap[2]) ? 1 : 2; + if (fourheap[0] < twoheap[best]) { + twoheap[0] = fourheap[0]; + twovals[0] = fourvals[0]; + heapifyDown4(0, reinsert, val); + } else { + twoheap[0] = twoheap[best]; + twovals[0] = twovals[best]; + heapifyDown2(best, reinsert, val); + } + return; + } + heapifyDown2(0, reinsert, val); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Value to reinsert. 
+ */ + private void heapifyDown2(int twopos, double cur, long val) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + double best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && best > twoheap[right]) { + bestchild = right; + best = twoheap[right]; + } + if (cur <= best) { + break; + } + twoheap[twopos] = best; + twovals[twopos] = twovals[bestchild]; + twopos = bestchild; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Down for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + * @param val Value to reinsert. + */ + private void heapifyDown4(int fourpos, double cur, long val) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + double best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + double nextchild = fourheap[candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (cur <= best) { + break; + } + fourheap[fourpos] = best; + fourvals[fourpos] = fourvals[bestchild]; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + + @Override + public double peekKey() { + return twoheap[0]; + } + + @Override + public long peekValue() { + return twovals[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(DoubleLongMinHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new UnsortedIter(); iter.valid(); 
iter.advance()) { + buf.append(iter.getKey()).append(':').append(iter.getValue()).append(','); + } + buf.append(']'); + return buf.toString(); + } + + @Override + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (DoubleLongHeap.UnsortedIter iter = heap.unsortedIter(); iter.valid(); iter.advance()) { + * doSomething(iter.getKey(), iter.getValue()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements DoubleLongHeap.UnsortedIter { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. + */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @Override + public double getKey() { + return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } + + @Override + public long getValue() { + return ((pos < TWO_HEAP_MAX_SIZE) ? 
twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMaxHeap.java index 1b7d6037..7ea28f14 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMaxHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMaxHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,40 +23,400 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; along with this program. If not, see <http://www.gnu.org/licenses/>. */ +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + /** - * Basic in-memory heap structure. + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). + * + * This code was automatically instantiated for the type: Double * - * This heap is built lazily: if you first add many elements, then poll the - * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log - * n). This is implemented via a simple validTo counter. + * This combination was found to work quite well in benchmarks, but YMMV. 
+ * + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> * * @author Erich Schubert + * + * @apiviz.has UnsortedIter */ -public class DoubleMaxHeap extends DoubleHeap { +public class DoubleMaxHeap implements DoubleHeap { + /** + * Base heap. + */ + protected double[] twoheap; + + /** + * Extension heap. + */ + protected double[] fourheap; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + /** - * Constructor with default capacity. + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. + */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + + /** + * Constructor, with default size. 
*/ public DoubleMaxHeap() { - super(DEFAULT_INITIAL_CAPACITY); + super(); + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + + this.twoheap = twoheap; + this.fourheap = null; + this.size = 0; + this.modCount = 0; } /** - * Constructor with initial capacity. + * Constructor, with given minimum size. * - * @param size initial capacity + * @param minsize Minimum size */ - public DoubleMaxHeap(int size) { - super(size); + public DoubleMaxHeap(int minsize) { + super(); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + double[] twoheap = new double[size]; + + this.twoheap = twoheap; + this.fourheap = null; + } else { + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + this.twoheap = twoheap; + this.fourheap = fourheap; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + Arrays.fill(twoheap, 0.0); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + public void add(double o) { + final double co = o; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + ++size; + heapifyUp2(twopos, co); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = new double[FOUR_HEAP_INITIAL_SIZE]; + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. 
+ fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + } + fourheap[fourpos] = co; + ++size; + heapifyUp4(fourpos, co); + ++modCount; + } + } + + @Override + public void add(double key, int max) { + if (size < max) { + add(key); + } else if (twoheap[0] >= key) { + replaceTopElement(key); + } + } + + @Override + public double replaceTopElement(double reinsert) { + final double ret = twoheap[0]; + heapifyDown( reinsert); + ++modCount; + return ret; + } + + /** + * Heapify-Up method for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + */ + private void heapifyUp2(int twopos, double cur) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + double par = twoheap[parent]; + if (cur <= par) { + break; + } + twoheap[twopos] = par; + twopos = parent; + } + twoheap[twopos] = cur; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + */ + private void heapifyUp4(int fourpos, double cur) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + double par = fourheap[parent]; + if (cur <= par) { + break; + } + fourheap[fourpos] = par; + fourpos = parent; + } + if (fourpos == 0 && twoheap[0] < cur) { + fourheap[0] = twoheap[0]; + twoheap[0] = cur; + } else { + fourheap[fourpos] = cur; + } + } + + @Override + public double poll() { + final double ret = twoheap[0]; + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final double reinsert = fourheap[last]; + fourheap[last] = 0.0; + heapifyDown(reinsert); + } else if (size > 0) { + final double reinsert = twoheap[size]; + twoheap[size] = 0.0; + heapifyDown(reinsert); + } else { + twoheap[0] = 0.0; + } + ++modCount; + return ret; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. 
+ */ + private void heapifyDown(double reinsert) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (twoheap[1] >= twoheap[2]) ? 1 : 2; + if (fourheap[0] > twoheap[best]) { + twoheap[0] = fourheap[0]; + heapifyDown4(0, reinsert); + } else { + twoheap[0] = twoheap[best]; + heapifyDown2(best, reinsert); + } + return; + } + heapifyDown2(0, reinsert); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + */ + private void heapifyDown2(int twopos, double cur) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + double best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && best < twoheap[right]) { + bestchild = right; + best = twoheap[right]; + } + if (cur >= best) { + break; + } + twoheap[twopos] = best; + twopos = bestchild; + } + twoheap[twopos] = cur; } /** - * Compare two objects + * Heapify-Down for 4-ary heap. * - * @param o1 First object - * @param o2 Second object + * @param fourpos Position in 4-ary heap. 
+ * @param cur Current object */ + private void heapifyDown4(int fourpos, double cur) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + double best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + double nextchild = fourheap[candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (cur >= best) { + break; + } + fourheap[fourpos] = best; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + } + + @Override + public double peek() { + return twoheap[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(DoubleMaxHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) { + buf.append(iter.get()).append(','); + } + buf.append(']'); + return buf.toString(); + } + @Override - protected boolean comp(double o1, double o2) { - return o1 < o2; + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (DoubleHeap.UnsortedIter iter = heap.unsortedIter(); iter.valid(); iter.advance()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements DoubleHeap.UnsortedIter { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. 
+ */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @Override + public double get() { + return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMinHeap.java index 2ce05ff9..e9334153 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMinHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMinHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,40 +23,400 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; along with this program. If not, see <http://www.gnu.org/licenses/>. */ +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + /** - * Basic in-memory heap structure. + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). + * + * This code was automatically instantiated for the type: Double * - * This heap is built lazily: if you first add many elements, then poll the - * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log - * n). This is implemented via a simple validTo counter. + * This combination was found to work quite well in benchmarks, but YMMV. 
+ * + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> * * @author Erich Schubert + * + * @apiviz.has UnsortedIter */ -public class DoubleMinHeap extends DoubleHeap { +public class DoubleMinHeap implements DoubleHeap { + /** + * Base heap. + */ + protected double[] twoheap; + + /** + * Extension heap. + */ + protected double[] fourheap; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + /** - * Constructor with default capacity. + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. + */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + + /** + * Constructor, with default size. 
*/ public DoubleMinHeap() { - super(DEFAULT_INITIAL_CAPACITY); + super(); + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + + this.twoheap = twoheap; + this.fourheap = null; + this.size = 0; + this.modCount = 0; } /** - * Constructor with initial capacity. + * Constructor, with given minimum size. * - * @param size initial capacity + * @param minsize Minimum size */ - public DoubleMinHeap(int size) { - super(size); + public DoubleMinHeap(int minsize) { + super(); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + double[] twoheap = new double[size]; + + this.twoheap = twoheap; + this.fourheap = null; + } else { + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + this.twoheap = twoheap; + this.fourheap = fourheap; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + Arrays.fill(twoheap, 0.0); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + public void add(double o) { + final double co = o; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + ++size; + heapifyUp2(twopos, co); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = new double[FOUR_HEAP_INITIAL_SIZE]; + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. 
+ fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + } + fourheap[fourpos] = co; + ++size; + heapifyUp4(fourpos, co); + ++modCount; + } + } + + @Override + public void add(double key, int max) { + if (size < max) { + add(key); + } else if (twoheap[0] <= key) { + replaceTopElement(key); + } + } + + @Override + public double replaceTopElement(double reinsert) { + final double ret = twoheap[0]; + heapifyDown( reinsert); + ++modCount; + return ret; + } + + /** + * Heapify-Up method for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + */ + private void heapifyUp2(int twopos, double cur) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + double par = twoheap[parent]; + if (cur >= par) { + break; + } + twoheap[twopos] = par; + twopos = parent; + } + twoheap[twopos] = cur; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + */ + private void heapifyUp4(int fourpos, double cur) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + double par = fourheap[parent]; + if (cur >= par) { + break; + } + fourheap[fourpos] = par; + fourpos = parent; + } + if (fourpos == 0 && twoheap[0] > cur) { + fourheap[0] = twoheap[0]; + twoheap[0] = cur; + } else { + fourheap[fourpos] = cur; + } + } + + @Override + public double poll() { + final double ret = twoheap[0]; + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final double reinsert = fourheap[last]; + fourheap[last] = 0.0; + heapifyDown(reinsert); + } else if (size > 0) { + final double reinsert = twoheap[size]; + twoheap[size] = 0.0; + heapifyDown(reinsert); + } else { + twoheap[0] = 0.0; + } + ++modCount; + return ret; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. 
+ */ + private void heapifyDown(double reinsert) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (twoheap[1] <= twoheap[2]) ? 1 : 2; + if (fourheap[0] < twoheap[best]) { + twoheap[0] = fourheap[0]; + heapifyDown4(0, reinsert); + } else { + twoheap[0] = twoheap[best]; + heapifyDown2(best, reinsert); + } + return; + } + heapifyDown2(0, reinsert); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + */ + private void heapifyDown2(int twopos, double cur) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + double best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && best > twoheap[right]) { + bestchild = right; + best = twoheap[right]; + } + if (cur <= best) { + break; + } + twoheap[twopos] = best; + twopos = bestchild; + } + twoheap[twopos] = cur; } /** - * Compare two objects + * Heapify-Down for 4-ary heap. * - * @param o1 First object - * @param o2 Second object + * @param fourpos Position in 4-ary heap. 
+ * @param cur Current object */ + private void heapifyDown4(int fourpos, double cur) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + double best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + double nextchild = fourheap[candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (cur <= best) { + break; + } + fourheap[fourpos] = best; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + } + + @Override + public double peek() { + return twoheap[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(DoubleMinHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) { + buf.append(iter.get()).append(','); + } + buf.append(']'); + return buf.toString(); + } + @Override - protected boolean comp(double o1, double o2) { - return o1 > o2; + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (DoubleHeap.UnsortedIter iter = heap.unsortedIter(); iter.valid(); iter.advance()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements DoubleHeap.UnsortedIter { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. 
+ */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @Override + public double get() { + return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjMaxHeap.java deleted file mode 100644 index 8417309a..00000000 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjMaxHeap.java +++ /dev/null @@ -1,328 +0,0 @@ -package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; - -/* - This file is part of ELKI: - Environment for Developing KDD-Applications Supported by Index-Structures - - Copyright (C) 2012 - Ludwig-Maximilians-Universität München - Lehr- und Forschungseinheit für Datenbanksysteme - ELKI Development Team - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -import java.util.Arrays; -import java.util.Comparator; - -import de.lmu.ifi.dbs.elki.math.MathUtil; - -/** - * Basic in-memory heap structure. 
- * - * This heap is built lazily: if you first add many elements, then poll the - * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log - * n). This is implemented via a simple validTo counter. - * - * @author Erich Schubert - * - * @param <V> value type - */ -public class DoubleObjMaxHeap<V> { - /** - * Heap storage: keys - */ - protected double[] keys; - - /** - * Heap storage: values - */ - protected Object[] values; - - /** - * Current number of objects - */ - protected int size = 0; - - /** - * Indicate up to where the heap is valid - */ - protected int validSize = 0; - - /** - * (Structural) modification counter. Used to invalidate iterators. - */ - public transient int modCount = 0; - - /** - * Default initial capacity - */ - private static final int DEFAULT_INITIAL_CAPACITY = 11; - - /** - * Default constructor: default capacity, natural ordering. - */ - public DoubleObjMaxHeap() { - this(DEFAULT_INITIAL_CAPACITY); - } - - /** - * Constructor with initial capacity and {@link Comparator}. 
- * - * @param size initial capacity - */ - public DoubleObjMaxHeap(int size) { - super(); - this.size = 0; - this.keys = new double[size]; - this.values = new Object[size]; - } - - /** - * Add a key-value pair to the heap - * - * @param key Key - * @param val Value - * @return Success code - */ - public boolean add(double key, V val) { - // resize when needed - if(size + 1 > keys.length) { - resize(size + 1); - } - // final int pos = size; - this.keys[size] = key; - this.values[size] = val; - this.size += 1; - heapifyUp(size - 1, key, val); - validSize += 1; - // We have changed - return true according to {@link Collection#put} - modCount++; - return true; - } - - /** - * Get the current top key - * - * @return Top key - */ - public double peekKey() { - if(size == 0) { - throw new ArrayIndexOutOfBoundsException("Peek() on an empty heap!"); - } - ensureValid(); - return keys[0]; - } - - /** - * Get the current top value - * - * @return Value - */ - @SuppressWarnings("unchecked") - public V peekValue() { - if(size == 0) { - throw new ArrayIndexOutOfBoundsException("Peek() on an empty heap!"); - } - ensureValid(); - return (V) values[0]; - } - - /** - * Remove the first element - */ - public void poll() { - removeAt(0); - } - - /** - * Repair the heap - */ - protected void ensureValid() { - if(validSize != size) { - if(size > 1) { - // Parent of first invalid - int nextmin = validSize > 0 ? 
((validSize - 1) >>> 1) : 0; - int curmin = MathUtil.nextAllOnesInt(nextmin); // Next line - int nextmax = curmin - 1; // End of valid line - int pos = (size - 2) >>> 1; // Parent of last element - // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin+", "+nextmin); - while(pos >= nextmin) { - // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin); - while(pos >= curmin) { - if(!heapifyDown(pos, keys[pos], values[pos])) { - final int parent = (pos - 1) >>> 1; - if(parent < curmin) { - nextmin = Math.min(nextmin, parent); - nextmax = Math.max(nextmax, parent); - } - } - pos--; - } - curmin = nextmin; - pos = Math.min(pos, nextmax); - nextmax = -1; - } - } - validSize = size; - } - } - - /** - * Remove the element at the given position. - * - * @param pos Element position. - */ - protected void removeAt(int pos) { - if(pos < 0 || pos >= size) { - return; - } - // Replacement object: - final double reinkey = keys[size - 1]; - final Object reinval = values[size - 1]; - values[size - 1] = null; - // Keep heap in sync - if(validSize == size) { - size -= 1; - validSize -= 1; - heapifyDown(pos, reinkey, reinval); - } - else { - size -= 1; - validSize = Math.min(pos >>> 1, validSize); - keys[pos] = reinkey; - values[pos] = reinval; - } - modCount++; - } - - /** - * Execute a "Heapify Upwards" aka "SiftUp". Used in insertions. - * - * @param pos insertion position - * @param curkey Current key - * @param curval Current value - */ - protected void heapifyUp(int pos, double curkey, Object curval) { - while(pos > 0) { - final int parent = (pos - 1) >>> 1; - double parkey = keys[parent]; - - if(curkey <= parkey) { // Compare - break; - } - keys[pos] = parkey; - values[pos] = values[parent]; - pos = parent; - } - keys[pos] = curkey; - values[pos] = curval; - } - - /** - * Execute a "Heapify Downwards" aka "SiftDown". Used in deletions. 
- * - * @param ipos re-insertion position - * @param curkey Current key - * @param curval Current value - * @return true when the order was changed - */ - protected boolean heapifyDown(final int ipos, double curkey, Object curval) { - int pos = ipos; - final int half = size >>> 1; - while(pos < half) { - // Get left child (must exist!) - int cpos = (pos << 1) + 1; - double chikey = keys[cpos]; - Object chival = values[cpos]; - // Test right child, if present - final int rchild = cpos + 1; - if(rchild < size) { - double right = keys[rchild]; - if(chikey < right) { // Compare - cpos = rchild; - chikey = right; - chival = values[rchild]; - } - } - - if(curkey >= chikey) { // Compare - break; - } - keys[pos] = chikey; - values[pos] = chival; - pos = cpos; - } - keys[pos] = curkey; - values[pos] = curval; - return (pos == ipos); - } - - /** - * Query the size - * - * @return Size - */ - public int size() { - return this.size; - } - - /** - * Test whether we need to resize to have the requested capacity. - * - * @param requiredSize required capacity - */ - protected final void resize(int requiredSize) { - // Double until 64, then increase by 50% each time. - int newCapacity = ((keys.length < 64) ? ((keys.length + 1) << 1) : ((keys.length >> 1) * 3)); - // overflow? - if(newCapacity < 0) { - throw new OutOfMemoryError(); - } - if(requiredSize > newCapacity) { - newCapacity = requiredSize; - } - keys = Arrays.copyOf(keys, newCapacity); - values = Arrays.copyOf(values, newCapacity); - } - - /** - * Delete all elements from the heap. - */ - public void clear() { - // clean up references in the array for memory management - Arrays.fill(values, null); - this.size = 0; - this.validSize = -1; - modCount++; - } - - /** - * Test whether the heap is still valid. - * - * Debug method. 
- * - * @return {@code null} when the heap is correct - */ - protected String checkHeap() { - ensureValid(); - for(int i = 1; i < size; i++) { - final int parent = (i - 1) >>> 1; - if(keys[parent] < keys[i]) { // Compare - return "@" + parent + ": " + keys[parent] + " < @" + i + ": " + keys[i]; - } - } - return null; - } -} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjMinHeap.java deleted file mode 100644 index 244277e8..00000000 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjMinHeap.java +++ /dev/null @@ -1,328 +0,0 @@ -package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; - -/* - This file is part of ELKI: - Environment for Developing KDD-Applications Supported by Index-Structures - - Copyright (C) 2012 - Ludwig-Maximilians-Universität München - Lehr- und Forschungseinheit für Datenbanksysteme - ELKI Development Team - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -import java.util.Arrays; -import java.util.Comparator; - -import de.lmu.ifi.dbs.elki.math.MathUtil; - -/** - * Basic in-memory heap structure. - * - * This heap is built lazily: if you first add many elements, then poll the - * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log - * n). This is implemented via a simple validTo counter. 
- * - * @author Erich Schubert - * - * @param <V> value type - */ -public class DoubleObjMinHeap<V> { - /** - * Heap storage: keys - */ - protected double[] keys; - - /** - * Heap storage: values - */ - protected Object[] values; - - /** - * Current number of objects - */ - protected int size = 0; - - /** - * Indicate up to where the heap is valid - */ - protected int validSize = 0; - - /** - * (Structural) modification counter. Used to invalidate iterators. - */ - public transient int modCount = 0; - - /** - * Default initial capacity - */ - private static final int DEFAULT_INITIAL_CAPACITY = 11; - - /** - * Default constructor: default capacity, natural ordering. - */ - public DoubleObjMinHeap() { - this(DEFAULT_INITIAL_CAPACITY); - } - - /** - * Constructor with initial capacity and {@link Comparator}. - * - * @param size initial capacity - */ - public DoubleObjMinHeap(int size) { - super(); - this.size = 0; - this.keys = new double[size]; - this.values = new Object[size]; - } - - /** - * Add a key-value pair to the heap - * - * @param key Key - * @param val Value - * @return Success code - */ - public boolean add(double key, V val) { - // resize when needed - if(size + 1 > keys.length) { - resize(size + 1); - } - // final int pos = size; - this.keys[size] = key; - this.values[size] = val; - this.size += 1; - heapifyUp(size - 1, key, val); - validSize += 1; - // We have changed - return true according to {@link Collection#put} - modCount++; - return true; - } - - /** - * Get the current top key - * - * @return Top key - */ - public double peekKey() { - if(size == 0) { - throw new ArrayIndexOutOfBoundsException("Peek() on an empty heap!"); - } - ensureValid(); - return keys[0]; - } - - /** - * Get the current top value - * - * @return Value - */ - @SuppressWarnings("unchecked") - public V peekValue() { - if(size == 0) { - throw new ArrayIndexOutOfBoundsException("Peek() on an empty heap!"); - } - ensureValid(); - return (V) values[0]; - } - - /** - * Remove the 
first element - */ - public void poll() { - removeAt(0); - } - - /** - * Repair the heap - */ - protected void ensureValid() { - if(validSize != size) { - if(size > 1) { - // Parent of first invalid - int nextmin = validSize > 0 ? ((validSize - 1) >>> 1) : 0; - int curmin = MathUtil.nextAllOnesInt(nextmin); // Next line - int nextmax = curmin - 1; // End of valid line - int pos = (size - 2) >>> 1; // Parent of last element - // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin+", "+nextmin); - while(pos >= nextmin) { - // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin); - while(pos >= curmin) { - if(!heapifyDown(pos, keys[pos], values[pos])) { - final int parent = (pos - 1) >>> 1; - if(parent < curmin) { - nextmin = Math.min(nextmin, parent); - nextmax = Math.max(nextmax, parent); - } - } - pos--; - } - curmin = nextmin; - pos = Math.min(pos, nextmax); - nextmax = -1; - } - } - validSize = size; - } - } - - /** - * Remove the element at the given position. - * - * @param pos Element position. - */ - protected void removeAt(int pos) { - if(pos < 0 || pos >= size) { - return; - } - // Replacement object: - final double reinkey = keys[size - 1]; - final Object reinval = values[size - 1]; - values[size - 1] = null; - // Keep heap in sync - if(validSize == size) { - size -= 1; - validSize -= 1; - heapifyDown(pos, reinkey, reinval); - } - else { - size -= 1; - validSize = Math.min(pos >>> 1, validSize); - keys[pos] = reinkey; - values[pos] = reinval; - } - modCount++; - } - - /** - * Execute a "Heapify Upwards" aka "SiftUp". Used in insertions. 
- * - * @param pos insertion position - * @param curkey Current key - * @param curval Current value - */ - protected void heapifyUp(int pos, double curkey, Object curval) { - while(pos > 0) { - final int parent = (pos - 1) >>> 1; - double parkey = keys[parent]; - - if(curkey >= parkey) { // Compare - break; - } - keys[pos] = parkey; - values[pos] = values[parent]; - pos = parent; - } - keys[pos] = curkey; - values[pos] = curval; - } - - /** - * Execute a "Heapify Downwards" aka "SiftDown". Used in deletions. - * - * @param ipos re-insertion position - * @param curkey Current key - * @param curval Current value - * @return true when the order was changed - */ - protected boolean heapifyDown(final int ipos, double curkey, Object curval) { - int pos = ipos; - final int half = size >>> 1; - while(pos < half) { - // Get left child (must exist!) - int cpos = (pos << 1) + 1; - double chikey = keys[cpos]; - Object chival = values[cpos]; - // Test right child, if present - final int rchild = cpos + 1; - if(rchild < size) { - double right = keys[rchild]; - if(chikey > right) { // Compare - cpos = rchild; - chikey = right; - chival = values[rchild]; - } - } - - if(curkey <= chikey) { // Compare - break; - } - keys[pos] = chikey; - values[pos] = chival; - pos = cpos; - } - keys[pos] = curkey; - values[pos] = curval; - return (pos == ipos); - } - - /** - * Query the size - * - * @return Size - */ - public int size() { - return this.size; - } - - /** - * Test whether we need to resize to have the requested capacity. - * - * @param requiredSize required capacity - */ - protected final void resize(int requiredSize) { - // Double until 64, then increase by 50% each time. - int newCapacity = ((keys.length < 64) ? ((keys.length + 1) << 1) : ((keys.length >> 1) * 3)); - // overflow? 
- if(newCapacity < 0) { - throw new OutOfMemoryError(); - } - if(requiredSize > newCapacity) { - newCapacity = requiredSize; - } - keys = Arrays.copyOf(keys, newCapacity); - values = Arrays.copyOf(values, newCapacity); - } - - /** - * Delete all elements from the heap. - */ - public void clear() { - // clean up references in the array for memory management - Arrays.fill(values, null); - this.size = 0; - this.validSize = -1; - modCount++; - } - - /** - * Test whether the heap is still valid. - * - * Debug method. - * - * @return {@code null} when the heap is correct - */ - protected String checkHeap() { - ensureValid(); - for(int i = 1; i < size; i++) { - final int parent = (i - 1) >>> 1; - if(keys[parent] > keys[i]) { // Compare - return "@" + parent + ": " + keys[parent] + " < @" + i + ": " + keys[i]; - } - } - return null; - } -} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectHeap.java new file mode 100644 index 00000000..db65ce81 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectHeap.java @@ -0,0 +1,129 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. 
+ + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter; + +/** + * Basic in-memory heap interface, for double keys and V values. + * + * @author Erich Schubert + * + * @apiviz.has UnsortedIter + * @param <V> Value type + */ +public interface DoubleObjectHeap<V> { + /** + * Add a key-value pair to the heap + * + * @param key Key + * @param val Value + */ + void add(double key, V val); + + /** + * Add a key-value pair to the heap if it improves the top. + * + * @param key Key + * @param val Value + * @param k Desired maximum size + */ + void add(double key, V val, int k); + + /** + * Combined operation that removes the top element, and inserts a new element + * instead. + * + * @param key Key of new element + * @param val Value of new element + */ + void replaceTopElement(double key, V val); + + /** + * Get the current top key + * + * @return Top key + */ + double peekKey(); + + /** + * Get the current top value + * + * @return Value + */ + V peekValue(); + + /** + * Remove the first element + */ + void poll(); + + /** + * Clear the heap contents. + */ + void clear(); + + /** + * Query the size + * + * @return Size + */ + public int size(); + + /** + * Is the heap empty? + * + * @return {@code true} when the size is 0. + */ + public boolean isEmpty(); + + /** + * Get an unsorted iterator to inspect the heap. + * + * @return Iterator + */ + UnsortedIter<V> unsortedIter(); + + /** + * Unsorted iterator - in heap order. Does not poll the heap. 
+ * + * @author Erich Schubert + * @param <V> Value type + */ + public static interface UnsortedIter<V> extends Iter { + /** + * Get the current key + * + * @return Current key + */ + double getKey(); + + /** + * Get the current value + * + * @return Current value + */ + V getValue(); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMaxHeap.java new file mode 100644 index 00000000..dd89573c --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMaxHeap.java @@ -0,0 +1,482 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). 
+ * + * This code was automatically instantiated for the types: Double and Object + * + * This combination was found to work quite well in benchmarks, but YMMV. + * + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> + * + * @author Erich Schubert + * + * @apiviz.has UnsortedIter + * @param <V> Value type + */ +public class DoubleObjectMaxHeap<V> implements DoubleObjectHeap<V> { + /** + * Base heap. + */ + protected double[] twoheap; + + /** + * Base heap values. + */ + protected Object[] twovals; + + /** + * Extension heap. + */ + protected double[] fourheap; + + /** + * Extension heapvalues. + */ + protected Object[] fourvals; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + + /** + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. 
+ */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + + /** + * Constructor, with default size. + */ + public DoubleObjectMaxHeap() { + super(); + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + Object[] twovals = new Object[TWO_HEAP_INITIAL_SIZE]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + this.size = 0; + this.modCount = 0; + } + + /** + * Constructor, with given minimum size. + * + * @param minsize Minimum size + */ + public DoubleObjectMaxHeap(int minsize) { + super(); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + double[] twoheap = new double[size]; + Object[] twovals = new Object[size]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + } else { + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + Object[] twovals = new Object[TWO_HEAP_INITIAL_SIZE]; + double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + Object[] fourvals = new Object[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = fourheap; + this.fourvals = fourvals; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + fourvals = null; + Arrays.fill(twoheap, 0.0); + Arrays.fill(twovals, null); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + public void add(double o, V v) { + final double co = o; + final Object cv = (Object)v; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. 
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + twovals[twopos] = cv; + ++size; + heapifyUp2(twopos, co, cv); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = new double[FOUR_HEAP_INITIAL_SIZE]; + fourvals = new Object[FOUR_HEAP_INITIAL_SIZE]; + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. + fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1)); + } + fourheap[fourpos] = co; + fourvals[fourpos] = cv; + ++size; + heapifyUp4(fourpos, co, cv); + ++modCount; + } + } + + @Override + public void add(double key, V val, int max) { + if (size < max) { + add(key, val); + } else if (twoheap[0] >= key) { + replaceTopElement(key, val); + } + } + + @Override + public void replaceTopElement(double reinsert, V val) { + heapifyDown(reinsert, (Object)val); + ++modCount; + } + + /** + * Heapify-Up method for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Current value + */ + private void heapifyUp2(int twopos, double cur, Object val) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + double par = twoheap[parent]; + if (cur <= par) { + break; + } + twoheap[twopos] = par; + twovals[twopos] = twovals[parent]; + twopos = parent; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. 
+ * @param cur Current object + * @param val Current value + */ + private void heapifyUp4(int fourpos, double cur, Object val) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + double par = fourheap[parent]; + if (cur <= par) { + break; + } + fourheap[fourpos] = par; + fourvals[fourpos] = fourvals[parent]; + fourpos = parent; + } + if (fourpos == 0 && twoheap[0] < cur) { + fourheap[0] = twoheap[0]; + fourvals[0] = twovals[0]; + twoheap[0] = cur; + twovals[0] = val; + } else { + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + } + + @Override + public void poll() { + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final double reinsert = fourheap[last]; + final Object reinsertv = fourvals[last]; + fourheap[last] = 0.0; + fourvals[last] = null; + heapifyDown(reinsert, reinsertv); + } else if (size > 0) { + final double reinsert = twoheap[size]; + final Object reinsertv = twovals[size]; + twoheap[size] = 0.0; + twovals[size] = null; + heapifyDown(reinsert, reinsertv); + } else { + twoheap[0] = 0.0; + twovals[0] = null; + } + ++modCount; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + * @param val Value to reinsert. + */ + private void heapifyDown(double reinsert, Object val) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (twoheap[1] >= twoheap[2]) ? 1 : 2; + if (fourheap[0] > twoheap[best]) { + twoheap[0] = fourheap[0]; + twovals[0] = fourvals[0]; + heapifyDown4(0, reinsert, val); + } else { + twoheap[0] = twoheap[best]; + twovals[0] = twovals[best]; + heapifyDown2(best, reinsert, val); + } + return; + } + heapifyDown2(0, reinsert, val); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Value to reinsert. 
+ */ + private void heapifyDown2(int twopos, double cur, Object val) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + double best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && best < twoheap[right]) { + bestchild = right; + best = twoheap[right]; + } + if (cur >= best) { + break; + } + twoheap[twopos] = best; + twovals[twopos] = twovals[bestchild]; + twopos = bestchild; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Down for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + * @param val Value to reinsert. + */ + private void heapifyDown4(int fourpos, double cur, Object val) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + double best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + double nextchild = fourheap[candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (cur >= best) { + break; + } + fourheap[fourpos] = best; + fourvals[fourpos] = fourvals[bestchild]; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + + @Override + public double peekKey() { + return twoheap[0]; + } + + @Override + @SuppressWarnings("unchecked") + public V peekValue() { + return (V)twovals[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(DoubleObjectMaxHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = 
new UnsortedIter(); iter.valid(); iter.advance()) { + buf.append(iter.getKey()).append(':').append(iter.getValue()).append(','); + } + buf.append(']'); + return buf.toString(); + } + + @Override + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (DoubleObjectHeap.UnsortedIter<V> iter = heap.unsortedIter(); iter.valid(); iter.next()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements DoubleObjectHeap.UnsortedIter<V> { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. + */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @Override + public double getKey() { + return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } + + @SuppressWarnings("unchecked") + + @Override + public V getValue() { + return (V)((pos < TWO_HEAP_MAX_SIZE) ? 
twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMinHeap.java new file mode 100644 index 00000000..905cdedb --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMinHeap.java @@ -0,0 +1,482 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). + * + * This code was automatically instantiated for the types: Double and Object + * + * This combination was found to work quite well in benchmarks, but YMMV. 
+ * + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> + * + * @author Erich Schubert + * + * @apiviz.has UnsortedIter + * @param <V> Value type + */ +public class DoubleObjectMinHeap<V> implements DoubleObjectHeap<V> { + /** + * Base heap. + */ + protected double[] twoheap; + + /** + * Base heap values. + */ + protected Object[] twovals; + + /** + * Extension heap. + */ + protected double[] fourheap; + + /** + * Extension heap values. + */ + protected Object[] fourvals; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + + /** + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. + */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + + /** + * Constructor, with default size. 
+ */ + public DoubleObjectMinHeap() { + super(); + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + Object[] twovals = new Object[TWO_HEAP_INITIAL_SIZE]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + this.size = 0; + this.modCount = 0; + } + + /** + * Constructor, with given minimum size. + * + * @param minsize Minimum size + */ + public DoubleObjectMinHeap(int minsize) { + super(); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + double[] twoheap = new double[size]; + Object[] twovals = new Object[size]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + } else { + double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE]; + Object[] twovals = new Object[TWO_HEAP_INITIAL_SIZE]; + double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + Object[] fourvals = new Object[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = fourheap; + this.fourvals = fourvals; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + fourvals = null; + Arrays.fill(twoheap, 0.0); + Arrays.fill(twovals, null); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + public void add(double o, V v) { + final double co = o; + final Object cv = (Object)v; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. 
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + twovals[twopos] = cv; + ++size; + heapifyUp2(twopos, co, cv); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = new double[FOUR_HEAP_INITIAL_SIZE]; + fourvals = new Object[FOUR_HEAP_INITIAL_SIZE]; + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. + fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1)); + } + fourheap[fourpos] = co; + fourvals[fourpos] = cv; + ++size; + heapifyUp4(fourpos, co, cv); + ++modCount; + } + } + + @Override + public void add(double key, V val, int max) { + if (size < max) { + add(key, val); + } else if (twoheap[0] <= key) { + replaceTopElement(key, val); + } + } + + @Override + public void replaceTopElement(double reinsert, V val) { + heapifyDown(reinsert, (Object)val); + ++modCount; + } + + /** + * Heapify-Up method for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Current value + */ + private void heapifyUp2(int twopos, double cur, Object val) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + double par = twoheap[parent]; + if (cur >= par) { + break; + } + twoheap[twopos] = par; + twovals[twopos] = twovals[parent]; + twopos = parent; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. 
+ * @param cur Current object + * @param val Current value + */ + private void heapifyUp4(int fourpos, double cur, Object val) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + double par = fourheap[parent]; + if (cur >= par) { + break; + } + fourheap[fourpos] = par; + fourvals[fourpos] = fourvals[parent]; + fourpos = parent; + } + if (fourpos == 0 && twoheap[0] > cur) { + fourheap[0] = twoheap[0]; + fourvals[0] = twovals[0]; + twoheap[0] = cur; + twovals[0] = val; + } else { + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + } + + @Override + public void poll() { + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final double reinsert = fourheap[last]; + final Object reinsertv = fourvals[last]; + fourheap[last] = 0.0; + fourvals[last] = null; + heapifyDown(reinsert, reinsertv); + } else if (size > 0) { + final double reinsert = twoheap[size]; + final Object reinsertv = twovals[size]; + twoheap[size] = 0.0; + twovals[size] = null; + heapifyDown(reinsert, reinsertv); + } else { + twoheap[0] = 0.0; + twovals[0] = null; + } + ++modCount; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + * @param val Value to reinsert. + */ + private void heapifyDown(double reinsert, Object val) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (twoheap[1] <= twoheap[2]) ? 1 : 2; + if (fourheap[0] < twoheap[best]) { + twoheap[0] = fourheap[0]; + twovals[0] = fourvals[0]; + heapifyDown4(0, reinsert, val); + } else { + twoheap[0] = twoheap[best]; + twovals[0] = twovals[best]; + heapifyDown2(best, reinsert, val); + } + return; + } + heapifyDown2(0, reinsert, val); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Value to reinsert. 
+ */ + private void heapifyDown2(int twopos, double cur, Object val) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + double best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && best > twoheap[right]) { + bestchild = right; + best = twoheap[right]; + } + if (cur <= best) { + break; + } + twoheap[twopos] = best; + twovals[twopos] = twovals[bestchild]; + twopos = bestchild; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Down for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + * @param val Value to reinsert. + */ + private void heapifyDown4(int fourpos, double cur, Object val) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + double best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + double nextchild = fourheap[candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (cur <= best) { + break; + } + fourheap[fourpos] = best; + fourvals[fourpos] = fourvals[bestchild]; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + + @Override + public double peekKey() { + return twoheap[0]; + } + + @Override + @SuppressWarnings("unchecked") + public V peekValue() { + return (V)twovals[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(DoubleObjectMinHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = 
new UnsortedIter(); iter.valid(); iter.advance()) { + buf.append(iter.getKey()).append(':').append(iter.getValue()).append(','); + } + buf.append(']'); + return buf.toString(); + } + + @Override + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (DoubleObjectHeap.UnsortedIter<V> iter = heap.unsortedIter(); iter.valid(); iter.advance()) { + * doSomething(iter.getKey(), iter.getValue()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements DoubleObjectHeap.UnsortedIter<V> { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. + */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @Override + public double getKey() { + return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } + + @SuppressWarnings("unchecked") + + @Override + public V getValue() { + return (V)((pos < TWO_HEAP_MAX_SIZE) ? 
twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoublePriorityObject.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoublePriorityObject.java index 92d548cb..82453885 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoublePriorityObject.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoublePriorityObject.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/Heap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/Heap.java index 86d3ae08..2c278110 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/Heap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/Heap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -25,11 +25,6 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; import java.util.Arrays; import java.util.Comparator; -import java.util.ConcurrentModificationException; -import java.util.Iterator; -import java.util.NoSuchElementException; - -import de.lmu.ifi.dbs.elki.math.MathUtil; /** * Basic in-memory heap structure. Closely related to a @@ -45,11 +40,11 @@ import de.lmu.ifi.dbs.elki.math.MathUtil; * @param <E> Element type. Should be {@link java.lang.Comparable} or a * {@link java.util.Comparator} needs to be given. 
*/ -public class Heap<E> implements Iterable<E> { +public class Heap<E> { /** * Heap storage. */ - protected transient Object[] queue; + protected Object[] queue; /** * Current number of objects. @@ -57,11 +52,6 @@ public class Heap<E> implements Iterable<E> { protected int size = 0; /** - * Indicate up to where the heap is valid. - */ - protected int validSize = 0; - - /** * The comparator or {@code null}. */ protected final Comparator<Object> comparator; @@ -69,7 +59,7 @@ public class Heap<E> implements Iterable<E> { /** * (Structural) modification counter. Used to invalidate iterators. */ - private transient int modCount = 0; + private int modCount = 0; /** * Default initial capacity. @@ -126,10 +116,8 @@ public class Heap<E> implements Iterable<E> { resize(size + 1); } // final int pos = size; - this.queue[size] = e; this.size += 1; heapifyUp(size - 1, e); - validSize += 1; heapModified(); } @@ -142,7 +130,6 @@ public class Heap<E> implements Iterable<E> { */ @SuppressWarnings("unchecked") public E replaceTopElement(E e) { - ensureValid(); E oldroot = (E) queue[0]; heapifyDown(0, e); heapModified(); @@ -159,7 +146,6 @@ public class Heap<E> implements Iterable<E> { if (size == 0) { return null; } - ensureValid(); return (E) queue[0]; } @@ -169,70 +155,10 @@ public class Heap<E> implements Iterable<E> { * @return Top element. */ public E poll() { - ensureValid(); return removeAt(0); } /** - * Perform pending heap repair operations in a single bulk operation. - */ - protected void ensureValid() { - if (validSize != size) { - if (size > 1) { - // Bottom up heap update. - if (comparator != null) { - // Parent of first invalid - int nextmin = validSize > 0 ? 
((validSize - 1) >>> 1) : 0; - int curmin = MathUtil.nextAllOnesInt(nextmin); // Next line - int nextmax = curmin - 1; // End of valid line - int pos = (size - 2) >>> 1; // Parent of last element - // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin+", "+nextmin); - while (pos >= nextmin) { - // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin); - while (pos >= curmin) { - if (!heapifyDownComparator(pos, queue[pos])) { - final int parent = (pos - 1) >>> 1; - if (parent < curmin) { - nextmin = Math.min(nextmin, parent); - nextmax = Math.max(nextmax, parent); - } - } - pos--; - } - curmin = nextmin; - pos = Math.min(pos, nextmax); - nextmax = -1; - } - } else { - // Parent of first invalid - int nextmin = validSize > 0 ? ((validSize - 1) >>> 1) : 0; - int curmin = MathUtil.nextAllOnesInt(nextmin); // Next line - int nextmax = curmin - 1; // End of valid line - int pos = (size - 2) >>> 1; // Parent of last element - // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin+", "+nextmin); - while (pos >= nextmin) { - // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin); - while (pos >= curmin) { - if (!heapifyDownComparable(pos, queue[pos])) { - final int parent = (pos - 1) >>> 1; - if (parent < curmin) { - nextmin = Math.min(nextmin, parent); - nextmax = Math.max(nextmax, parent); - } - } - pos--; - } - curmin = nextmin; - pos = Math.min(pos, nextmax); - nextmax = -1; - } - } - } - validSize = size; - } - } - - /** * Remove the element at the given position. * * @param pos Element position. 
@@ -247,16 +173,8 @@ public class Heap<E> implements Iterable<E> { // Replacement object: final Object reinsert = queue[size - 1]; queue[size - 1] = null; - // Keep heap in sync - if (validSize == size) { - size -= 1; - validSize -= 1; - heapifyDown(pos, reinsert); - } else { - size -= 1; - validSize = Math.min(pos >>> 1, validSize); - queue[pos] = reinsert; - } + size--; + heapifyDown(pos, reinsert); heapModified(); return ret; } @@ -367,7 +285,7 @@ public class Heap<E> implements Iterable<E> { pos = cpos; } queue[pos] = cur; - return (pos == ipos); + return (pos != ipos); } /** @@ -405,7 +323,7 @@ public class Heap<E> implements Iterable<E> { pos = min; } queue[pos] = cur; - return (pos == ipos); + return (pos != ipos); } /** @@ -453,15 +371,9 @@ public class Heap<E> implements Iterable<E> { queue[i] = null; } this.size = 0; - this.validSize = -1; heapModified(); } - @Override - public Iterator<E> iterator() { - return new Itr(); - } - /** * Called at the end of each heap modification. */ @@ -470,52 +382,12 @@ public class Heap<E> implements Iterable<E> { } /** - * Iterator over queue elements. No particular order (i.e. heap order!) + * Get an unordered heap iterator. * - * @author Erich Schubert - * - * @apiviz.exclude + * @return Iterator. */ - protected final class Itr implements Iterator<E> { - /** - * Cursor position. - */ - private int cursor = 0; - - /** - * Modification counter this iterator is valid for. 
- */ - private int expectedModCount = modCount; - - @Override - public boolean hasNext() { - return cursor < size; - } - - @SuppressWarnings("unchecked") - @Override - public E next() { - if (expectedModCount != modCount) { - throw new ConcurrentModificationException(); - } - if (cursor < size) { - return (E) queue[cursor++]; - } - throw new NoSuchElementException(); - } - - @Override - public void remove() { - if (expectedModCount != modCount) { - throw new ConcurrentModificationException(); - } - if (cursor > 0) { - cursor--; - } else { - throw new IllegalStateException(); - } - expectedModCount = modCount; - } + public UnorderedIter unorderedIter() { + return new UnorderedIter(); } /** @@ -526,7 +398,6 @@ public class Heap<E> implements Iterable<E> { * @return {@code null} when the heap is correct */ protected String checkHeap() { - ensureValid(); if (comparator == null) { for (int i = 1; i < size; i++) { final int parent = (i - 1) >>> 1; @@ -546,4 +417,43 @@ public class Heap<E> implements Iterable<E> { } return null; } + + /** + * Heap iterator. + * + * @author Erich Schubert + */ + public class UnorderedIter implements de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter { + /** + * Current iterator position. + */ + int pos = 0; + + /** + * Constructor. + */ + protected UnorderedIter() { + super(); + } + + @Override + public boolean valid() { + return pos < size(); + } + + @Override + public void advance() { + pos++; + } + + /** + * Get the current queue element. 
+ * + * @return Element + */ + @SuppressWarnings("unchecked") + public E get() { + return (E) queue[pos]; + } + } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerHeap.java index 6203ad96..3235926b 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,53 +23,22 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import java.util.Arrays; - -import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter; /** - * Basic in-memory heap structure. - * - * This heap is built lazily: if you first add many elements, then poll the - * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log - * n). This is implemented via a simple validTo counter. + * Basic in-memory heap for int values. * * @author Erich Schubert + * + * @apiviz.has UnsortedIter */ -public abstract class IntegerHeap extends AbstractHeap { - /** - * Heap storage: queue - */ - protected transient int[] queue; - - /** - * Constructor with initial capacity. 
- * - * @param size initial capacity - */ - public IntegerHeap(int size) { - super(); - this.size = 0; - this.queue = new int[size]; - } - +public interface IntegerHeap { /** * Add a key-value pair to the heap * * @param key Key */ - public void add(int key) { - // resize when needed - if (size + 1 > queue.length) { - resize(size + 1); - } - // final int pos = size; - this.queue[size] = key; - this.size += 1; - heapifyUp(size - 1, key); - validSize += 1; - heapModified(); - } + void add(int key); /** * Add a key-value pair to the heap, except if the new element is larger than @@ -78,13 +47,7 @@ public abstract class IntegerHeap extends AbstractHeap { * @param key Key * @param max Maximum size of heap */ - public void add(int key, int max) { - if (size < max) { - add(key); - } else if (comp(key, peek())) { - replaceTopElement(key); - } - } + void add(int key, int max); /** * Combined operation that removes the top element, and inserts a new element @@ -93,172 +56,67 @@ public abstract class IntegerHeap extends AbstractHeap { * @param e New element to insert * @return Previous top element of the heap */ - public int replaceTopElement(int e) { - ensureValid(); - int oldroot = queue[0]; - heapifyDown(0, e); - heapModified(); - return oldroot; - } + int replaceTopElement(int e); /** * Get the current top key * * @return Top key */ - public int peek() { - if (size == 0) { - throw new ArrayIndexOutOfBoundsException("Peek() on an empty heap!"); - } - ensureValid(); - return queue[0]; - } + int peek(); /** * Remove the first element * * @return Top element */ - public int poll() { - return removeAt(0); - } + int poll(); /** - * Repair the heap + * Delete all elements from the heap. */ - protected void ensureValid() { - if (validSize != size) { - if (size > 1) { - // Parent of first invalid - int nextmin = validSize > 0 ? 
((validSize - 1) >>> 1) : 0; - int curmin = MathUtil.nextAllOnesInt(nextmin); // Next line - int nextmax = curmin - 1; // End of valid line - int pos = (size - 2) >>> 1; // Parent of last element - // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin+", "+nextmin); - while (pos >= nextmin) { - // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin); - while (pos >= curmin) { - if (!heapifyDown(pos, queue[pos])) { - final int parent = (pos - 1) >>> 1; - if (parent < curmin) { - nextmin = Math.min(nextmin, parent); - nextmax = Math.max(nextmax, parent); - } - } - pos--; - } - curmin = nextmin; - pos = Math.min(pos, nextmax); - nextmax = -1; - } - } - validSize = size; - } - } + void clear(); /** - * Remove the element at the given position. + * Query the size * - * @param pos Element position. - * @return Removed element + * @return Size */ - protected int removeAt(int pos) { - if (pos < 0 || pos >= size) { - return 0; - } - final int top = queue[0]; - // Replacement object: - final int reinkey = queue[size - 1]; - // Keep heap in sync - if (validSize == size) { - size -= 1; - validSize -= 1; - heapifyDown(pos, reinkey); - } else { - size -= 1; - validSize = Math.min(pos >>> 1, validSize); - queue[pos] = reinkey; - } - heapModified(); - return top; - } - + public int size(); + /** - * Execute a "Heapify Upwards" aka "SiftUp". Used in insertions. + * Is the heap empty? * - * @param pos insertion position - * @param curkey Current key + * @return {@code true} when the size is 0. */ - protected void heapifyUp(int pos, int curkey) { - while (pos > 0) { - final int parent = (pos - 1) >>> 1; - int parkey = queue[parent]; - - if (comp(curkey, parkey)) { // Compare - break; - } - queue[pos] = parkey; - pos = parent; - } - queue[pos] = curkey; - } + public boolean isEmpty(); /** - * Execute a "Heapify Downwards" aka "SiftDown". Used in deletions. + * Get an unsorted iterator to inspect the heap. 
* + * @param ipos re-insertion position - * @param curkey Current key - * @return true when the order was changed + * @return Iterator */ - protected boolean heapifyDown(final int ipos, int curkey) { - int pos = ipos; - final int half = size >>> 1; - while (pos < half) { - // Get left child (must exist!) - int cpos = (pos << 1) + 1; - int chikey = queue[cpos]; - // Test right child, if present - final int rchild = cpos + 1; - if (rchild < size) { - int right = queue[rchild]; - if (comp(chikey, right)) { // Compare - cpos = rchild; - chikey = right; - } - } - - if (comp(chikey, curkey)) { // Compare - break; - } - queue[pos] = chikey; - pos = cpos; - } - queue[pos] = curkey; - return (pos == ipos); - } + UnsortedIter unsortedIter(); /** - * Test whether we need to resize to have the requested capacity. + * Unsorted iterator - in heap order. Does not poll the heap. * - * @param requiredSize required capacity - */ - protected final void resize(int requiredSize) { - queue = Arrays.copyOf(queue, desiredSize(requiredSize, queue.length)); - } - - /** - * Delete all elements from the heap. + * <pre> + * {@code + * for (IntegerHeap.UnsortedIter iter = heap.unsortedIter(); iter.valid(); iter.advance()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert */ - @Override - public void clear() { - super.clear(); - for (int i = 0; i < size; i++) { - queue[i] = 0; - } + public static interface UnsortedIter extends Iter { + /** + * Get the iterator's current object. 
+ * + * @return Current object + */ + int get(); } - - /** - * Compare two objects - */ - abstract protected boolean comp(int o1, int o2); } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMaxHeap.java index 383eb727..60f61d99 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMaxHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMaxHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,40 +23,400 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; along with this program. If not, see <http://www.gnu.org/licenses/>. */ +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + /** - * Basic in-memory heap structure. + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). + * + * This code was automatically instantiated for the type: Integer * - * This heap is built lazily: if you first add many elements, then poll the - * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log - * n). This is implemented via a simple validTo counter. + * This combination was found to work quite well in benchmarks, but YMMV. 
+ * + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> * * @author Erich Schubert + * + * @apiviz.has UnsortedIter */ -public class IntegerMaxHeap extends IntegerHeap { +public class IntegerMaxHeap implements IntegerHeap { + /** + * Base heap. + */ + protected int[] twoheap; + + /** + * Extension heap. + */ + protected int[] fourheap; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + /** - * Constructor with default capacity. + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. + */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + + /** + * Constructor, with default size. 
*/ public IntegerMaxHeap() { - super(DEFAULT_INITIAL_CAPACITY); + super(); + int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE]; + + this.twoheap = twoheap; + this.fourheap = null; + this.size = 0; + this.modCount = 0; } /** - * Constructor with initial capacity. + * Constructor, with given minimum size. * - * @param size initial capacity + * @param minsize Minimum size */ - public IntegerMaxHeap(int size) { - super(size); + public IntegerMaxHeap(int minsize) { + super(); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + int[] twoheap = new int[size]; + + this.twoheap = twoheap; + this.fourheap = null; + } else { + int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE]; + int[] fourheap = new int[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + this.twoheap = twoheap; + this.fourheap = fourheap; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + Arrays.fill(twoheap, 0); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + public void add(int o) { + final int co = o; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + ++size; + heapifyUp2(twopos, co); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = new int[FOUR_HEAP_INITIAL_SIZE]; + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. 
+ fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + } + fourheap[fourpos] = co; + ++size; + heapifyUp4(fourpos, co); + ++modCount; + } + } + + @Override + public void add(int key, int max) { + if (size < max) { + add(key); + } else if (twoheap[0] >= key) { + replaceTopElement(key); + } + } + + @Override + public int replaceTopElement(int reinsert) { + final int ret = twoheap[0]; + heapifyDown( reinsert); + ++modCount; + return ret; + } + + /** + * Heapify-Up method for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + */ + private void heapifyUp2(int twopos, int cur) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + int par = twoheap[parent]; + if (cur <= par) { + break; + } + twoheap[twopos] = par; + twopos = parent; + } + twoheap[twopos] = cur; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + */ + private void heapifyUp4(int fourpos, int cur) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + int par = fourheap[parent]; + if (cur <= par) { + break; + } + fourheap[fourpos] = par; + fourpos = parent; + } + if (fourpos == 0 && twoheap[0] < cur) { + fourheap[0] = twoheap[0]; + twoheap[0] = cur; + } else { + fourheap[fourpos] = cur; + } + } + + @Override + public int poll() { + final int ret = twoheap[0]; + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final int reinsert = fourheap[last]; + fourheap[last] = 0; + heapifyDown(reinsert); + } else if (size > 0) { + final int reinsert = twoheap[size]; + twoheap[size] = 0; + heapifyDown(reinsert); + } else { + twoheap[0] = 0; + } + ++modCount; + return ret; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + */ + private void heapifyDown(int reinsert) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. 
+ final int best = (twoheap[1] >= twoheap[2]) ? 1 : 2; + if (fourheap[0] > twoheap[best]) { + twoheap[0] = fourheap[0]; + heapifyDown4(0, reinsert); + } else { + twoheap[0] = twoheap[best]; + heapifyDown2(best, reinsert); + } + return; + } + heapifyDown2(0, reinsert); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + */ + private void heapifyDown2(int twopos, int cur) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + int best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && best < twoheap[right]) { + bestchild = right; + best = twoheap[right]; + } + if (cur >= best) { + break; + } + twoheap[twopos] = best; + twopos = bestchild; + } + twoheap[twopos] = cur; } /** - * Compare two objects + * Heapify-Down for 4-ary heap. * - * @param o1 First object - * @param o2 Second object + * @param fourpos Position in 4-ary heap. 
+ * @param cur Current object */ + private void heapifyDown4(int fourpos, int cur) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + int best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + int nextchild = fourheap[candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (cur >= best) { + break; + } + fourheap[fourpos] = best; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + } + + @Override + public int peek() { + return twoheap[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(IntegerMaxHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) { + buf.append(iter.get()).append(','); + } + buf.append(']'); + return buf.toString(); + } + @Override - protected boolean comp(int o1, int o2) { - return o1 < o2; + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (IntegerHeap.UnsortedIter iter = heap.unsortedIter(); iter.valid(); iter.next()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements IntegerHeap.UnsortedIter { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. 
+ */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @Override + public int get() { + return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMinHeap.java index f81fe275..c352ece4 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMinHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMinHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,40 +23,400 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; along with this program. If not, see <http://www.gnu.org/licenses/>. */ +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + /** - * Basic in-memory heap structure. + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). + * + * This code was automatically instantiated for the type: Integer * - * This heap is built lazily: if you first add many elements, then poll the - * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log - * n). This is implemented via a simple validTo counter. + * This combination was found to work quite well in benchmarks, but YMMV. 
+ * + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> * * @author Erich Schubert + * + * @apiviz.has UnsortedIter */ -public class IntegerMinHeap extends IntegerHeap { +public class IntegerMinHeap implements IntegerHeap { + /** + * Base heap. + */ + protected int[] twoheap; + + /** + * Extension heap. + */ + protected int[] fourheap; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + /** - * Constructor with default capacity. + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. + */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + + /** + * Constructor, with default size. 
*/ public IntegerMinHeap() { - super(DEFAULT_INITIAL_CAPACITY); + super(); + int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE]; + + this.twoheap = twoheap; + this.fourheap = null; + this.size = 0; + this.modCount = 0; } /** - * Constructor with initial capacity. + * Constructor, with given minimum size. * - * @param size initial capacity + * @param minsize Minimum size */ - public IntegerMinHeap(int size) { - super(size); + public IntegerMinHeap(int minsize) { + super(); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + int[] twoheap = new int[size]; + + this.twoheap = twoheap; + this.fourheap = null; + } else { + int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE]; + int[] fourheap = new int[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + this.twoheap = twoheap; + this.fourheap = fourheap; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + Arrays.fill(twoheap, 0); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + public void add(int o) { + final int co = o; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. + twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + ++size; + heapifyUp2(twopos, co); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = new int[FOUR_HEAP_INITIAL_SIZE]; + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. 
+ fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + } + fourheap[fourpos] = co; + ++size; + heapifyUp4(fourpos, co); + ++modCount; + } + } + + @Override + public void add(int key, int max) { + if (size < max) { + add(key); + } else if (twoheap[0] <= key) { + replaceTopElement(key); + } + } + + @Override + public int replaceTopElement(int reinsert) { + final int ret = twoheap[0]; + heapifyDown( reinsert); + ++modCount; + return ret; + } + + /** + * Heapify-Up method for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + */ + private void heapifyUp2(int twopos, int cur) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + int par = twoheap[parent]; + if (cur >= par) { + break; + } + twoheap[twopos] = par; + twopos = parent; + } + twoheap[twopos] = cur; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + */ + private void heapifyUp4(int fourpos, int cur) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + int par = fourheap[parent]; + if (cur >= par) { + break; + } + fourheap[fourpos] = par; + fourpos = parent; + } + if (fourpos == 0 && twoheap[0] > cur) { + fourheap[0] = twoheap[0]; + twoheap[0] = cur; + } else { + fourheap[fourpos] = cur; + } + } + + @Override + public int poll() { + final int ret = twoheap[0]; + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final int reinsert = fourheap[last]; + fourheap[last] = 0; + heapifyDown(reinsert); + } else if (size > 0) { + final int reinsert = twoheap[size]; + twoheap[size] = 0; + heapifyDown(reinsert); + } else { + twoheap[0] = 0; + } + ++modCount; + return ret; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + */ + private void heapifyDown(int reinsert) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. 
+ final int best = (twoheap[1] <= twoheap[2]) ? 1 : 2; + if (fourheap[0] < twoheap[best]) { + twoheap[0] = fourheap[0]; + heapifyDown4(0, reinsert); + } else { + twoheap[0] = twoheap[best]; + heapifyDown2(best, reinsert); + } + return; + } + heapifyDown2(0, reinsert); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + */ + private void heapifyDown2(int twopos, int cur) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + int best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && best > twoheap[right]) { + bestchild = right; + best = twoheap[right]; + } + if (cur <= best) { + break; + } + twoheap[twopos] = best; + twopos = bestchild; + } + twoheap[twopos] = cur; } /** - * Compare two objects + * Heapify-Down for 4-ary heap. * - * @param o1 First object - * @param o2 Second object + * @param fourpos Position in 4-ary heap. 
+ * @param cur Current object */ + private void heapifyDown4(int fourpos, int cur) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + int best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + int nextchild = fourheap[candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (cur <= best) { + break; + } + fourheap[fourpos] = best; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + } + + @Override + public int peek() { + return twoheap[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(IntegerMinHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new UnsortedIter(); iter.valid(); iter.advance()) { + buf.append(iter.get()).append(','); + } + buf.append(']'); + return buf.toString(); + } + @Override - protected boolean comp(int o1, int o2) { - return o1 > o2; + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (IntegerHeap.UnsortedIter iter = heap.unsortedIter(); iter.valid(); iter.next()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements IntegerHeap.UnsortedIter { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. 
+ */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @Override + public int get() { + return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectHeap.java new file mode 100644 index 00000000..01f7aea0 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectHeap.java @@ -0,0 +1,129 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter; + +/** + * Basic in-memory heap interface, for int keys and V values. 
+ * + * @author Erich Schubert + * + * @apiviz.has UnsortedIter + * @param <V> Value type + */ +public interface IntegerObjectHeap<V> { + /** + * Add a key-value pair to the heap + * + * @param key Key + * @param val Value + */ + void add(int key, V val); + + /** + * Add a key-value pair to the heap if it improves the top. + * + * @param key Key + * @param val Value + * @param k Desired maximum size + */ + void add(int key, V val, int k); + + /** + * Combined operation that removes the top element, and inserts a new element + * instead. + * + * @param key Key of new element + * @param val Value of new element + */ + void replaceTopElement(int key, V val); + + /** + * Get the current top key + * + * @return Top key + */ + int peekKey(); + + /** + * Get the current top value + * + * @return Value + */ + V peekValue(); + + /** + * Remove the first element + */ + void poll(); + + /** + * Clear the heap contents. + */ + void clear(); + + /** + * Query the size + * + * @return Size + */ + public int size(); + + /** + * Is the heap empty? + * + * @return {@code true} when the size is 0. + */ + public boolean isEmpty(); + + /** + * Get an unsorted iterator to inspect the heap. + * + * @return Iterator + */ + UnsortedIter<V> unsortedIter(); + + /** + * Unsorted iterator - in heap order. Does not poll the heap. 
+ * + * @author Erich Schubert + * @param <V> Value type + */ + public static interface UnsortedIter<V> extends Iter { + /** + * Get the current key + * + * @return Current key + */ + int getKey(); + + /** + * Get the current value + * + * @return Current value + */ + V getValue(); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMaxHeap.java new file mode 100644 index 00000000..93a4e75a --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMaxHeap.java @@ -0,0 +1,482 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). 
+ * + * This code was automatically instantiated for the types: Integer and Object + * + * This combination was found to work quite well in benchmarks, but YMMV. + * + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> + * + * @author Erich Schubert + * + * @apiviz.has UnsortedIter + * @param <V> Value type + */ +public class IntegerObjectMaxHeap<V> implements IntegerObjectHeap<V> { + /** + * Base heap. + */ + protected int[] twoheap; + + /** + * Base heap values. + */ + protected Object[] twovals; + + /** + * Extension heap. + */ + protected int[] fourheap; + + /** + * Extension heapvalues. + */ + protected Object[] fourvals; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + + /** + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. 
+ */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + + /** + * Constructor, with default size. + */ + public IntegerObjectMaxHeap() { + super(); + int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE]; + Object[] twovals = new Object[TWO_HEAP_INITIAL_SIZE]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + this.size = 0; + this.modCount = 0; + } + + /** + * Constructor, with given minimum size. + * + * @param minsize Minimum size + */ + public IntegerObjectMaxHeap(int minsize) { + super(); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + int[] twoheap = new int[size]; + Object[] twovals = new Object[size]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + } else { + int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE]; + Object[] twovals = new Object[TWO_HEAP_INITIAL_SIZE]; + int[] fourheap = new int[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + Object[] fourvals = new Object[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = fourheap; + this.fourvals = fourvals; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + fourvals = null; + Arrays.fill(twoheap, 0); + Arrays.fill(twovals, null); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + public void add(int o, V v) { + final int co = o; + final Object cv = (Object)v; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. 
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + twovals[twopos] = cv; + ++size; + heapifyUp2(twopos, co, cv); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = new int[FOUR_HEAP_INITIAL_SIZE]; + fourvals = new Object[FOUR_HEAP_INITIAL_SIZE]; + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. + fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1)); + } + fourheap[fourpos] = co; + fourvals[fourpos] = cv; + ++size; + heapifyUp4(fourpos, co, cv); + ++modCount; + } + } + + @Override + public void add(int key, V val, int max) { + if (size < max) { + add(key, val); + } else if (twoheap[0] >= key) { + replaceTopElement(key, val); + } + } + + @Override + public void replaceTopElement(int reinsert, V val) { + heapifyDown(reinsert, (Object)val); + ++modCount; + } + + /** + * Heapify-Up method for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Current value + */ + private void heapifyUp2(int twopos, int cur, Object val) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + int par = twoheap[parent]; + if (cur <= par) { + break; + } + twoheap[twopos] = par; + twovals[twopos] = twovals[parent]; + twopos = parent; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. 
+ * @param cur Current object + * @param val Current value + */ + private void heapifyUp4(int fourpos, int cur, Object val) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + int par = fourheap[parent]; + if (cur <= par) { + break; + } + fourheap[fourpos] = par; + fourvals[fourpos] = fourvals[parent]; + fourpos = parent; + } + if (fourpos == 0 && twoheap[0] < cur) { + fourheap[0] = twoheap[0]; + fourvals[0] = twovals[0]; + twoheap[0] = cur; + twovals[0] = val; + } else { + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + } + + @Override + public void poll() { + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final int reinsert = fourheap[last]; + final Object reinsertv = fourvals[last]; + fourheap[last] = 0; + fourvals[last] = null; + heapifyDown(reinsert, reinsertv); + } else if (size > 0) { + final int reinsert = twoheap[size]; + final Object reinsertv = twovals[size]; + twoheap[size] = 0; + twovals[size] = null; + heapifyDown(reinsert, reinsertv); + } else { + twoheap[0] = 0; + twovals[0] = null; + } + ++modCount; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + * @param val Value to reinsert. + */ + private void heapifyDown(int reinsert, Object val) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (twoheap[1] >= twoheap[2]) ? 1 : 2; + if (fourheap[0] > twoheap[best]) { + twoheap[0] = fourheap[0]; + twovals[0] = fourvals[0]; + heapifyDown4(0, reinsert, val); + } else { + twoheap[0] = twoheap[best]; + twovals[0] = twovals[best]; + heapifyDown2(best, reinsert, val); + } + return; + } + heapifyDown2(0, reinsert, val); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Value to reinsert. 
+ */ + private void heapifyDown2(int twopos, int cur, Object val) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + int best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && best < twoheap[right]) { + bestchild = right; + best = twoheap[right]; + } + if (cur >= best) { + break; + } + twoheap[twopos] = best; + twovals[twopos] = twovals[bestchild]; + twopos = bestchild; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Down for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + * @param val Value to reinsert. + */ + private void heapifyDown4(int fourpos, int cur, Object val) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + int best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + int nextchild = fourheap[candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (best < nextchild) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (cur >= best) { + break; + } + fourheap[fourpos] = best; + fourvals[fourpos] = fourvals[bestchild]; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + + @Override + public int peekKey() { + return twoheap[0]; + } + + @Override + @SuppressWarnings("unchecked") + public V peekValue() { + return (V)twovals[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(IntegerObjectMaxHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new 
UnsortedIter(); iter.valid(); iter.advance()) { + buf.append(iter.getKey()).append(':').append(iter.getValue()).append(','); + } + buf.append(']'); + return buf.toString(); + } + + @Override + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (IntegerObjectHeap.UnsortedIter<V> iter = heap.unsortedIter(); iter.valid(); iter.next()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements IntegerObjectHeap.UnsortedIter<V> { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. + */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @Override + public int getKey() { + return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } + + @SuppressWarnings("unchecked") + + @Override + public V getValue() { + return (V)((pos < TWO_HEAP_MAX_SIZE) ? 
twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMinHeap.java new file mode 100644 index 00000000..e54c7d28 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMinHeap.java @@ -0,0 +1,482 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; +import java.util.ConcurrentModificationException; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Advanced priority queue class, based on a binary heap (for small sizes), + * which will for larger heaps be accompanied by a 4-ary heap (attached below + * the root of the two-ary heap, making the root actually 3-ary). + * + * This code was automatically instantiated for the types: Integer and Object + * + * This combination was found to work quite well in benchmarks, but YMMV. 
+ * + * Some other observations from benchmarking: + * <ul> + * <li>Bulk loading did not improve things</li> + * <li>Primitive heaps are substantially faster.</li> + * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and + * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li> + * <li>Workload makes a huge difference. A load-once, poll-until-empty priority + * queue is something different than e.g. a top-k heap, which will see a lot of + * top element replacements.</li> + * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for + * top-k make a difference.</li> + * <li>Different day, different benchmark results ...</li> + * </ul> + * + * @author Erich Schubert + * + * @apiviz.has UnsortedIter + * @param <V> Value type + */ +public class IntegerObjectMinHeap<V> implements IntegerObjectHeap<V> { + /** + * Base heap. + */ + protected int[] twoheap; + + /** + * Base heap values. + */ + protected Object[] twovals; + + /** + * Extension heap. + */ + protected int[] fourheap; + + /** + * Extension heapvalues. + */ + protected Object[] fourvals; + + /** + * Current size of heap. + */ + protected int size; + + /** + * (Structural) modification counter. Used to invalidate iterators. + */ + protected int modCount = 0; + + /** + * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements. + */ + private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1; + + /** + * Initial size of the 2-ary heap. + */ + private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1; + + /** + * Initial size of 4-ary heap when initialized. + * + * 21 = 4-ary heap of height 2: 1 + 4 + 4*4 + * + * 85 = 4-ary heap of height 3: 21 + 4*4*4 + * + * 341 = 4-ary heap of height 4: 85 + 4*4*4*4 + * + * Since we last grew by 255 (to 511), let's use 341. + */ + private final static int FOUR_HEAP_INITIAL_SIZE = 341; + + /** + * Constructor, with default size. 
+ */ + public IntegerObjectMinHeap() { + super(); + int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE]; + Object[] twovals = new Object[TWO_HEAP_INITIAL_SIZE]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + this.size = 0; + this.modCount = 0; + } + + /** + * Constructor, with given minimum size. + * + * @param minsize Minimum size + */ + public IntegerObjectMinHeap(int minsize) { + super(); + if (minsize < TWO_HEAP_MAX_SIZE) { + final int size = MathUtil.nextPow2Int(minsize + 1) - 1; + int[] twoheap = new int[size]; + Object[] twovals = new Object[size]; + + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = null; + this.fourvals = null; + } else { + int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE]; + Object[] twovals = new Object[TWO_HEAP_INITIAL_SIZE]; + int[] fourheap = new int[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + Object[] fourvals = new Object[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)]; + this.twoheap = twoheap; + this.twovals = twovals; + this.fourheap = fourheap; + this.fourvals = fourvals; + } + this.size = 0; + this.modCount = 0; + } + + @Override + public void clear() { + size = 0; + ++modCount; + fourheap = null; + fourvals = null; + Arrays.fill(twoheap, 0); + Arrays.fill(twovals, null); + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return (size == 0); + } + + @Override + public void add(int o, V v) { + final int co = o; + final Object cv = (Object)v; + // System.err.println("Add: " + o); + if (size < TWO_HEAP_MAX_SIZE) { + if (size >= twoheap.length) { + // Grow by one layer. 
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1); + twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1); + } + final int twopos = size; + twoheap[twopos] = co; + twovals[twopos] = cv; + ++size; + heapifyUp2(twopos, co, cv); + ++modCount; + } else { + final int fourpos = size - TWO_HEAP_MAX_SIZE; + if (fourheap == null) { + fourheap = new int[FOUR_HEAP_INITIAL_SIZE]; + fourvals = new Object[FOUR_HEAP_INITIAL_SIZE]; + } else if (fourpos >= fourheap.length) { + // Grow extension heap by half. + fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1)); + fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1)); + } + fourheap[fourpos] = co; + fourvals[fourpos] = cv; + ++size; + heapifyUp4(fourpos, co, cv); + ++modCount; + } + } + + @Override + public void add(int key, V val, int max) { + if (size < max) { + add(key, val); + } else if (twoheap[0] <= key) { + replaceTopElement(key, val); + } + } + + @Override + public void replaceTopElement(int reinsert, V val) { + heapifyDown(reinsert, (Object)val); + ++modCount; + } + + /** + * Heapify-Up method for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Current value + */ + private void heapifyUp2(int twopos, int cur, Object val) { + while (twopos > 0) { + final int parent = (twopos - 1) >>> 1; + int par = twoheap[parent]; + if (cur >= par) { + break; + } + twoheap[twopos] = par; + twovals[twopos] = twovals[parent]; + twopos = parent; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Up method for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. 
+ * @param cur Current object + * @param val Current value + */ + private void heapifyUp4(int fourpos, int cur, Object val) { + while (fourpos > 0) { + final int parent = (fourpos - 1) >> 2; + int par = fourheap[parent]; + if (cur >= par) { + break; + } + fourheap[fourpos] = par; + fourvals[fourpos] = fourvals[parent]; + fourpos = parent; + } + if (fourpos == 0 && twoheap[0] > cur) { + fourheap[0] = twoheap[0]; + fourvals[0] = twovals[0]; + twoheap[0] = cur; + twovals[0] = val; + } else { + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + } + + @Override + public void poll() { + --size; + // Replacement object: + if (size >= TWO_HEAP_MAX_SIZE) { + final int last = size - TWO_HEAP_MAX_SIZE; + final int reinsert = fourheap[last]; + final Object reinsertv = fourvals[last]; + fourheap[last] = 0; + fourvals[last] = null; + heapifyDown(reinsert, reinsertv); + } else if (size > 0) { + final int reinsert = twoheap[size]; + final Object reinsertv = twovals[size]; + twoheap[size] = 0; + twovals[size] = null; + heapifyDown(reinsert, reinsertv); + } else { + twoheap[0] = 0; + twovals[0] = null; + } + ++modCount; + } + + /** + * Invoke heapify-down for the root object. + * + * @param reinsert Object to insert. + * @param val Value to reinsert. + */ + private void heapifyDown(int reinsert, Object val) { + if (size > TWO_HEAP_MAX_SIZE) { + // Special case: 3-ary situation. + final int best = (twoheap[1] <= twoheap[2]) ? 1 : 2; + if (fourheap[0] < twoheap[best]) { + twoheap[0] = fourheap[0]; + twovals[0] = fourvals[0]; + heapifyDown4(0, reinsert, val); + } else { + twoheap[0] = twoheap[best]; + twovals[0] = twovals[best]; + heapifyDown2(best, reinsert, val); + } + return; + } + heapifyDown2(0, reinsert, val); + } + + /** + * Heapify-Down for 2-ary heap. + * + * @param twopos Position in 2-ary heap. + * @param cur Current object + * @param val Value to reinsert. 
+ */ + private void heapifyDown2(int twopos, int cur, Object val) { + final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1; + while (twopos < stop) { + int bestchild = (twopos << 1) + 1; + int best = twoheap[bestchild]; + final int right = bestchild + 1; + if (right < size && best > twoheap[right]) { + bestchild = right; + best = twoheap[right]; + } + if (cur <= best) { + break; + } + twoheap[twopos] = best; + twovals[twopos] = twovals[bestchild]; + twopos = bestchild; + } + twoheap[twopos] = cur; + twovals[twopos] = val; + } + + /** + * Heapify-Down for 4-ary heap. + * + * @param fourpos Position in 4-ary heap. + * @param cur Current object + * @param val Value to reinsert. + */ + private void heapifyDown4(int fourpos, int cur, Object val) { + final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2; + while (fourpos < stop) { + final int child = (fourpos << 2) + 1; + int best = fourheap[child]; + int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE; + if (size > minsize) { + int nextchild = fourheap[candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + + minsize += 2; + if (size >= minsize) { + nextchild = fourheap[++candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + + if (size > minsize) { + nextchild = fourheap[++candidate]; + if (best > nextchild) { + bestchild = candidate; + best = nextchild; + } + } + } + } + if (cur <= best) { + break; + } + fourheap[fourpos] = best; + fourvals[fourpos] = fourvals[bestchild]; + fourpos = bestchild; + } + fourheap[fourpos] = cur; + fourvals[fourpos] = val; + } + + @Override + public int peekKey() { + return twoheap[0]; + } + + @Override + @SuppressWarnings("unchecked") + public V peekValue() { + return (V)twovals[0]; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(IntegerObjectMinHeap.class.getSimpleName()).append(" ["); + for (UnsortedIter iter = new 
UnsortedIter(); iter.valid(); iter.advance()) { + buf.append(iter.getKey()).append(':').append(iter.getValue()).append(','); + } + buf.append(']'); + return buf.toString(); + } + + @Override + public UnsortedIter unsortedIter() { + return new UnsortedIter(); + } + + /** + * Unsorted iterator - in heap order. Does not poll the heap. + * + * Use this class as follows: + * + * <pre> + * {@code + * for (IntegerObjectHeap.UnsortedIter<V> iter = heap.unsortedIter(); iter.valid(); iter.next()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert + */ + private class UnsortedIter implements IntegerObjectHeap.UnsortedIter<V> { + /** + * Iterator position. + */ + protected int pos = 0; + + /** + * Modification counter we were initialized at. + */ + protected final int myModCount = modCount; + + @Override + public boolean valid() { + if (modCount != myModCount) { + throw new ConcurrentModificationException(); + } + return pos < size; + } + + @Override + public void advance() { + pos++; + } + + @Override + public int getKey() { + return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]); + } + + @SuppressWarnings("unchecked") + + @Override + public V getValue() { + return (V)((pos < TWO_HEAP_MAX_SIZE) ? 
twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerPriorityObject.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerPriorityObject.java index 2014de65..f007b9fc 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerPriorityObject.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerPriorityObject.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ObjectHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ObjectHeap.java index 2e20ed56..b5dbbb0e 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ObjectHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ObjectHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,53 +23,24 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import java.util.Arrays; - -import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter; /** - * Basic in-memory heap structure. - * - * This heap is built lazily: if you first add many elements, then poll the - * heap, it will be bulk-loaded in O(n) instead of iteratively built in O(n log - * n). This is implemented via a simple validTo counter. 
+ * Basic in-memory heap for K values. * * @author Erich Schubert + * + * @apiviz.has UnsortedIter + * + * @param <K> Key type */ -public abstract class ObjectHeap<K> extends AbstractHeap { - /** - * Heap storage: queue - */ - protected transient Object[] queue; - - /** - * Constructor with initial capacity. - * - * @param size initial capacity - */ - public ObjectHeap(int size) { - super(); - this.size = 0; - this.queue = new Object[size]; - } - +public interface ObjectHeap<K> { /** * Add a key-value pair to the heap * * @param key Key */ - public void add(Object key) { - // resize when needed - if (size + 1 > queue.length) { - resize(size + 1); - } - // final int pos = size; - this.queue[size] = key; - this.size += 1; - heapifyUp(size - 1, key); - validSize += 1; - heapModified(); - } + void add(K key); /** * Add a key-value pair to the heap, except if the new element is larger than @@ -78,13 +49,7 @@ public abstract class ObjectHeap<K> extends AbstractHeap { * @param key Key * @param max Maximum size of heap */ - public void add(Object key, int max) { - if (size < max) { - add(key); - } else if (comp(key, peek())) { - replaceTopElement(key); - } - } + void add(K key, int max); /** * Combined operation that removes the top element, and inserts a new element @@ -93,175 +58,69 @@ public abstract class ObjectHeap<K> extends AbstractHeap { * @param e New element to insert * @return Previous top element of the heap */ - @SuppressWarnings("unchecked") - public Object replaceTopElement(Object e) { - ensureValid(); - Object oldroot = (K) queue[0]; - heapifyDown(0, e); - heapModified(); - return oldroot; - } + K replaceTopElement(K e); /** * Get the current top key * * @return Top key */ - @SuppressWarnings("unchecked") - public Object peek() { - if (size == 0) { - throw new ArrayIndexOutOfBoundsException("Peek() on an empty heap!"); - } - ensureValid(); - return (K) queue[0]; - } + K peek(); /** * Remove the first element * * @return Top element */ - public Object poll() 
{ - return removeAt(0); - } + K poll(); /** - * Repair the heap + * Delete all elements from the heap. */ - protected void ensureValid() { - if (validSize != size) { - if (size > 1) { - // Parent of first invalid - int nextmin = validSize > 0 ? ((validSize - 1) >>> 1) : 0; - int curmin = MathUtil.nextAllOnesInt(nextmin); // Next line - int nextmax = curmin - 1; // End of valid line - int pos = (size - 2) >>> 1; // Parent of last element - // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin+", "+nextmin); - while (pos >= nextmin) { - // System.err.println(validSize+"<="+size+" iter:"+pos+"->"+curmin); - while (pos >= curmin) { - if (!heapifyDown(pos, queue[pos])) { - final int parent = (pos - 1) >>> 1; - if (parent < curmin) { - nextmin = Math.min(nextmin, parent); - nextmax = Math.max(nextmax, parent); - } - } - pos--; - } - curmin = nextmin; - pos = Math.min(pos, nextmax); - nextmax = -1; - } - } - validSize = size; - } - } + void clear(); /** - * Remove the element at the given position. + * Query the size * - * @param pos Element position. - * @return Removed element + * @return Size */ - @SuppressWarnings("unchecked") - protected Object removeAt(int pos) { - if (pos < 0 || pos >= size) { - return null; - } - final Object top = (K) queue[0]; - // Replacement object: - final Object reinkey = queue[size - 1]; - // Keep heap in sync - if (validSize == size) { - size -= 1; - validSize -= 1; - heapifyDown(pos, reinkey); - } else { - size -= 1; - validSize = Math.min(pos >>> 1, validSize); - queue[pos] = reinkey; - } - heapModified(); - return top; - } + public int size(); /** - * Execute a "Heapify Upwards" aka "SiftUp". Used in insertions. + * Is the heap empty? * - * @param pos insertion position - * @param curkey Current key + * @return {@code true} when the size is 0. 
*/ - protected void heapifyUp(int pos, Object curkey) { - while (pos > 0) { - final int parent = (pos - 1) >>> 1; - Object parkey = queue[parent]; - - if (comp(curkey, parkey)) { // Compare - break; - } - queue[pos] = parkey; - pos = parent; - } - queue[pos] = curkey; - } + public boolean isEmpty(); /** - * Execute a "Heapify Downwards" aka "SiftDown". Used in deletions. + * Get an unsorted iterator to inspect the heap. * - * @param ipos re-insertion position - * @param curkey Current key - * @return true when the order was changed + * @return Iterator */ - protected boolean heapifyDown(final int ipos, Object curkey) { - int pos = ipos; - final int half = size >>> 1; - while (pos < half) { - // Get left child (must exist!) - int cpos = (pos << 1) + 1; - Object chikey = queue[cpos]; - // Test right child, if present - final int rchild = cpos + 1; - if (rchild < size) { - Object right = queue[rchild]; - if (comp(chikey, right)) { // Compare - cpos = rchild; - chikey = right; - } - } - - if (comp(chikey, curkey)) { // Compare - break; - } - queue[pos] = chikey; - pos = cpos; - } - queue[pos] = curkey; - return (pos == ipos); - } + UnsortedIter<K> unsortedIter(); /** - * Test whether we need to resize to have the requested capacity. + * Unsorted iterator - in heap order. Does not poll the heap. * - * @param requiredSize required capacity - */ - protected final void resize(int requiredSize) { - queue = Arrays.copyOf(queue, desiredSize(requiredSize, queue.length)); - } - - /** - * Delete all elements from the heap. + * <pre> + * {@code + * for (ObjectHeap.UnsortedIter<K> iter = heap.unsortedIter(); iter.valid(); iter.next()) { + * doSomething(iter.get()); + * } + * } + * </pre> + * + * @author Erich Schubert + * + * @param <K> Key type */ - @Override - public void clear() { - super.clear(); - for (int i = 0; i < size; i++) { - queue[i] = null; - } + public static interface UnsortedIter<K> extends Iter { + /** + * Get the iterators current object. 
+ * + * @return Current object + */ + K get(); } - - /** - * Compare two objects - */ - abstract protected boolean comp(Object o1, Object o2); } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedHeap.java index 2daaafa4..32f57999 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -25,11 +25,8 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; import java.util.ArrayList; import java.util.Comparator; -import java.util.Iterator; import java.util.List; -import de.lmu.ifi.dbs.elki.utilities.iterator.MergedIterator; - /** * A size-limited heap similar to {@link TopBoundedHeap}, discarding elements * with the highest value. However, this variation keeps a list of tied @@ -43,7 +40,7 @@ public class TiedTopBoundedHeap<E> extends TopBoundedHeap<E> { /** * List to keep ties in. */ - private List<E> ties = new ArrayList<E>(); + private List<E> ties = new ArrayList<>(); /** * Constructor with comparator. @@ -75,12 +72,6 @@ public class TiedTopBoundedHeap<E> extends TopBoundedHeap<E> { ties.clear(); } - @SuppressWarnings("unchecked") - @Override - public Iterator<E> iterator() { - return new MergedIterator<E>(ties.iterator(), super.iterator()); - } - @Override public E peek() { if (ties.isEmpty()) { @@ -131,4 +122,44 @@ public class TiedTopBoundedHeap<E> extends TopBoundedHeap<E> { ties.clear(); } } + + /** + * Get an unordered heap iterator. + * + * @return Iterator. 
+ */ + @Override + public UnorderedIter unorderedIter() { + return new UnorderedIter(); + } + + /** + * Unordered heap iterator class. + * + * @author Erich Schubert + * + */ + public class UnorderedIter extends Heap<E>.UnorderedIter { + /** + * Constructor. + */ + protected UnorderedIter() { + super(); + } + + @Override + public boolean valid() { + return pos < size(); + } + + @Override + public E get() { + final int ssize = TiedTopBoundedHeap.super.size(); + if (pos < ssize) { + return super.get(); + } else { + return ties.get(pos - ssize); + } + } + } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedUpdatableHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedUpdatableHeap.java index 8e39af1d..3905030f 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedUpdatableHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TiedTopBoundedUpdatableHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -29,7 +29,6 @@ import java.util.Iterator; import java.util.List; import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; -import de.lmu.ifi.dbs.elki.utilities.iterator.MergedIterator; /** * A size-limited heap similar to {@link TopBoundedHeap}, discarding elements @@ -44,7 +43,7 @@ public class TiedTopBoundedUpdatableHeap<E> extends TopBoundedUpdatableHeap<E> { /** * List to keep ties in. */ - private List<E> ties = new ArrayList<E>(); + private List<E> ties = new ArrayList<>(); /** * Constructor with comparator. 
@@ -76,12 +75,6 @@ public class TiedTopBoundedUpdatableHeap<E> extends TopBoundedUpdatableHeap<E> { ties.clear(); } - @SuppressWarnings("unchecked") - @Override - public Iterator<E> iterator() { - return new MergedIterator<E>(ties.iterator(), super.iterator()); - } - @Override public void offerAt(int pos, E e) { if(pos == IN_TIES) { diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedHeap.java index 07b595f6..9adda9f3 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -69,7 +69,6 @@ public class TopBoundedHeap<E> extends Heap<E> { return; } // Peek at the top element, return if we are worse. 
- ensureValid(); final int comp; if (comparator == null) { @SuppressWarnings("unchecked") diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedUpdatableHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedUpdatableHeap.java index 75f2abcf..4a591d4c 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedUpdatableHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/TopBoundedUpdatableHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -68,7 +68,6 @@ public class TopBoundedUpdatableHeap<E> extends UpdatableHeap<E> { super.offerAt(pos, e); return; } - ensureValid(); if (compare(e, queue[0]) < 0) { // while we did not change, this still was "successful". return; diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/UpdatableHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/UpdatableHeap.java index 1ab5f4df..a585d94d 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/UpdatableHeap.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/UpdatableHeap.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -49,7 +49,7 @@ public class UpdatableHeap<O> extends Heap<O> { /** * Holds the indices in the heap of each element. 
*/ - protected final TObjectIntMap<Object> index = new TObjectIntHashMap<Object>(100, 0.5f, NO_VALUE); + protected final TObjectIntMap<Object> index = new TObjectIntHashMap<>(100, 0.5f, NO_VALUE); /** * Simple constructor with default size. @@ -105,43 +105,32 @@ public class UpdatableHeap<O> extends Heap<O> { * @param e Element */ protected void offerAt(final int pos, O e) { - if(pos == NO_VALUE) { + if (pos == NO_VALUE) { // resize when needed - if(size + 1 > queue.length) { + if (size + 1 > queue.length) { resize(size + 1); } - // final int pos = size; - this.queue[size] = e; index.put(e, size); - size += 1; - // We do NOT YET update the heap. This is done lazily. + size++; + heapifyUp(size - 1, e); heapModified(); return; - } - else { + } else { assert (pos >= 0) : "Unexpected negative position."; assert (queue[pos].equals(e)); // Did the value improve? - if(comparator == null) { + if (comparator == null) { @SuppressWarnings("unchecked") Comparable<Object> c = (Comparable<Object>) e; - if(c.compareTo(queue[pos]) >= 0) { + if (c.compareTo(queue[pos]) >= 0) { return; } - } - else { - if(comparator.compare(e, queue[pos]) >= 0) { + } else { + if (comparator.compare(e, queue[pos]) >= 0) { return; } } - if(pos >= validSize) { - queue[pos] = e; - // validSize = Math.min(pos, validSize); - } - else { - // ensureValid(); - heapifyUp(pos, e); - } + heapifyUp(pos, e); heapModified(); return; } @@ -149,7 +138,7 @@ public class UpdatableHeap<O> extends Heap<O> { @Override protected O removeAt(int pos) { - if(pos < 0 || pos >= size) { + if (pos < 0 || pos >= size) { return null; } @SuppressWarnings("unchecked") @@ -158,34 +147,22 @@ public class UpdatableHeap<O> extends Heap<O> { final Object reinsert = queue[size - 1]; queue[size - 1] = null; // Keep heap in sync? 
- if(validSize == size) { - size -= 1; - validSize -= 1; - if(comparator != null) { - if(comparator.compare(ret, reinsert) > 0) { - heapifyUpComparator(pos, reinsert); - } - else { - heapifyDownComparator(pos, reinsert); - } + size--; + if (comparator != null) { + if (comparator.compare(ret, reinsert) > 0) { + heapifyUpComparator(pos, reinsert); + } else { + heapifyDownComparator(pos, reinsert); } - else { - @SuppressWarnings("unchecked") - Comparable<Object> comp = (Comparable<Object>) ret; - if(comp.compareTo(reinsert) > 0) { - heapifyUpComparable(pos, reinsert); - } - else { - heapifyDownComparable(pos, reinsert); - } + } else { + @SuppressWarnings("unchecked") + Comparable<Object> comp = (Comparable<Object>) ret; + if (comp.compareTo(reinsert) > 0) { + heapifyUpComparable(pos, reinsert); + } else { + heapifyDownComparable(pos, reinsert); } } - else { - size -= 1; - validSize = Math.min(pos >>> 1, validSize); - queue[pos] = reinsert; - index.put(reinsert, pos); - } heapModified(); // Keep index up to date index.remove(ret); @@ -200,10 +177,9 @@ public class UpdatableHeap<O> extends Heap<O> { */ public O removeObject(O e) { int pos = index.get(e); - if(pos >= 0) { + if (pos >= 0) { return removeAt(pos); - } - else { + } else { return null; } } @@ -214,7 +190,7 @@ public class UpdatableHeap<O> extends Heap<O> { index.remove(node); return node; } - + @Override public O replaceTopElement(O e) { O node = super.replaceTopElement(e); @@ -232,11 +208,11 @@ public class UpdatableHeap<O> extends Heap<O> { @SuppressWarnings("unchecked") protected void heapifyUpComparable(int pos, Object elem) { final Comparable<Object> cur = (Comparable<Object>) elem; // queue[pos]; - while(pos > 0) { + while (pos > 0) { final int parent = (pos - 1) >>> 1; Object par = queue[parent]; - if(cur.compareTo(par) >= 0) { + if (cur.compareTo(par) >= 0) { break; } queue[pos] = par; @@ -255,11 +231,11 @@ public class UpdatableHeap<O> extends Heap<O> { */ @Override protected void 
heapifyUpComparator(int pos, Object cur) { - while(pos > 0) { + while (pos > 0) { final int parent = (pos - 1) >>> 1; Object par = queue[parent]; - if(comparator.compare(cur, par) >= 0) { + if (comparator.compare(cur, par) >= 0) { break; } queue[pos] = par; @@ -276,21 +252,21 @@ public class UpdatableHeap<O> extends Heap<O> { Comparable<Object> cur = (Comparable<Object>) reinsert; int pos = ipos; final int half = size >>> 1; - while(pos < half) { + while (pos < half) { // Get left child (must exist!) int cpos = (pos << 1) + 1; Object child = queue[cpos]; // Test right child, if present final int rchild = cpos + 1; - if(rchild < size) { + if (rchild < size) { Object right = queue[rchild]; - if(((Comparable<Object>) child).compareTo(right) > 0) { + if (((Comparable<Object>) child).compareTo(right) > 0) { cpos = rchild; child = right; } } - if(cur.compareTo(child) <= 0) { + if (cur.compareTo(child) <= 0) { break; } queue[pos] = child; @@ -299,32 +275,32 @@ public class UpdatableHeap<O> extends Heap<O> { } queue[pos] = cur; index.put(cur, pos); - return (pos == ipos); + return (pos != ipos); } @Override protected boolean heapifyDownComparator(final int ipos, Object cur) { int pos = ipos; final int half = size >>> 1; - while(pos < half) { + while (pos < half) { int min = pos; Object best = cur; final int lchild = (pos << 1) + 1; Object left = queue[lchild]; - if(comparator.compare(best, left) > 0) { + if (comparator.compare(best, left) > 0) { min = lchild; best = left; } final int rchild = lchild + 1; - if(rchild < size) { + if (rchild < size) { Object right = queue[rchild]; - if(comparator.compare(best, right) > 0) { + if (comparator.compare(best, right) > 0) { min = rchild; best = right; } } - if(min == pos) { + if (min == pos) { break; } queue[pos] = best; @@ -333,6 +309,6 @@ public class UpdatableHeap<O> extends Heap<O> { } queue[pos] = cur; index.put(cur, pos); - return (pos == ipos); + return (pos != ipos); } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/package-info.java index 3f193171..83be37f4 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/HashMapHierarchy.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/HashMapHierarchy.java new file mode 100644 index 00000000..c77a9329 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/HashMapHierarchy.java @@ -0,0 +1,580 @@ +package de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Iterator;
+
+/**
+ * Centralized hierarchy implementation, using a HashMap that maps every object
+ * to a record holding its direct parents and direct children in arrays.
+ *
+ * Objects are compared with {@code equals}; {@code null} objects are not
+ * reliably supported (the iterator returned by {@link #iterAll()} uses
+ * {@code null} to signal exhaustion).
+ *
+ * @author Erich Schubert
+ *
+ * @param <O> Object type (arbitrary!)
+ */
+public class HashMapHierarchy<O> implements ModifiableHierarchy<O> {
+  /**
+   * Reference storage: one record per object known to the hierarchy.
+   */
+  private final HashMap<O, Rec<O>> graph;
+
+  /**
+   * Constructor.
+   */
+  public HashMapHierarchy() {
+    super();
+    this.graph = new HashMap<>();
+  }
+
+  @Override
+  public int size() {
+    return graph.size();
+  }
+
+  @Override
+  public void add(O parent, O child) {
+    // Add child to parent.
+    getOrCreate(parent).addChild(child);
+    // Add parent to child.
+    getOrCreate(child).addParent(parent);
+  }
+
+  @Override
+  public void add(O entry) {
+    getOrCreate(entry);
+  }
+
+  /**
+   * Get the record of an object, creating and registering an empty record when
+   * the object is not yet part of the hierarchy.
+   *
+   * @param obj Object to look up
+   * @return Record of the object, never {@code null}
+   */
+  private Rec<O> getOrCreate(O obj) {
+    Rec<O> rec = graph.get(obj);
+    if (rec == null) {
+      rec = new Rec<>();
+      graph.put(obj, rec);
+    }
+    return rec;
+  }
+
+  @Override
+  public void remove(O parent, O child) {
+    // Remove child from parent.
+    {
+      Rec<O> rec = graph.get(parent);
+      if (rec != null) {
+        rec.removeChild(child);
+      }
+    }
+    // Remove parent from child
+    {
+      Rec<O> rec = graph.get(child);
+      if (rec != null) {
+        rec.removeParent(parent);
+      }
+    }
+  }
+
+  @Override
+  public void remove(O entry) {
+    Rec<O> rec = graph.get(entry);
+    if (rec == null) {
+      return;
+    }
+    // Unlink from all parents.
+    for (int i = 0; i < rec.nump; i++) {
+      graph.get(rec.parents[i]).removeChild(entry);
+      rec.parents[i] = null;
+    }
+    // Unlink from all children; the children themselves stay in the hierarchy.
+    for (int i = 0; i < rec.numc; i++) {
+      graph.get(rec.children[i]).removeParent(entry);
+      rec.children[i] = null;
+    }
+    graph.remove(entry);
+  }
+
+  @SuppressWarnings("unchecked")
+  @Override
+  public void removeSubtree(O entry) {
+    Rec<O> rec = graph.get(entry);
+    if (rec == null) {
+      return;
+    }
+    // Unlink from all parents.
+    for (int i = 0; i < rec.nump; i++) {
+      graph.get(rec.parents[i]).removeChild(entry);
+      rec.parents[i] = null;
+    }
+    for (int i = 0; i < rec.numc; i++) {
+      final Rec<O> crec = graph.get(rec.children[i]);
+      crec.removeParent(entry);
+      // Only descend into children that are no longer reachable via another
+      // parent.
+      if (crec.nump == 0) {
+        removeSubtree((O) rec.children[i]);
+      }
+      rec.children[i] = null;
+    }
+    // Drop the record itself; otherwise it would remain registered with stale
+    // counters and emptied arrays.
+    graph.remove(entry);
+  }
+
+  @Override
+  public int numChildren(O obj) {
+    Rec<O> rec = graph.get(obj);
+    if (rec == null) {
+      return 0;
+    }
+    return rec.numc;
+  }
+
+  @SuppressWarnings("unchecked")
+  @Override
+  public Iter<O> iterChildren(O obj) {
+    Rec<O> rec = graph.get(obj);
+    if (rec == null) {
+      return (Iter<O>) EMPTY_ITERATOR;
+    }
+    return rec.iterChildren();
+  }
+
+  @Override
+  public Iter<O> iterDescendants(O obj) {
+    return new ItrDesc(obj);
+  }
+
+  @Override
+  public int numParents(O obj) {
+    Rec<O> rec = graph.get(obj);
+    if (rec == null) {
+      return 0;
+    }
+    return rec.nump;
+  }
+
+  @SuppressWarnings("unchecked")
+  @Override
+  public Iter<O> iterParents(O obj) {
+    Rec<O> rec = graph.get(obj);
+    if (rec == null) {
+      return (Iter<O>) EMPTY_ITERATOR;
+    }
+    return rec.iterParents();
+  }
+
+  @Override
+  public Iter<O> iterAncestors(O obj) {
+    return new ItrAnc(obj);
+  }
+
+  @Override
+  public Iter<O> iterAll() {
+    return new ItrAll();
+  }
+
+  /**
+   * Hierarchy pointers for an object.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   *
+   * @param <O> object type
+   */
+  private static class Rec<O> {
+    /**
+     * Number of parents, number of children.
+     */
+    int nump = 0, numc = 0;
+
+    /**
+     * Parents; only the first {@link #nump} slots are used. {@code null} when
+     * there are no parents.
+     */
+    Object[] parents = null;
+
+    /**
+     * Children; only the first {@link #numc} slots are used. {@code null} when
+     * there are no children.
+     */
+    Object[] children = null;
+
+    /**
+     * Add a parent. Parents that are already present are ignored.
+     *
+     * @param parent Parent to add.
+     */
+    void addParent(O parent) {
+      if (parents == null) {
+        parents = new Object[1];
+        parents[0] = parent;
+        nump = 1;
+      } else {
+        for (int i = 0; i < nump; i++) {
+          if (parent.equals(parents[i])) {
+            return;
+          }
+        }
+        if (parents.length == nump) {
+          // Grow to at least 5 slots. (Math.min would cap the capacity at 5 and
+          // overflow on the sixth parent.)
+          final int newsize = Math.max(5, (parents.length << 1) + 1);
+          parents = Arrays.copyOf(parents, newsize);
+        }
+        parents[nump] = parent;
+        nump++;
+      }
+    }
+
+    /**
+     * Add a child. Children that are already present are ignored.
+     *
+     * @param child Child to add
+     */
+    void addChild(O child) {
+      if (children == null) {
+        children = new Object[5];
+        children[0] = child;
+        numc = 1;
+      } else {
+        for (int i = 0; i < numc; i++) {
+          if (child.equals(children[i])) {
+            return;
+          }
+        }
+        if (children.length == numc) {
+          children = Arrays.copyOf(children, (children.length << 1) + 1);
+        }
+        children[numc] = child;
+        numc++;
+      }
+    }
+
+    /**
+     * Remove a parent. Does nothing if the parent is not present.
+     *
+     * @param parent Parent to remove.
+     */
+    void removeParent(O parent) {
+      if (parents == null) {
+        return;
+      }
+      for (int i = 0; i < nump; i++) {
+        if (parent.equals(parents[i])) {
+          // Decrement first: the vacated slot is the old last used slot, which
+          // is always a valid index (the old nump may equal the array length).
+          --nump;
+          System.arraycopy(parents, i + 1, parents, i, nump - i);
+          parents[nump] = null;
+          break;
+        }
+      }
+      if (nump == 0) {
+        parents = null;
+      }
+    }
+
+    /**
+     * Remove a child. Does nothing if the child is not present.
+     *
+     * @param child Child to remove.
+     */
+    void removeChild(O child) {
+      if (children == null) {
+        return;
+      }
+      for (int i = 0; i < numc; i++) {
+        if (child.equals(children[i])) {
+          // Decrement first: the vacated slot is the old last used slot, which
+          // is always a valid index (the old numc may equal the array length).
+          --numc;
+          System.arraycopy(children, i + 1, children, i, numc - i);
+          children[numc] = null;
+          break;
+        }
+      }
+      if (numc == 0) {
+        children = null;
+      }
+    }
+
+    /**
+     * Iterate over parents.
+     *
+     * @return Iterator for parents.
+     */
+    @SuppressWarnings("unchecked")
+    public Iter<O> iterParents() {
+      if (nump == 0) {
+        return (Iter<O>) EMPTY_ITERATOR;
+      }
+      return new ItrParents();
+    }
+
+    /**
+     * Iterate over children.
+     *
+     * @return Iterator for children.
+     */
+    @SuppressWarnings("unchecked")
+    public Iter<O> iterChildren() {
+      if (numc == 0) {
+        return (Iter<O>) EMPTY_ITERATOR;
+      }
+      return new ItrChildren();
+    }
+
+    /**
+     * Parent iterator.
+     *
+     * @author Erich Schubert
+     *
+     * @apiviz.exclude
+     */
+    class ItrParents implements Iter<O> {
+      /**
+       * Current position in the parents array.
+       */
+      int pos = 0;
+
+      @Override
+      public boolean valid() {
+        return pos < nump;
+      }
+
+      @Override
+      public void advance() {
+        pos++;
+      }
+
+      @SuppressWarnings("unchecked")
+      @Override
+      public O get() {
+        return (O) parents[pos];
+      }
+    }
+
+    /**
+     * Child iterator.
+     *
+     * @author Erich Schubert
+     *
+     * @apiviz.exclude
+     */
+    class ItrChildren implements Iter<O> {
+      /**
+       * Current position in the children array.
+       */
+      int pos = 0;
+
+      @Override
+      public boolean valid() {
+        return pos < numc;
+      }
+
+      @Override
+      public void advance() {
+        pos++;
+      }
+
+      @SuppressWarnings("unchecked")
+      @Override
+      public O get() {
+        return (O) children[pos];
+      }
+    }
+  }
+
+  /**
+   * Iterator over all descendants: each direct child is reported first,
+   * followed by the descendants of that child.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   */
+  private class ItrDesc implements Iter<O> {
+    /**
+     * Iterator over children
+     */
+    final Iter<O> childiter;
+
+    /**
+     * Iterator over the descendants of the current child; {@code null} while
+     * the current child itself is being reported.
+     */
+    Iter<O> subiter = null;
+
+    /**
+     * Starting element.
+     *
+     * @param start Object whose descendants are iterated
+     */
+    ItrDesc(O start) {
+      childiter = iterChildren(start);
+    }
+
+    @Override
+    public boolean valid() {
+      return childiter.valid() || (subiter != null && subiter.valid());
+    }
+
+    @Override
+    public void advance() {
+      if (subiter == null) { // Not yet descended
+        assert (childiter.valid());
+        subiter = iterDescendants(childiter.get());
+      } else { // Continue with subtree
+        subiter.advance();
+      }
+      if (subiter.valid()) {
+        return;
+      }
+      // Proceed to next child.
+      childiter.advance();
+      subiter = null;
+    }
+
+    @Override
+    public O get() {
+      if (subiter != null) {
+        assert (subiter.valid());
+        return subiter.get();
+      } else {
+        assert (childiter.valid());
+        return childiter.get();
+      }
+    }
+  }
+
+  /**
+   * Iterator over all ancestors: each direct parent is reported first,
+   * followed by the ancestors of that parent.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   */
+  private class ItrAnc implements Iter<O> {
+    /**
+     * Iterator over parents
+     */
+    final Iter<O> parentiter;
+
+    /**
+     * Iterator over the ancestors of the current parent; {@code null} while
+     * the current parent itself is being reported.
+     */
+    Iter<O> subiter = null;
+
+    /**
+     * Starting element.
+     *
+     * @param start Object whose ancestors are iterated
+     */
+    ItrAnc(O start) {
+      parentiter = iterParents(start);
+    }
+
+    @Override
+    public boolean valid() {
+      return parentiter.valid() || (subiter != null && subiter.valid());
+    }
+
+    @Override
+    public void advance() {
+      if (subiter == null) { // Not yet ascended
+        assert (parentiter.valid());
+        subiter = iterAncestors(parentiter.get());
+      } else { // Continue with the ancestors of the current parent
+        subiter.advance();
+      }
+      if (subiter.valid()) {
+        return;
+      }
+      // Proceed to next parent.
+      parentiter.advance();
+      subiter = null;
+    }
+
+    @Override
+    public O get() {
+      if (subiter != null) {
+        assert (subiter.valid());
+        return subiter.get();
+      } else {
+        assert (parentiter.valid());
+        return parentiter.get();
+      }
+    }
+  }
+
+  /**
+   * Iterator over all members of the hierarchy.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   */
+  private class ItrAll implements Iter<O> {
+    /**
+     * The true iterator.
+     */
+    final Iterator<O> iter;
+
+    /**
+     * Current object; {@code null} once the iterator is exhausted.
+     */
+    O cur = null;
+
+    /**
+     * Constructor.
+     */
+    ItrAll() {
+      iter = graph.keySet().iterator();
+      // Load the first element, if any.
+      advance();
+    }
+
+    @Override
+    public boolean valid() {
+      return cur != null;
+    }
+
+    @Override
+    public void advance() {
+      if (iter.hasNext()) {
+        cur = iter.next();
+      } else {
+        cur = null;
+      }
+    }
+
+    @Override
+    public O get() {
+      return cur;
+    }
+  }
+
+  /**
+   * Empty iterator, shared by all instances because it holds no state.
+   */
+  private static final Iter<?> EMPTY_ITERATOR = new Iter<Object>() {
+    @Override
+    public boolean valid() {
+      return false;
+    }
+
+    @Override
+    public void advance() {
+      throw new UnsupportedOperationException("Empty iterators must not be advanced.");
+    }
+
+    @Override
+    public Object get() {
+      throw new UnsupportedOperationException("Iterator is empty.");
+    }
+  };
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/Hierarchical.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/Hierarchical.java
deleted file mode 100644
index 29909069..00000000
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/Hierarchical.java
+++ /dev/null
@@ -1,90 +0,0 @@
-package de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy;
-
-/*
- This file is part of ELKI:
- Environment for Developing KDD-Applications Supported by Index-Structures
-
- Copyright (C) 2012
- Ludwig-Maximilians-Universität München
- Lehr- und Forschungseinheit für Datenbanksysteme
- ELKI Development Team
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
- - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -import java.util.Iterator; -import java.util.List; - - -/** - * Interface for objects with an <b>internal</b> hierarchy interface. - * - * Note that the object can chose to delegate the hierarchy to an external hierarchy. - * - * @author Erich Schubert - * - * @param <O> Object type in hierarchy - */ -public interface Hierarchical<O> { - /** - * Test for hierarchical properties - * - * @return hierarchical data model. - */ - public boolean isHierarchical(); - - /** - * Get number of children - * - * @return number of children - */ - public int numChildren(); - - /** - * Get children list. Resulting list MAY be modified. Result MAY be null, if - * the model is not hierarchical. - * - * @return list of children - */ - public List<O> getChildren(); - - /** - * Iterate descendants (recursive children) - * - * @return iterator for descendants - */ - public Iterator<O> iterDescendants(); - - /** - * Get number of parents - * - * @return number of parents - */ - public int numParents(); - - /** - * Get parents list. Resulting list MAY be modified. Result MAY be null, if - * the model is not hierarchical. - * - * @return list of parents - */ - public List<O> getParents(); - - /** - * Iterate ancestors (recursive parents) - * - * @return iterator for ancestors - */ - public Iterator<O> iterAncestors(); -}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/Hierarchy.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/Hierarchy.java index 0a16e9b7..fec9c7b4 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/Hierarchy.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/Hierarchy.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,38 +23,40 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy; along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import java.util.Iterator; -import java.util.List; - /** * This interface represents an (external) hierarchy of objects. It can contain * arbitrary objects, BUT the hierarchy has to be accessed using the hierarchy - * object, i.e. {@code hierarchy.getChildren(object);}. - * - * See {@link Hierarchical} for an interface for objects with an internal - * hierarchy (where you can use {@code object.getChildren();}) + * object, i.e. {@code hierarchy.iterChildren(object);}. * * @author Erich Schubert * + * @apiviz.has Iter + * * @param <O> Object type */ public interface Hierarchy<O> { /** + * Total size - number of objects contained. + * + * @return Size + */ + int size(); + + /** * Get number of children * * @param self object to get number of children for * @return number of children */ - public int numChildren(O self); + int numChildren(O self); /** - * Get children list. Resulting list MAY be modified. Result MAY be null, if - * the model is not hierarchical. + * Iterate over the (direct) children. 
* * @param self object to get children for - * @return list of children + * @return iterator for children */ - public List<O> getChildren(O self); + Iter<O> iterChildren(O self); /** * Iterate descendants (recursive children) @@ -62,7 +64,7 @@ public interface Hierarchy<O> { * @param self object to get descendants for * @return iterator for descendants */ - public Iterator<O> iterDescendants(O self); + Iter<O> iterDescendants(O self); /** * Get number of (direct) parents @@ -70,16 +72,15 @@ public interface Hierarchy<O> { * @param self reference object * @return number of parents */ - public int numParents(O self); + int numParents(O self); /** - * Get parents list. Resulting list MAY be modified. Result MAY be null, if - * the model is not hierarchical. + * Iterate over the (direct) parents. * * @param self object to get parents for - * @return list of parents + * @return iterator of parents */ - public List<O> getParents(O self); + Iter<O> iterParents(O self); /** * Iterate ancestors (recursive parents) @@ -87,5 +88,30 @@ public interface Hierarchy<O> { * @param self object to get ancestors for * @return iterator for ancestors */ - public Iterator<O> iterAncestors(O self); -}
\ No newline at end of file + Iter<O> iterAncestors(O self); + + /** + * Iterate over all members. + * + * @return Iterator over all members. + */ + Iter<O> iterAll(); + + /** + * Iterator interface. + * + * TODO: add a skipSubtree method? + * + * @author Erich Schubert + * + * @param <O> Object type. + */ + static interface Iter<O> extends de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter { + /** + * Access the current object. + * + * @return Current object + */ + O get(); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/HierarchyHashmapList.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/HierarchyHashmapList.java deleted file mode 100644 index bd6d67bf..00000000 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/HierarchyHashmapList.java +++ /dev/null @@ -1,299 +0,0 @@ -package de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy; - -/* - This file is part of ELKI: - Environment for Developing KDD-Applications Supported by Index-Structures - - Copyright (C) 2012 - Ludwig-Maximilians-Universität München - Lehr- und Forschungseinheit für Datenbanksysteme - ELKI Development Team - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. 
- */ - -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; - -import de.lmu.ifi.dbs.elki.logging.LoggingUtil; -import de.lmu.ifi.dbs.elki.utilities.iterator.EmptyIterator; - -/** - * Centralized hierarchy implementation, using a HashMap of Lists. - * - * @author Erich Schubert - * - * @param <O> Object type (arbitrary!) - */ -public class HierarchyHashmapList<O> implements ModifiableHierarchy<O> { - /** - * The data storage for parents - */ - final private HashMap<O, List<O>> pmap; - - /** - * The data storage for children - */ - final private HashMap<O, List<O>> cmap; - - /** - * Constructor - */ - public HierarchyHashmapList() { - super(); - this.pmap = new HashMap<O, List<O>>(); - this.cmap = new HashMap<O, List<O>>(); - } - - @Override - public void add(O parent, O child) { - // Add child to parent. - { - List<O> pchi = this.cmap.get(parent); - if(pchi == null) { - pchi = new LinkedList<O>(); - this.cmap.put(parent, pchi); - } - if(!pchi.contains(child)) { - pchi.add(child); - } else { - LoggingUtil.warning("Result added twice: "+parent+" -> "+child, new Throwable()); - } - } - // Add child to parent - { - List<O> cpar = this.pmap.get(child); - if(cpar == null) { - cpar = new LinkedList<O>(); - this.pmap.put(child, cpar); - } - if(!cpar.contains(parent)) { - cpar.add(parent); - } else { - LoggingUtil.warning("Result added twice: "+parent+" <- "+child, new Throwable()); - } - } - } - - @Override - public void remove(O parent, O child) { - // Remove child from parent. 
- { - List<O> pchi = this.cmap.get(parent); - if(pchi != null) { - while(pchi.remove(child)) { - // repeat - remove all instances - } - if(pchi.size() == 0) { - this.cmap.remove(parent); - } - } - } - // Remove parent from child - { - List<O> cpar = this.pmap.get(child); - if(cpar != null) { - while(cpar.remove(parent)) { - // repeat - remove all instances - } - if(cpar.size() == 0) { - this.pmap.remove(child); - } - } - } - } - - /** - * Put an object along with parent and child lists. - * - * @param obj Object - * @param parents Parent list - * @param children Child list - */ - public void put(O obj, List<O> parents, List<O> children) { - this.pmap.put(obj, parents); - this.cmap.put(obj, children); - } - - @Override - public int numChildren(O obj) { - List<O> children = this.cmap.get(obj); - if(children == null) { - return 0; - } - return children.size(); - } - - @Override - public List<O> getChildren(O obj) { - List<O> children = this.cmap.get(obj); - if(children == null) { - return Collections.emptyList(); - } - return children; - } - - @Override - public Iterator<O> iterDescendants(O obj) { - return new ItrDesc(obj); - } - - @Override - public int numParents(O obj) { - List<O> parents = this.pmap.get(obj); - if(parents == null) { - return 0; - } - return parents.size(); - } - - @Override - public List<O> getParents(O obj) { - List<O> parents = this.pmap.get(obj); - if(parents == null) { - return Collections.emptyList(); - } - return parents; - } - - @Override - public Iterator<O> iterAncestors(O obj) { - return new ItrAnc(obj); - } - - /** - * Iterator to collect into the descendants. 
- * - * @author Erich Schubert - * - * @apiviz.exclude - */ - private class ItrDesc implements Iterator<O> { - /** - * Starting object (for cloning); - */ - final O start; - - /** - * Iterator over children - */ - final Iterator<O> childiter; - - /** - * Iterator of current child - */ - Iterator<O> subiter; - - public ItrDesc(O start) { - this.start = start; - List<O> children = getChildren(start); - if(children != null) { - this.childiter = children.iterator(); - } - else { - this.childiter = EmptyIterator.STATIC(); - } - this.subiter = null; - } - - @Override - public boolean hasNext() { - if(subiter != null && subiter.hasNext()) { - return true; - } - return childiter.hasNext(); - } - - @Override - public O next() { - // Try nested iterator first ... - if(subiter != null && subiter.hasNext()) { - return subiter.next(); - } - // Next direct child, update subiter. - final O child = childiter.next(); - subiter = iterDescendants(child); - return child; - } - - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - } - - /** - * Iterator over all Ancestors. - * - * @author Erich Schubert - * - * @apiviz.exclude - */ - private class ItrAnc implements Iterator<O> { - /** - * Starting object (for cloning); - */ - final O start; - - /** - * Iterator over parents - */ - final Iterator<O> parentiter; - - /** - * Iterator of current parent - */ - Iterator<O> subiter; - - public ItrAnc(O start) { - this.start = start; - List<O> parents = getParents(start); - if(parents != null) { - this.parentiter = parents.iterator(); - } - else { - this.parentiter = EmptyIterator.STATIC(); - } - this.subiter = null; - } - - @Override - public boolean hasNext() { - if(subiter != null && subiter.hasNext()) { - return true; - } - return parentiter.hasNext(); - } - - @Override - public O next() { - // Try nested iterator first ... - if(subiter != null && subiter.hasNext()) { - return subiter.next(); - } - // Next direct parent, update subiter. 
- final O parent = parentiter.next(); - subiter = iterAncestors(parent); - return parent; - } - - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - } -}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/HierarchyReferenceLists.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/HierarchyReferenceLists.java deleted file mode 100644 index 76091298..00000000 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/HierarchyReferenceLists.java +++ /dev/null @@ -1,232 +0,0 @@ -package de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy; - -/* - This file is part of ELKI: - Environment for Developing KDD-Applications Supported by Index-Structures - - Copyright (C) 2012 - Ludwig-Maximilians-Universität München - Lehr- und Forschungseinheit für Datenbanksysteme - ELKI Development Team - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -import java.util.Iterator; -import java.util.List; - -import de.lmu.ifi.dbs.elki.utilities.iterator.EmptyIterator; - -/** - * Hierarchy implementation with a per-object representation. - * - * @author Erich Schubert - * - * @apiviz.uses Hierarchical - * - * @param <O> Type of objects in hierarchy - */ -public class HierarchyReferenceLists<O extends Hierarchical<O>> implements Hierarchy<O> { - /** - * Owner - */ - protected O owner; - - /** - * Storage for children - */ - protected List<O> children; - - /** - * Storage for parents - */ - protected List<O> parents; - - /** - * Constructor for hierarchy object. 
- * - * @param owner owning cluster. - * @param children child clusters. May be null. - * @param parents parent clusters. May be null. - */ - public HierarchyReferenceLists(O owner, List<O> children, List<O> parents) { - super(); - this.owner = owner; - this.children = children; - this.parents = parents; - } - - @Override - public int numChildren(O self) { - if(owner != self) { - throw new UnsupportedOperationException("Decentral hierarchy queried for wrong object!"); - } - if(children == null) { - return 0; - } - return children.size(); - } - - @Override - public List<O> getChildren(O self) { - if(owner != self) { - throw new UnsupportedOperationException("Decentral hierarchy queried for wrong object!"); - } - return children; - } - - @Override - public Iterator<O> iterDescendants(O self) { - if(owner != self) { - return EmptyIterator.STATIC(); - } - if (children == null) { - return EmptyIterator.STATIC(); - } - return new ItrDesc(self); - } - - @Override - public int numParents(O self) { - if(owner != self) { - throw new UnsupportedOperationException("Decentral hierarchy queried for wrong object!"); - } - if (parents == null) { - return 0; - } - return parents.size(); - } - - /** - * Return parents - */ - @Override - public List<O> getParents(O self) { - if(owner != self) { - throw new UnsupportedOperationException("Decentral hierarchy queried for wrong object!"); - } - return parents; - } - - @Override - public Iterator<O> iterAncestors(O self) { - if(owner != self) { - throw new UnsupportedOperationException("Decentral hierarchy queried for wrong object!"); - } - if (parents == null) { - return EmptyIterator.STATIC(); - } - return new ItrAnc(self); - } - - /** - * Iterator to collect into the descendants. 
- * - * @author Erich Schubert - * - * @apiviz.exclude - */ - private class ItrDesc implements Iterator<O> { - /** - * Iterator over children - */ - final Iterator<O> childiter; - - /** - * Iterator of current child - */ - Iterator<O> subiter; - - public ItrDesc(O start) { - assert (start == owner); - this.childiter = children.iterator(); - this.subiter = null; - } - - @Override - public boolean hasNext() { - if(subiter != null && subiter.hasNext()) { - return true; - } - return childiter.hasNext(); - } - - @Override - public O next() { - // Try nested iterator first ... - if(subiter != null && subiter.hasNext()) { - return subiter.next(); - } - // Next direct child, update subiter. - final O child = childiter.next(); - subiter = child.iterDescendants(); - return child; - } - - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - } - - /** - * Iterator over all Ancestors. - * - * @author Erich Schubert - * - * @apiviz.exclude - */ - private class ItrAnc implements Iterator<O> { - /** - * Iterator over parents - */ - final Iterator<O> parentiter; - - /** - * Iterator of current parent - */ - Iterator<O> subiter; - - public ItrAnc(O start) { - assert (start == owner); - this.parentiter = parents.iterator(); - this.subiter = null; - } - - @Override - public boolean hasNext() { - if(subiter != null && subiter.hasNext()) { - return true; - } - return parentiter.hasNext(); - } - - @Override - public O next() { - // Try nested iterator first ... - if(subiter != null && subiter.hasNext()) { - return subiter.next(); - } - // Next direct parent, update subiter. - final O parent = parentiter.next(); - subiter = parent.iterAncestors(); - return parent; - } - - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - } -}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/ModifiableHierarchy.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/ModifiableHierarchy.java index dadc6f66..06001d6b 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/ModifiableHierarchy.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/ModifiableHierarchy.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,8 +23,6 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy; along with this program. If not, see <http://www.gnu.org/licenses/>. */ - - /** * Modifiable Hierarchy. * @@ -39,8 +37,14 @@ public interface ModifiableHierarchy<O> extends Hierarchy<O> { * @param parent Parent * @param child Child */ - // TODO: return true when new? - public void add(O parent, O child); + void add(O parent, O child); + + /** + * Add an entry (initializes data structures). + * + * @param entry Entry + */ + void add(O entry); /** * Remove a parent-child relationship. @@ -48,6 +52,20 @@ public interface ModifiableHierarchy<O> extends Hierarchy<O> { * @param parent Parent * @param child Child */ - // TODO: return true when found? - public void remove(O parent, O child); + void remove(O parent, O child); + + /** + * Remove an entry and all its parent-child relationships. + * + * @param entry Entry + */ + void remove(O entry); + + /** + * Remove an entry and it's whole subtree (unless the elements are reachable + * by a different path!) 
+ * + * @param entry Entry + */ + void removeSubtree(O entry); } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/package-info.java index 0aba31be..965b15fc 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/hierarchy/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractObjDynamicHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractObjDynamicHistogram.java index 9d0dba0d..165c2c8b 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractObjDynamicHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractObjDynamicHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -127,21 +127,36 @@ public abstract class AbstractObjDynamicHistogram<T> extends AbstractObjStaticHi // Store in cache if (cachefill >= 0) { if (cachefill < cacheposs.length) { - cacheposs[cachefill] = coord; cachevals[cachefill] = cloneForCache(value); - cachefill++; + ++cachefill; return; - } else { - materialize(); - // But continue below! } } - // Check if we need to resample to accomodate this bin. 
- testResample(coord); - // super class will handle histogram resizing / shifting - T exist = get(coord); - data[getBinNr(coord)] = aggregate(exist, value); + if (coord == Double.NEGATIVE_INFINITY) { + aggregateSpecial(value, 0); + } else if (coord == Double.POSITIVE_INFINITY) { + aggregateSpecial(value, 1); + } else if (Double.isNaN(coord)) { + aggregateSpecial(value, 2); + } else { + // super class will handle histogram resizing / shifting + T exist = get(coord); + data[getBinNr(coord)] = aggregate(exist, value); + } + } + + /** + * Aggregate for a special value. + * + * @param value Parameter value + * @param bin Special bin index. + */ + protected void aggregateSpecial(T value, int bin) { + final T exist = getSpecial(bin); + // Note: do not inline above accessor, as getSpecial will initialize the + // special variable used below! + special[bin] = aggregate(exist, value); } /** diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractObjStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractObjStaticHistogram.java index c1882302..4a1649af 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractObjStaticHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractObjStaticHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -35,6 +35,16 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; */ public abstract class AbstractObjStaticHistogram<T> extends AbstractStaticHistogram implements ObjHistogram<T> { /** + * Data store + */ + Object[] data; + + /** + * Special value storage: infinity, NaN + */ + Object[] special = null; + + /** * Constructor. 
* * @param bins Number of bins @@ -46,15 +56,13 @@ public abstract class AbstractObjStaticHistogram<T> extends AbstractStaticHistog if (bins >= 0) { // -1 will be used by FlexiHistogram to delay initialization. data = new Object[bins]; + for (int i = 0; i < bins; i++) { + data[i] = makeObject(); + } } } /** - * Data store - */ - Object[] data; - - /** * Access the value of a bin with new data. * * @param coord Coordinate @@ -62,6 +70,15 @@ public abstract class AbstractObjStaticHistogram<T> extends AbstractStaticHistog */ @SuppressWarnings("unchecked") public T get(double coord) { + if (coord == Double.NEGATIVE_INFINITY) { + return getSpecial(0); + } + if (coord == Double.POSITIVE_INFINITY) { + return getSpecial(1); + } + if (Double.isNaN(coord)) { + return getSpecial(2); + } int bin = getBinNr(coord); if (bin < 0) { if (size - bin > data.length) { @@ -103,6 +120,19 @@ public abstract class AbstractObjStaticHistogram<T> extends AbstractStaticHistog } /** + * Ensure that we have storage for special values (infinity, NaN) + * + * @param idx Index to return. + */ + @SuppressWarnings("unchecked") + protected T getSpecial(int idx) { + if (special == null) { + special = new Object[] { makeObject(), makeObject(), makeObject() }; + } + return (T) special[idx]; + } + + /** * Class to make a new object for the data store. * * @return New instance. 
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractStaticHistogram.java index 799ac009..3363e61e 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractStaticHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/AbstractStaticHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleArrayStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleArrayStaticHistogram.java index aeba3c4b..86b53d03 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleArrayStaticHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleArrayStaticHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleDynamicHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleDynamicHistogram.java index 77a1f9e4..84f97dfe 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleDynamicHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleDynamicHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is 
part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleHistogram.java index d5cee785..e4a24c95 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleStaticHistogram.java index db839d10..5a634cf2 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleStaticHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/DoubleStaticHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatDynamicHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatDynamicHistogram.java index a14ed00a..9829eaf8 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatDynamicHistogram.java 
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatDynamicHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatHistogram.java index f5a65bfa..7f034152 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatStaticHistogram.java index b3f41994..063bd80a 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatStaticHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/FloatStaticHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/Histogram.java 
b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/Histogram.java index 75be6830..8c8d9a87 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/Histogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/Histogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,7 +23,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import de.lmu.ifi.dbs.elki.utilities.iterator.ArrayIter; +import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.ArrayIter; /** * Abstract API for histograms. Without specific type information, to allow this diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntArrayStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntArrayStaticHistogram.java index 8d00604b..ff9a82aa 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntArrayStaticHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntArrayStaticHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntDynamicHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntDynamicHistogram.java index 0967ebd5..b131af7d 100644 --- 
a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntDynamicHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntDynamicHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntHistogram.java index 9ec4ec56..9bfae100 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntStaticHistogram.java index d4de36d7..7b1eed94 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntStaticHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntStaticHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git 
a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongArrayStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongArrayStaticHistogram.java index efbf751f..e3580792 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongArrayStaticHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongArrayStaticHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongDynamicHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongDynamicHistogram.java index 676a5e8f..93c4eee5 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongDynamicHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongDynamicHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongHistogram.java index 9be15e65..16577c38 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications 
Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongStaticHistogram.java index 63e15599..b270908d 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongStaticHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/LongStaticHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/MeanVarianceStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/MeanVarianceStaticHistogram.java index 2a464382..0f1ea0a3 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/MeanVarianceStaticHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/MeanVarianceStaticHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ObjHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ObjHistogram.java index ac4d4e4b..bad4eec1 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ObjHistogram.java +++ 
b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ObjHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortDynamicHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortDynamicHistogram.java index ff94928b..a49810ee 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortDynamicHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortDynamicHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortHistogram.java index 699df896..0b83bc4c 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortStaticHistogram.java 
b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortStaticHistogram.java index b2809e1e..2819d966 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortStaticHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/ShortStaticHistogram.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/package-info.java index 65dd6446..cee1836b 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/package-info.java @@ -13,7 +13,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/iterator/ArrayIter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/ArrayIter.java index fb6601d1..7b2a96ad 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/iterator/ArrayIter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/ArrayIter.java @@ -1,10 +1,10 @@ -package de.lmu.ifi.dbs.elki.utilities.iterator; +package de.lmu.ifi.dbs.elki.utilities.datastructures.iterator; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -27,8 +27,8 @@ 
package de.lmu.ifi.dbs.elki.utilities.iterator; * Array iterators can also go backwards and seek. * * @author Erich Schubert - * - * @apiviz.landmark + * + * @apiviz.excludeSubtypes */ public interface ArrayIter extends Iter { /** @@ -37,7 +37,7 @@ public interface ArrayIter extends Iter { * @return Iterator position */ public int getOffset(); - + /** * Moves the iterator forward or backward by the given offset. * @@ -56,4 +56,4 @@ public interface ArrayIter extends Iter { * @param off Seek offset */ public void seek(int off); -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/iterator/ReverseListIterator.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/ArrayListIter.java index c207e44c..820217ec 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/iterator/ReverseListIterator.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/ArrayListIter.java @@ -1,10 +1,10 @@ -package de.lmu.ifi.dbs.elki.utilities.iterator; +package de.lmu.ifi.dbs.elki.utilities.datastructures.iterator; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,83 +23,77 @@ package de.lmu.ifi.dbs.elki.utilities.iterator; along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import java.util.Iterator; import java.util.List; -import java.util.ListIterator; /** - * Reverse iterator for lists. + * ELKI style Iterator for array lists. + * + * Note: this implementation is only efficient for lists with efficient random + * access and seeking (i.e. ArrayLists, but not Linked Lists!) * * @author Erich Schubert * - * @param <E> Element type + * @apiviz.excludeSubtypes + * + * @param <O> contained object type. */ -public class ReverseListIterator<E> implements Iterator<E>, ListIterator<E> { +public class ArrayListIter<O> implements ArrayIter { /** - * The actual iterator + * The array list to iterate over. */ - final ListIterator<E> iter; + final List<O> data; /** - * Constructor. - * - * @param iter List iterator + * Current position. */ - public ReverseListIterator(ListIterator<E> iter) { - this.iter = iter; - } + int pos = 0; /** * Constructor. * - * @param list Existing list + * @param data Data array. 
*/ - public ReverseListIterator(List<E> list) { - this.iter = list.listIterator(list.size()); - } - - @Override - public boolean hasNext() { - return iter.hasPrevious(); + public ArrayListIter(List<O> data) { + super(); + this.data = data; } @Override - public E next() { - return iter.previous(); + public boolean valid() { + return pos < data.size(); } @Override - public void remove() { - iter.remove(); + public void advance() { + pos++; } @Override - public boolean hasPrevious() { - return iter.hasNext(); + public int getOffset() { + return pos; } @Override - public E previous() { - return iter.next(); + public void advance(int count) { + pos += count; } @Override - public int nextIndex() { - return iter.previousIndex(); + public void retract() { + pos--; } @Override - public int previousIndex() { - return iter.nextIndex(); + public void seek(int off) { + pos = off; } - @Override - public void set(E e) { - iter.set(e); - } - - @Override - public void add(E e) { - iter.add(e); + /** + * Get the current element. + * + * @return current element + */ + public O get() { + return data.get(pos); } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/iterator/Iter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/Iter.java index 8bb1dcc6..3d111f14 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/iterator/Iter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/Iter.java @@ -1,10 +1,10 @@ -package de.lmu.ifi.dbs.elki.utilities.iterator; +package de.lmu.ifi.dbs.elki.utilities.datastructures.iterator; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -54,12 +54,13 @@ package de.lmu.ifi.dbs.elki.utilities.iterator; * @author Erich Schubert * * @apiviz.landmark + * @apiviz.excludeSubtypes */ public interface Iter { /** * Returns true if the iterator currently points to a valid object. * - * @return a <code>boolean</code> value + * @return a <code>boolean</code> value, whether the position is valid. */ public boolean valid(); @@ -67,4 +68,4 @@ public interface Iter { * Moves the iterator forward to the next entry. */ public void advance(); -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/iterator/MIter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/MIter.java index d9017d6c..14e5443d 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/iterator/MIter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/MIter.java @@ -1,11 +1,10 @@ -package de.lmu.ifi.dbs.elki.utilities.iterator; - +package de.lmu.ifi.dbs.elki.utilities.datastructures.iterator; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -27,11 +26,29 @@ package de.lmu.ifi.dbs.elki.utilities.iterator; /** * Modifiable iterator, that also supports removal. * + * Usage example: + * + * <pre> + * {@code + * for (MIter iter = ids.iter(); iter.valid(); iter.advance()) { + * if (testSomething(iter)) { + * iter.remove(); + * continue; // Iterator may point to something else + * } + * } + * } + * </pre> + * * @author Erich Schubert + * + * @apiviz.excludeSubtypes */ public interface MIter extends Iter { /** * Remove the object the iterator currently points to. + * + * Note that, usually, the iterator will now point to a different object, very + * often to the previous one (but this is not guaranteed!) */ void remove(); } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/package-info.java new file mode 100644 index 00000000..d241fcc4 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/iterator/package-info.java @@ -0,0 +1,40 @@ +/** + * <p>ELKI Iterator API.</p> + * + * <p>ELKI uses a custom iterator API instead of the usual {@link java.util.Iterator} classes (the "Java Collections API"). + * The reason for this is largely efficiency. 
Benchmarking showed that the Java Iterator API can be quite expensive when dealing + * with primitive types, as {@link java.util.Iterator#next} is meant to always return an object.</p> + * + * <p>However, the benefits become more apparent when considering multi-valued iterators. + * For example an iterator over a k nearest neighbor set in ELKI both represents an object by its DBID, + * and a distance value. For double-valued distances, it can be retrieved using a primitive value getter + * (saving an extra object copy), and since the iterator can be used as a DBIDRef, it can also represent + * the current object without creating additional objects.</p> + * + * <p>While it may seem odd to depart from Java conventions such as the collections API, + * note that these iterators are very close to the standard C++ conventions, so nothing entirely unusual. + * Also the GNU trove libraries - used by ELKI in various places - use the same kind of iterators.</p> + */ +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +package de.lmu.ifi.dbs.elki.utilities.datastructures.iterator;
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/package-info.java index ae8308af..a0d894a9 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/designpattern/Observers.java b/src/de/lmu/ifi/dbs/elki/utilities/designpattern/Observers.java deleted file mode 100644 index dacae560..00000000 --- a/src/de/lmu/ifi/dbs/elki/utilities/designpattern/Observers.java +++ /dev/null @@ -1,80 +0,0 @@ -package de.lmu.ifi.dbs.elki.utilities.designpattern; - -import java.util.ArrayList; - -/* - This file is part of ELKI: - Environment for Developing KDD-Applications Supported by Index-Structures - - Copyright (C) 2012 - Ludwig-Maximilians-Universität München - Lehr- und Forschungseinheit für Datenbanksysteme - ELKI Development Team - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -/** - * Class to manage the observers of an instance. 
- * - * Design note: to avoid reference cycles, this object does not keep track of - * its owner. - * - * @author Erich Schubert - * - * @apiviz.stereotype delegate - * @apiviz.has Observer - */ -public class Observers<T> extends ArrayList<Observer<? super T>> { - /** - * Serial version - */ - private static final long serialVersionUID = 1L; - - /** - * Constructor. - */ - public Observers() { - super(); - } - - /** - * Add an observer to the object. - * - * @param o Observer to add - */ - public void addObserver(Observer<? super T> o) { - super.add(o); - } - - /** - * Remove an observer from the object. - * - * @param o Observer to remove - */ - public void removeObserver(Observer<? super T> o) { - super.remove(o); - } - - /** - * Notify the observers of the changed object. - * - * @param owner Owner of the Observers list - changed instance - */ - public void notifyObservers(T owner) { - for(Observer<? super T> observer : this) { - observer.update(owner); - } - } -}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/utilities/designpattern/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/designpattern/package-info.java deleted file mode 100644 index 563470af..00000000 --- a/src/de/lmu/ifi/dbs/elki/utilities/designpattern/package-info.java +++ /dev/null @@ -1,26 +0,0 @@ -/** - * <p>Interfaces and implementations related to common "design patterns".</p> - */ -/* -This file is part of ELKI: -Environment for Developing KDD-Applications Supported by Index-Structures - -Copyright (C) 2012 -Ludwig-Maximilians-Universität München -Lehr- und Forschungseinheit für Datenbanksysteme -ELKI Development Team - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU Affero General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU Affero General Public License for more details. - -You should have received a copy of the GNU Affero General Public License -along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ -package de.lmu.ifi.dbs.elki.utilities.designpattern;
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/utilities/documentation/Description.java b/src/de/lmu/ifi/dbs/elki/utilities/documentation/Description.java index 2769ff13..10fe40f3 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/documentation/Description.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/documentation/Description.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.documentation; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/documentation/DocumentationUtil.java b/src/de/lmu/ifi/dbs/elki/utilities/documentation/DocumentationUtil.java index 0fa869f4..a1d649d4 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/documentation/DocumentationUtil.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/documentation/DocumentationUtil.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.documentation; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/documentation/Reference.java b/src/de/lmu/ifi/dbs/elki/utilities/documentation/Reference.java index acb3a0aa..8d2768e2 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/documentation/Reference.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/documentation/Reference.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.documentation; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -36,7 +36,7 @@ import 
java.lang.annotation.Target; */ @Documented @Retention(RetentionPolicy.RUNTIME) -@Target( { ElementType.TYPE, ElementType.METHOD }) +@Target( { ElementType.TYPE, ElementType.METHOD, ElementType.FIELD }) public @interface Reference { /** * Publication title. diff --git a/src/de/lmu/ifi/dbs/elki/utilities/documentation/Restricted.java b/src/de/lmu/ifi/dbs/elki/utilities/documentation/Restricted.java index 8daa97ab..0cb205df 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/documentation/Restricted.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/documentation/Restricted.java @@ -10,7 +10,7 @@ import java.lang.annotation.Target; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/documentation/Title.java b/src/de/lmu/ifi/dbs/elki/utilities/documentation/Title.java index 2a390f19..9676e8e0 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/documentation/Title.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/documentation/Title.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.documentation; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/documentation/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/documentation/package-info.java index 70f31ace..d6f9ac3c 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/documentation/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/documentation/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 
Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/ensemble/EnsembleVoting.java b/src/de/lmu/ifi/dbs/elki/utilities/ensemble/EnsembleVoting.java index a83517f0..47c46d46 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/ensemble/EnsembleVoting.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/ensemble/EnsembleVoting.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.ensemble; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -39,4 +39,14 @@ public interface EnsembleVoting extends Parameterizable { * @return combined score. */ public double combine(double[] scores); + + /** + * Combine scores function. Note: it is assumed that the scores are + * comparable. + * + * @param scores Scores to combine + * @param count Number of entries to use. + * @return combined score. 
+ */ + public double combine(double[] scores, int count); } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/ensemble/EnsembleVotingBayes.java b/src/de/lmu/ifi/dbs/elki/utilities/ensemble/EnsembleVotingBayes.java deleted file mode 100644 index 7263513b..00000000 --- a/src/de/lmu/ifi/dbs/elki/utilities/ensemble/EnsembleVotingBayes.java +++ /dev/null @@ -1,106 +0,0 @@ -package de.lmu.ifi.dbs.elki.utilities.ensemble; - -/* - This file is part of ELKI: - Environment for Developing KDD-Applications Supported by Index-Structures - - Copyright (C) 2012 - Ludwig-Maximilians-Universität München - Lehr- und Forschungseinheit für Datenbanksysteme - ELKI Development Team - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; - -/** - * Combination rule based on Bayes theorems. - * - * Note: this assumes that the scores are probabilistic! 
- * - * @author Erich Schubert - */ -public class EnsembleVotingBayes implements EnsembleVoting { - /** - * Minimum vote to cast - */ - private double minvote = 0.05; - - /** - * Constructor. - * - * @param minvote Minimum vote to cast (0 to 0.5) - */ - public EnsembleVotingBayes(double minvote) { - this.minvote = minvote; - } - - @Override - public double combine(double[] scores) { - double pos = 1.0; - double neg = 1.0; - for (double score : scores) { - if (score < minvote) { - score = minvote; - } else if (score > 1.0 - minvote) { - score = 1.0 - minvote; - } - pos *= score; - neg *= (1.0 - score); - } - return pos / (pos + neg); - } - - /** - * Parameterization class. - * - * @author Erich Schubert - * - * @apiviz.exclude - */ - public static class Parameterizer extends AbstractParameterizer { - /** - * Option ID for the minimum and maximum vote - */ - public static final OptionID MIN_ID = new OptionID("ensemble.bayes.min", "Minimum (and maximum) vote share, in the range 0 to 0.5"); - - /** - * Minimum vote to cast - */ - private double minvote = 0.05; - - @Override - protected void makeOptions(Parameterization config) { - super.makeOptions(config); - DoubleParameter minvoteP = new DoubleParameter(MIN_ID, 0.05); - minvoteP.addConstraint(new GreaterEqualConstraint(0.0)); - minvoteP.addConstraint(new LessConstraint(0.5)); - - if (config.grab(minvoteP)) { - minvote = minvoteP.getValue(); - } - } - - @Override - protected EnsembleVotingBayes makeInstance() { - return new EnsembleVotingBayes(minvote); - } - } -} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/iterator/OneItemIterator.java b/src/de/lmu/ifi/dbs/elki/utilities/ensemble/EnsembleVotingInverseMultiplicative.java index f6b00242..2e082761 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/iterator/OneItemIterator.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/ensemble/EnsembleVotingInverseMultiplicative.java @@ -1,10 +1,10 @@ -package de.lmu.ifi.dbs.elki.utilities.iterator; +package 
de.lmu.ifi.dbs.elki.utilities.ensemble; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,47 +23,53 @@ package de.lmu.ifi.dbs.elki.utilities.iterator; along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import java.util.Iterator; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; /** - * Dummy iterator/iterable that returns a single object, once. + * Inverse multiplicative voting: * - * Note: a "null" object is not supported. + * {@code 1-product(1-s_i)} * * @author Erich Schubert - * - * @param <T> Object type to return */ -public class OneItemIterator<T> implements Iterator<T> { +public class EnsembleVotingInverseMultiplicative implements EnsembleVoting { /** - * Object to return. + * Static instance. */ - private T object = null; - + public static final EnsembleVotingInverseMultiplicative STATIC = new EnsembleVotingInverseMultiplicative(); + /** * Constructor. - * - * @param object Object to return once. */ - public OneItemIterator(T object) { + public EnsembleVotingInverseMultiplicative() { super(); - this.object = object; } @Override - public boolean hasNext() { - return (object != null); + public double combine(double[] scores) { + return combine(scores, scores.length); } @Override - public T next() { - T ret = object; - object = null; - return ret; + public double combine(double[] scores, int count) { + double prod = 1.; + for (int i = 0; i < count; i++) { + prod *= (1 - scores[i]); + } + return 1 - prod; } - @Override - public void remove() { - throw new UnsupportedOperationException(); + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected EnsembleVotingInverseMultiplicative makeInstance() { + return STATIC; + } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/ensemble/EnsembleVotingMax.java b/src/de/lmu/ifi/dbs/elki/utilities/ensemble/EnsembleVotingMax.java index b467e9f6..e179999d 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/ensemble/EnsembleVotingMax.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/ensemble/EnsembleVotingMax.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.ensemble; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -39,9 +39,16 @@ public class EnsembleVotingMax implements EnsembleVoting { @Override public double combine(double[] scores) { - double max = Double.NEGATIVE_INFINITY; - for (double val : scores) { - max = Math.max(max, val); + return combine(scores, scores.length); + } + + @Override + public double combine(double[] scores, int count) { + double max = scores[0]; + for (int i = 1; i < count; i++) { + if (scores[i] > max) { + max = scores[i]; + } } return max; } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/ensemble/EnsembleVotingMean.java b/src/de/lmu/ifi/dbs/elki/utilities/ensemble/EnsembleVotingMean.java index 20746927..19643782 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/ensemble/EnsembleVotingMean.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/ensemble/EnsembleVotingMean.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.ensemble; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -39,10 +39,15 @@ public class EnsembleVotingMean implements EnsembleVoting { @Override public double combine(double[] scores) { - double sum = 0.0; - for (double score : scores) { - sum += score; + return 
combine(scores, scores.length); + } + + @Override + public double combine(double[] scores, int count) { + double sum = 0.; + for (int i = 0; i < count; i++) { + sum += scores[i]; } - return sum / scores.length; + return sum / count; } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/ensemble/EnsembleVotingMedian.java b/src/de/lmu/ifi/dbs/elki/utilities/ensemble/EnsembleVotingMedian.java index fa1b3fa6..de137b40 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/ensemble/EnsembleVotingMedian.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/ensemble/EnsembleVotingMedian.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.ensemble; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -55,7 +55,12 @@ public class EnsembleVotingMedian implements EnsembleVoting { @Override public double combine(double[] scores) { - return QuickSelect.quantile(scores, quantile); + return combine(scores, scores.length); + } + + @Override + public double combine(double[] scores, int count) { + return QuickSelect.quantile(scores, 0, count, quantile); } /** diff --git a/src/de/lmu/ifi/dbs/elki/utilities/ensemble/EnsembleVotingMin.java b/src/de/lmu/ifi/dbs/elki/utilities/ensemble/EnsembleVotingMin.java index fcf5c138..b4baa4ab 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/ensemble/EnsembleVotingMin.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/ensemble/EnsembleVotingMin.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.ensemble; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -40,9 +40,16 @@ public class EnsembleVotingMin implements EnsembleVoting { @Override public double 
combine(double[] scores) { - double min = Double.POSITIVE_INFINITY; - for (double val : scores) { - min = Math.min(min, val); + return combine(scores, scores.length); + } + + @Override + public double combine(double[] scores, int count) { + double min = scores[0]; + for (int i = 1; i < count; i++) { + if (scores[i] < min) { + min = scores[i]; + } } return min; } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/iterator/UnmodifiableIterator.java b/src/de/lmu/ifi/dbs/elki/utilities/ensemble/EnsembleVotingMultiplicative.java index 03c1119c..3d99b8aa 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/iterator/UnmodifiableIterator.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/ensemble/EnsembleVotingMultiplicative.java @@ -1,10 +1,10 @@ -package de.lmu.ifi.dbs.elki.utilities.iterator; +package de.lmu.ifi.dbs.elki.utilities.ensemble; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,46 +23,53 @@ package de.lmu.ifi.dbs.elki.utilities.iterator; along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import java.util.Iterator; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; /** - * Iterator proxy that does not allow modifications. + * Inverse multiplicative voting: * - * @author Erich Schubert - * - * @apiviz.stereotype decorator - * @apiviz.composedOf Iterator oneway - - decorates + * {@code product(s_i)} * - * @param <T> + * @author Erich Schubert */ -public final class UnmodifiableIterator<T> implements Iterator<T> { +public class EnsembleVotingMultiplicative implements EnsembleVoting { /** - * Real iterator + * Static instance. */ - private Iterator<T> inner; + public static final EnsembleVotingMultiplicative STATIC = new EnsembleVotingMultiplicative(); /** * Constructor. - * - * @param inner Real iterator to proxy. 
*/ - public UnmodifiableIterator(Iterator<T> inner) { + public EnsembleVotingMultiplicative() { super(); - this.inner = inner; } @Override - public boolean hasNext() { - return inner.hasNext(); + public double combine(double[] scores) { + return combine(scores, scores.length); } @Override - public T next() { - return inner.next(); + public double combine(double[] scores, int count) { + double prod = 1.; + for (int i = 0; i < count; i++) { + prod *= scores[i]; + } + return prod; } - @Override - public void remove() { - throw new UnsupportedOperationException(); + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected EnsembleVotingMultiplicative makeInstance() { + return STATIC; + } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/ensemble/EnsembleVotingRestrictedBayes.java b/src/de/lmu/ifi/dbs/elki/utilities/ensemble/EnsembleVotingRestrictedBayes.java deleted file mode 100644 index c2edcbfd..00000000 --- a/src/de/lmu/ifi/dbs/elki/utilities/ensemble/EnsembleVotingRestrictedBayes.java +++ /dev/null @@ -1,127 +0,0 @@ -package de.lmu.ifi.dbs.elki.utilities.ensemble; - -/* - This file is part of ELKI: - Environment for Developing KDD-Applications Supported by Index-Structures - - Copyright (C) 2012 - Ludwig-Maximilians-Universität München - Lehr- und Forschungseinheit für Datenbanksysteme - ELKI Development Team - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. 
- */ - -import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessEqualConstraint; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessGlobalConstraint; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; - -/** - * Combination rule based on Bayes theorems. - * - * @author Erich Schubert - */ -public class EnsembleVotingRestrictedBayes implements EnsembleVoting { - /** - * Minimum vote to cast - */ - private double minvote = 0.05; - - /** - * Maximum vote to cast - */ - private double maxvote = 1.0; - - /** - * Constructor. - * - * @param minvote minimum vote - * @param maxvote maximum vote - */ - public EnsembleVotingRestrictedBayes(double minvote, double maxvote) { - this.minvote = minvote; - this.maxvote = maxvote; - } - - @Override - public double combine(double[] scores) { - double pos = 1.0; - double neg = 1.0; - for (double score : scores) { - score = Math.min(minvote, Math.max(maxvote, score)); - final double cscore = score; - pos *= cscore; - neg *= (1.0 - cscore); - } - return pos / (pos + neg); - } - - /** - * Parameterization class. 
- * - * @author Erich Schubert - * - * @apiviz.exclude - */ - public static class Parameterizer extends AbstractParameterizer { - /** - * Option ID for the minimum and maximum vote - */ - public static final OptionID MIN_ID = new OptionID("ensemble.bayes.min", "Minimum vote share."); - - /** - * Option ID for the minimum and maximum vote - */ - public static final OptionID MAX_ID = new OptionID("ensemble.bayes.max", "Maximum vote share."); - - /** - * Minimum vote to cast - */ - private double minvote = 0.05; - - /** - * Maximum vote to cast - */ - private double maxvote = 1.0; - - @Override - protected void makeOptions(Parameterization config) { - super.makeOptions(config); - DoubleParameter minvoteP = new DoubleParameter(MIN_ID, 0.05); - minvoteP.addConstraint(new GreaterEqualConstraint(0.0)); - minvoteP.addConstraint(new LessConstraint(0.0)); - if (config.grab(minvoteP)) { - minvote = minvoteP.doubleValue(); - } - DoubleParameter maxvoteP = new DoubleParameter(MAX_ID, 0.95); - maxvoteP.addConstraint(new GreaterConstraint(0.0)); - maxvoteP.addConstraint(new LessEqualConstraint(0.0)); - if (config.grab(maxvoteP)) { - maxvote = maxvoteP.doubleValue(); - } - config.checkConstraint(new LessGlobalConstraint<Double>(minvoteP, maxvoteP)); - } - - @Override - protected EnsembleVotingRestrictedBayes makeInstance() { - return new EnsembleVotingRestrictedBayes(minvote, maxvote); - } - } -} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/ensemble/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/ensemble/package-info.java index 572b40bb..76ee4ab8 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/ensemble/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/ensemble/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git 
a/src/de/lmu/ifi/dbs/elki/utilities/exceptions/APIViolationException.java b/src/de/lmu/ifi/dbs/elki/utilities/exceptions/APIViolationException.java index d332f2b0..ab6430f4 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/exceptions/APIViolationException.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/exceptions/APIViolationException.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.exceptions; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/exceptions/AbortException.java b/src/de/lmu/ifi/dbs/elki/utilities/exceptions/AbortException.java index 768cb243..21179a62 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/exceptions/AbortException.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/exceptions/AbortException.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.exceptions; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/exceptions/ExceptionMessages.java b/src/de/lmu/ifi/dbs/elki/utilities/exceptions/ExceptionMessages.java index 1b1eed80..cdaa6220 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/exceptions/ExceptionMessages.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/exceptions/ExceptionMessages.java @@ -1,26 +1,27 @@ package de.lmu.ifi.dbs.elki.utilities.exceptions;
-/* -This file is part of ELKI: -Environment for Developing KDD-Applications Supported by Index-Structures - -Copyright (C) 2012 -Ludwig-Maximilians-Universität München -Lehr- und Forschungseinheit für Datenbanksysteme -ELKI Development Team - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU Affero General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU Affero General Public License for more details. - -You should have received a copy of the GNU Affero General Public License -along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ +
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures +
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team +
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version. +
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details. +
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
/**
* Interface to collect exception messages that are used in several cases.
@@ -32,38 +33,48 @@ public interface ExceptionMessages { * Message when the user requested a help message.
*/
public static final String USER_REQUESTED_HELP = "Aborted: User requested help message.";
+
/**
* Messages in case a database is unexpectedly empty.
*/
public static final String DATABASE_EMPTY = "database empty: must contain elements";
+
/**
- * Message when a new label was discovered in a database, that did not exist before.
+ * Message when a new label was discovered in a database, that did not exist
+ * before.
*/
public static final String INCONSISTENT_STATE_NEW_LABEL = "inconsistent state of database - found new label";
+
/**
* Message when an empty clustering is encountered.
*/
public static final String CLUSTERING_EMPTY = "Clustering doesn't contain any cluster.";
+
/**
* Message when a distance doesn't support undefined values.
*/
public static final String UNSUPPORTED_UNDEFINED_DISTANCE = "Undefinded distance not supported!";
+
/**
* Generic "unsupported" message
*/
public static final String UNSUPPORTED = "Unsupported.";
+
/**
* Generic "not yet supported" message
*/
public static final String UNSUPPORTED_NOT_YET = "Not yet supported.";
+
/**
* "remove unsupported" message for iterators
*/
public static final String UNSUPPORTED_REMOVE = "remove() unsupported";
+
/**
* File not found. 404.
*/
public static final String FILE_NOT_FOUND = "File not found";
+
/**
* File already exists, will not overwrite.
*/
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/designpattern/Observer.java b/src/de/lmu/ifi/dbs/elki/utilities/exceptions/NotImplementedException.java index 6afec037..343a5b38 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/designpattern/Observer.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/exceptions/NotImplementedException.java @@ -1,10 +1,10 @@ -package de.lmu.ifi.dbs.elki.utilities.designpattern; +package de.lmu.ifi.dbs.elki.utilities.exceptions; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,19 +23,35 @@ package de.lmu.ifi.dbs.elki.utilities.designpattern; along with this program. If not, see <http://www.gnu.org/licenses/>. */ - /** - * Simple Observer design pattern. + * Exception thrown when a particular code path was not yet implemented. * - * @author Erich Schubert + * See {@link ExceptionMessages} for common explanations. * - * @param T object type to observe + * @author Erich Schubert */ -public interface Observer<T> { +public class NotImplementedException extends AbortException { + /** + * Serial version. + */ + private static final long serialVersionUID = 1L; + + /** + * Constructor. + * + * @param message Error message + * @param cause Cause + */ + public NotImplementedException(String message, Throwable cause) { + super(message, cause); + } + /** - * This method is called when an observed object was updated. - * - * @param o Observable + * Constructor. + * + * @param message Error message */ - public void update(T o); -}
\ No newline at end of file + public NotImplementedException(String message) { + super(message); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/exceptions/ObjectNotFoundException.java b/src/de/lmu/ifi/dbs/elki/utilities/exceptions/ObjectNotFoundException.java index c99e5394..266ad0bb 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/exceptions/ObjectNotFoundException.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/exceptions/ObjectNotFoundException.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.exceptions; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/exceptions/UnableToComplyException.java b/src/de/lmu/ifi/dbs/elki/utilities/exceptions/UnableToComplyException.java index 024615f8..ecb80950 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/exceptions/UnableToComplyException.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/exceptions/UnableToComplyException.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.exceptions; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/exceptions/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/exceptions/package-info.java index f9733d18..b55fd8ee 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/exceptions/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/exceptions/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für 
Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/iterator/AbstractFilteredIterator.java b/src/de/lmu/ifi/dbs/elki/utilities/iterator/AbstractFilteredIterator.java deleted file mode 100644 index 9a729595..00000000 --- a/src/de/lmu/ifi/dbs/elki/utilities/iterator/AbstractFilteredIterator.java +++ /dev/null @@ -1,120 +0,0 @@ -package de.lmu.ifi.dbs.elki.utilities.iterator; - -/* - This file is part of ELKI: - Environment for Developing KDD-Applications Supported by Index-Structures - - Copyright (C) 2012 - Ludwig-Maximilians-Universität München - Lehr- und Forschungseinheit für Datenbanksysteme - ELKI Development Team - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -import java.util.Iterator; - -import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; - -/** - * Abstract class to build filtered views on Iterables. - * - * @author Erich Schubert - * - * @apiviz.stereotype decorator - * @apiviz.composedOf Iterator - * - * @param <IN> Input type - * @param <OUT> Output type - */ -public abstract class AbstractFilteredIterator<IN, OUT extends IN> implements Iterator<OUT> { - /** - * The iterator to use. - */ - Iterator<IN> itr = null; - - /** - * The next object to return. - */ - OUT nextobj = null; - - /** - * Constructor. - */ - public AbstractFilteredIterator() { - super(); - } - - /** - * Init the iterators. 
- */ - protected void init() { - this.itr = getParentIterator(); - if (this.itr == null) { - throw new AbortException("Filtered iterator has 'null' parent."); - } - } - - /** - * Get an iterator for the actual data. Used in initialization. - * - * @return iterator - */ - protected abstract Iterator<IN> getParentIterator(); - - /** - * Test the filter predicate for a new object. - * - * @param nextobj Object to test - * @return cast object when true, {@code null} otherwise - */ - protected abstract OUT testFilter(IN nextobj); - - /** - * Find the next visualizer. - */ - private void updateNext() { - if(itr == null) { - init(); - } - nextobj = null; - while(itr.hasNext()) { - IN v = itr.next(); - nextobj = testFilter(v); - if(nextobj != null) { - break; - } - } - } - - @Override - public boolean hasNext() { - if(itr == null) { - updateNext(); - } - return (nextobj != null); - } - - @Override - public OUT next() { - OUT ret = this.nextobj; - updateNext(); - return ret; - } - - @Override - public void remove() { - throw new UnsupportedOperationException(); - } -}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/utilities/iterator/EmptyIterator.java b/src/de/lmu/ifi/dbs/elki/utilities/iterator/EmptyIterator.java deleted file mode 100644 index 6cbdad29..00000000 --- a/src/de/lmu/ifi/dbs/elki/utilities/iterator/EmptyIterator.java +++ /dev/null @@ -1,71 +0,0 @@ -package de.lmu.ifi.dbs.elki.utilities.iterator; - -/* - This file is part of ELKI: - Environment for Developing KDD-Applications Supported by Index-Structures - - Copyright (C) 2012 - Ludwig-Maximilians-Universität München - Lehr- und Forschungseinheit für Datenbanksysteme - ELKI Development Team - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -import java.util.Iterator; - -/** - * Empty iterator/iterable, that never returns any data. - * - * @author Erich Schubert - * - * @param <T> Data type - */ -public final class EmptyIterator<T> implements Iterator<T>, Iterable<T> { - @Override - public boolean hasNext() { - return false; - } - - @Override - public T next() { - return null; - } - - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - - @Override - public Iterator<T> iterator() { - return STATIC(); - } - - /** - * Static instance - */ - protected static final EmptyIterator<?> STATIC_INSTANCE = new EmptyIterator<Object>(); - - /** - * Access the static instance. 
- * - * @param <T> type to (not) iterate over - * @return Cast static instance. - */ - @SuppressWarnings("unchecked") - public static <T> EmptyIterator<T> STATIC() { - return (EmptyIterator<T>) STATIC_INSTANCE; - } -}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/utilities/iterator/MergedIterator.java b/src/de/lmu/ifi/dbs/elki/utilities/iterator/MergedIterator.java deleted file mode 100644 index b5d2fd02..00000000 --- a/src/de/lmu/ifi/dbs/elki/utilities/iterator/MergedIterator.java +++ /dev/null @@ -1,115 +0,0 @@ -package de.lmu.ifi.dbs.elki.utilities.iterator; - -/* - This file is part of ELKI: - Environment for Developing KDD-Applications Supported by Index-Structures - - Copyright (C) 2012 - Ludwig-Maximilians-Universität München - Lehr- und Forschungseinheit für Datenbanksysteme - ELKI Development Team - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -import java.util.Arrays; -import java.util.Collection; -import java.util.Iterator; - -/** - * Iterator that joins multiple existing iterators into one. - * - * @author Erich Schubert - * - * @apiviz.stereotype adapter - * @apiviz.uses Iterator oneway - * - * @param <E> Entry type - */ -public class MergedIterator<E> implements Iterator<E> { - /** - * All the iterators we process - */ - final Iterator<Iterator<E>> iterators; - - /** - * The iterator we are currently processing - */ - Iterator<E> current = null; - - /** - * The last iterator we returned an object for, for remove() - */ - Iterator<E> last = null; - - /** - * Main constructor. 
- * - * @param iterators Iterators - */ - public MergedIterator(Iterator<Iterator<E>> iterators) { - super(); - this.iterators = iterators; - } - - /** - * Auxillary constructor with Collections - * - * @param iterators Iterators - */ - public MergedIterator(Collection<Iterator<E>> iterators) { - this(iterators.iterator()); - } - - /** - * Auxillary constructor with arrays - * - * @param iterators Iterators - */ - public MergedIterator(Iterator<E>... iterators) { - this(Arrays.asList(iterators).iterator()); - } - - @Override - public boolean hasNext() { - do { - if(current != null && current.hasNext()) { - return true; - } - if(!iterators.hasNext()) { - return false; - } - // advance master iterator and retry - current = iterators.next(); - } - while(true); - } - - @Override - public E next() { - while(!current.hasNext()) { - current = iterators.next(); - } - last = current; - return current.next(); - } - - @Override - public void remove() { - if(last == null) { - throw new RuntimeException("Iterator.remove() called without next()"); - } - last.remove(); - } -}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/utilities/iterator/TypeFilterIterator.java b/src/de/lmu/ifi/dbs/elki/utilities/iterator/TypeFilterIterator.java deleted file mode 100644 index 5aa908c8..00000000 --- a/src/de/lmu/ifi/dbs/elki/utilities/iterator/TypeFilterIterator.java +++ /dev/null @@ -1,86 +0,0 @@ -package de.lmu.ifi.dbs.elki.utilities.iterator; - -/* - This file is part of ELKI: - Environment for Developing KDD-Applications Supported by Index-Structures - - Copyright (C) 2012 - Ludwig-Maximilians-Universität München - Lehr- und Forschungseinheit für Datenbanksysteme - ELKI Development Team - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -import java.util.Iterator; - -/** - * Iterator that filters results by type. - * - * @author Erich Schubert - * - * @param <IN> Input datatype - * @param <OUT> Output datatype - */ -public class TypeFilterIterator<IN, OUT extends IN> extends AbstractFilteredIterator<IN, OUT> implements Iterator<OUT> { - /** - * Class restriction - */ - private Class<? super OUT> filterClass; - - /** - * Parent iterator - */ - private Iterator<IN> parent; - - /** - * Constructor. - * - * @param filterClass Filter - * @param parent Parent collection - */ - public TypeFilterIterator(Class<? 
super OUT> filterClass, Iterable<IN> parent) { - super(); - this.filterClass = filterClass; - this.parent = parent.iterator(); - } - - /** - * Constructor. - * - * @param filterClass Filter - * @param parent Parent iterator - */ - public TypeFilterIterator(Class<? super OUT> filterClass, Iterator<IN> parent) { - super(); - this.filterClass = filterClass; - this.parent = parent; - } - - @Override - protected Iterator<IN> getParentIterator() { - return parent; - } - - @SuppressWarnings("unchecked") - @Override - protected OUT testFilter(IN nextobj) { - try { - return (OUT) filterClass.cast(nextobj); - } - catch(ClassCastException e) { - return null; - } - } -}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/utilities/iterator/UnmodifiableListIterator.java b/src/de/lmu/ifi/dbs/elki/utilities/iterator/UnmodifiableListIterator.java deleted file mode 100644 index 7b9c9d79..00000000 --- a/src/de/lmu/ifi/dbs/elki/utilities/iterator/UnmodifiableListIterator.java +++ /dev/null @@ -1,98 +0,0 @@ -package de.lmu.ifi.dbs.elki.utilities.iterator; - -/* - This file is part of ELKI: - Environment for Developing KDD-Applications Supported by Index-Structures - - Copyright (C) 2012 - Ludwig-Maximilians-Universität München - Lehr- und Forschungseinheit für Datenbanksysteme - ELKI Development Team - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -import java.util.ListIterator; - -/** - * Iterator proxy that does not allow modifications. - * - * @author Erich Schubert - * - * @apiviz.stereotype decorator - * @apiviz.composedOf ListIterator oneway - - decorates - * - * @param <T> - */ -public final class UnmodifiableListIterator<T> implements ListIterator<T> { - /** - * Real iterator - */ - private ListIterator<T> inner; - - /** - * Constructor. - * - * @param inner Real iterator to proxy. 
- */ - public UnmodifiableListIterator(ListIterator<T> inner) { - super(); - this.inner = inner; - } - - @Override - public boolean hasNext() { - return inner.hasNext(); - } - - @Override - public T next() { - return inner.next(); - } - - @Override - public boolean hasPrevious() { - return inner.hasPrevious(); - } - - @Override - public T previous() { - return inner.previous(); - } - - @Override - public int nextIndex() { - return inner.nextIndex(); - } - - @Override - public int previousIndex() { - return inner.previousIndex(); - } - - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - - @Override - public void add(T e) { - throw new UnsupportedOperationException(); - } - - @Override - public void set(T e) { - throw new UnsupportedOperationException(); - } -}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/utilities/iterator/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/iterator/package-info.java deleted file mode 100644 index 9a889d1d..00000000 --- a/src/de/lmu/ifi/dbs/elki/utilities/iterator/package-info.java +++ /dev/null @@ -1,26 +0,0 @@ -/** - * <p>Various {@link java.util.Iterator} decorators and adapters.</p> - */ -/* -This file is part of ELKI: -Environment for Developing KDD-Applications Supported by Index-Structures - -Copyright (C) 2012 -Ludwig-Maximilians-Universität München -Lehr- und Forschungseinheit für Datenbanksysteme -ELKI Development Team - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU Affero General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU Affero General Public License for more details. - -You should have received a copy of the GNU Affero General Public License -along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ -package de.lmu.ifi.dbs.elki.utilities.iterator;
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/AbstractParameterizer.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/AbstractParameterizer.java index 0a094ced..6aa22964 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/AbstractParameterizer.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/AbstractParameterizer.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/InternalParameterizationErrors.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/InternalParameterizationErrors.java index c8797c73..1cff200a 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/InternalParameterizationErrors.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/InternalParameterizationErrors.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -63,7 +63,7 @@ public class InternalParameterizationErrors extends ParameterException { */ public InternalParameterizationErrors(String message, Exception internalError) { super(message); - final ArrayList<Exception> errors = new ArrayList<Exception>(1); + final ArrayList<Exception> errors = new ArrayList<>(1); errors.add(internalError); this.internalErrors = errors; } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/NoParameterValueException.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/NoParameterValueException.java index 
ce422de7..53d992ff 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/NoParameterValueException.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/NoParameterValueException.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/OptionID.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/OptionID.java index 2c9814e3..60e51a98 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/OptionID.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/OptionID.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -32,85 +32,6 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling; */ public final class OptionID { /** - * Flag to obtain help-message. - * <p> - * Key: {@code -h} - * </p> - */ - public static final OptionID HELP = new OptionID("h", "Request a help-message, either for the main-routine or for any specified algorithm. " + "Causes immediate stop of the program."); - - /** - * Flag to obtain help-message. - * <p> - * Key: {@code -help} - * </p> - */ - public static final OptionID HELP_LONG = new OptionID("help", "Request a help-message, either for the main-routine or for any specified algorithm. 
" + "Causes immediate stop of the program."); - - /** - * OptionID for {@link de.lmu.ifi.dbs.elki.workflow.AlgorithmStep} - */ - public static final OptionID ALGORITHM = new OptionID("algorithm", "Algorithm to run."); - - /** - * Optional Parameter to specify a class to obtain a description for. - * <p> - * Key: {@code -description} - * </p> - */ - public static final OptionID DESCRIPTION = new OptionID("description", "Class to obtain a description of. " + "Causes immediate stop of the program."); - - /** - * Optional Parameter to specify a class to enable debugging for. - * <p> - * Key: {@code -enableDebug} - * </p> - */ - public static final OptionID DEBUG = new OptionID("enableDebug", "Parameter to enable debugging for particular packages."); - - /** - * OptionID for {@link de.lmu.ifi.dbs.elki.workflow.InputStep} - */ - public static final OptionID DATABASE = new OptionID("db", "Database class."); - - /** - * OptionID for {@link de.lmu.ifi.dbs.elki.workflow.InputStep} - */ - // TODO: move to database class? - public static final OptionID DATABASE_CONNECTION = new OptionID("dbc", "Database connection class."); - - /** - * OptionID for {@link de.lmu.ifi.dbs.elki.workflow.EvaluationStep} - */ - public static final OptionID EVALUATOR = new OptionID("evaluator", "Class to evaluate the results with."); - - /** - * OptionID for {@link de.lmu.ifi.dbs.elki.workflow.OutputStep} - */ - public static final OptionID RESULT_HANDLER = new OptionID("resulthandler", "Result handler class."); - - /** - * OptionID for the application output file/folder - */ - public static final OptionID OUTPUT = new OptionID("out", "Directory name (or name of an existing file) to write the obtained results in. " + "If this parameter is omitted, per default the output will sequentially be given to STDOUT."); - - /** - * Flag to allow verbose messages while running the application. 
- * <p> - * Key: {@code -verbose} - * </p> - */ - public static final OptionID VERBOSE_FLAG = new OptionID("verbose", "Enable verbose messages."); - - /** - * Flag to allow verbose messages while running the application. - * <p> - * Key: {@code -time} - * </p> - */ - public static final OptionID TIME_FLAG = new OptionID("time", "Enable logging of runtime data. Do not combine with more verbose logging, since verbose logging can significantly impact performance."); - - /** * Option name */ private String name; diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/OptionUtil.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/OptionUtil.java index 76ae9feb..ea28aa42 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/OptionUtil.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/OptionUtil.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/ParameterException.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/ParameterException.java index 0235b4a9..326335fd 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/ParameterException.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/ParameterException.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/Parameterizable.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/Parameterizable.java index 
13f202dd..ead565b1 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/Parameterizable.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/Parameterizable.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,8 +23,6 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling; along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import de.lmu.ifi.dbs.elki.utilities.InspectionUtilFrequentlyScanned; - /** * Interface to define the required methods for command line interaction. * @@ -96,7 +94,7 @@ import de.lmu.ifi.dbs.elki.utilities.InspectionUtilFrequentlyScanned; * @apiviz.uses de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization * .Parameterization oneway */ -public interface Parameterizable extends InspectionUtilFrequentlyScanned { +public interface Parameterizable { // Empty marker interface - the \@Description / \@Title / \@Reference and // constructor requirements cannot be specified in Java! 
} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/Parameterizer.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/Parameterizer.java index aa5c74bd..7e3a848c 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/Parameterizer.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/Parameterizer.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/UnspecifiedParameterException.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/UnspecifiedParameterException.java index ab138704..31af6d87 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/UnspecifiedParameterException.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/UnspecifiedParameterException.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/UnusedParameterException.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/UnusedParameterException.java index 0914c0f1..d5dbd4b2 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/UnusedParameterException.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/UnusedParameterException.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität 
München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/WrongParameterValueException.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/WrongParameterValueException.java index c51eae0e..eeb6a9e7 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/WrongParameterValueException.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/WrongParameterValueException.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/AbstractNumberConstraint.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/AbstractNumberConstraint.java index f622aba5..fc4a673e 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/AbstractNumberConstraint.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/AbstractNumberConstraint.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/AllOrNoneMustBeSetGlobalConstraint.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/AllOrNoneMustBeSetGlobalConstraint.java index c2448ba5..a06a06c1 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/AllOrNoneMustBeSetGlobalConstraint.java +++ 
b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/AllOrNoneMustBeSetGlobalConstraint.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -60,8 +60,8 @@ public class AllOrNoneMustBeSetGlobalConstraint implements GlobalParameterConstr @Override public void test() throws ParameterException { - ArrayList<String> set = new ArrayList<String>(); - ArrayList<String> notSet = new ArrayList<String>(); + ArrayList<String> set = new ArrayList<>(); + ArrayList<String> notSet = new ArrayList<>(); for(Parameter<?> p : parameterList) { if(p.isDefined()) { diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/EqualSizeGlobalConstraint.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/EqualSizeGlobalConstraint.java index 22d1ceb2..2ee7be9c 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/EqualSizeGlobalConstraint.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/EqualSizeGlobalConstraint.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/EqualStringConstraint.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/EqualStringConstraint.java index 4c4a8de3..1ab35cc1 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/EqualStringConstraint.java +++ 
b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/EqualStringConstraint.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/GlobalListSizeConstraint.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/GlobalListSizeConstraint.java index 1bad7d00..bcedd342 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/GlobalListSizeConstraint.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/GlobalListSizeConstraint.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/GlobalParameterConstraint.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/GlobalParameterConstraint.java index b59c9493..5e38b2b7 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/GlobalParameterConstraint.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/GlobalParameterConstraint.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git 
a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/GlobalVectorListElementSizeConstraint.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/GlobalVectorListElementSizeConstraint.java index 6bc9fd2d..01c99d42 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/GlobalVectorListElementSizeConstraint.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/GlobalVectorListElementSizeConstraint.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/GreaterConstraint.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/GreaterConstraint.java index 17a9a54d..33c73fb8 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/GreaterConstraint.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/GreaterConstraint.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/GreaterEqualConstraint.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/GreaterEqualConstraint.java index 7eb0de83..d05235c1 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/GreaterEqualConstraint.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/GreaterEqualConstraint.java @@ -4,7 +4,7 @@ package 
de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/IntervalConstraint.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/IntervalConstraint.java index db6a1ed8..00c2f5ad 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/IntervalConstraint.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/IntervalConstraint.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/LessConstraint.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/LessConstraint.java index a8369304..5c4d0635 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/LessConstraint.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/LessConstraint.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/LessEqualConstraint.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/LessEqualConstraint.java index e35381b3..d81c821b 100644 --- 
a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/LessEqualConstraint.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/LessEqualConstraint.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/LessEqualGlobalConstraint.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/LessEqualGlobalConstraint.java index 9855b96d..1216e03b 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/LessEqualGlobalConstraint.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/LessEqualGlobalConstraint.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/LessGlobalConstraint.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/LessGlobalConstraint.java index e475e92f..a722edab 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/LessGlobalConstraint.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/LessGlobalConstraint.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme 
ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/ListEachConstraint.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/ListEachConstraint.java index 7dab4006..918c59f5 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/ListEachConstraint.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/ListEachConstraint.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -46,7 +46,7 @@ public class ListEachConstraint<T> implements ParameterConstraint<List<T>> { */ public ListEachConstraint() { super(); - this.constraints = new ArrayList<ParameterConstraint<? super T>>(); + this.constraints = new ArrayList<>(); } /** @@ -56,7 +56,7 @@ public class ListEachConstraint<T> implements ParameterConstraint<List<T>> { */ public ListEachConstraint(ParameterConstraint<? super T> constraint) { super(); - this.constraints = new ArrayList<ParameterConstraint<? 
super T>>(1); + this.constraints = new ArrayList<>(1); this.constraints.add(constraint); } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/ListParameterNoDuplicateValueConstraint.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/ListParameterNoDuplicateValueConstraint.java index 6545cc9d..1ff23f80 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/ListParameterNoDuplicateValueConstraint.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/ListParameterNoDuplicateValueConstraint.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -57,7 +57,7 @@ public class ListParameterNoDuplicateValueConstraint<T extends Object> implement */ @Override public void test(List<T> list) throws ParameterException { - Set<T> values = new HashSet<T>(); + Set<T> values = new HashSet<>(); for(T pv : list) { if(!values.add(pv)) { diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/ListSizeConstraint.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/ListSizeConstraint.java index 82b3ae7e..95b5214b 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/ListSizeConstraint.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/ListSizeConstraint.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git 
a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/NoDuplicateValueGlobalConstraint.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/NoDuplicateValueGlobalConstraint.java index 39ab2680..65b427a1 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/NoDuplicateValueGlobalConstraint.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/NoDuplicateValueGlobalConstraint.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -76,7 +76,7 @@ public class NoDuplicateValueGlobalConstraint implements GlobalParameterConstrai */ @Override public void test() throws ParameterException { - Set<Object> numbers = new HashSet<Object>(); + Set<Object> numbers = new HashSet<>(); for(Parameter<?> param : parameters) { if(param.isDefined()) { diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/OneMustBeSetGlobalConstraint.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/OneMustBeSetGlobalConstraint.java index c0109ac5..e7802685 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/OneMustBeSetGlobalConstraint.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/OneMustBeSetGlobalConstraint.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,6 +23,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints; along with this program. 
If not, see <http://www.gnu.org/licenses/>. */ +import java.util.Arrays; import java.util.List; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionUtil; @@ -54,6 +55,16 @@ public class OneMustBeSetGlobalConstraint implements GlobalParameterConstraint { } /** + * Creates a One-Must-Be-Set global parameter constraint. That is, at least + * one parameter value of the given list of parameters has to be set. + * + * @param params list of parameters + */ + public OneMustBeSetGlobalConstraint(Parameter<?>... params) { + parameters = Arrays.asList(params); + } + + /** * Checks if at least one parameter value of the list of parameters specified * is set. If not, a parameter exception is thrown. * diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/OnlyOneIsAllowedToBeSetGlobalConstraint.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/OnlyOneIsAllowedToBeSetGlobalConstraint.java index 22db8511..bdd55572 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/OnlyOneIsAllowedToBeSetGlobalConstraint.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/OnlyOneIsAllowedToBeSetGlobalConstraint.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -24,6 +24,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints; */ import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionUtil; @@ -55,17 +56,27 @@ public class OnlyOneIsAllowedToBeSetGlobalConstraint implements GlobalParameterC } /** + * Constructs a global parameter constraint for testing if only one parameter + * of a list of parameters is set. 
+ * + * @param params list of parameters to be checked + */ + public OnlyOneIsAllowedToBeSetGlobalConstraint(Parameter<?>... params) { + parameters = Arrays.asList(params); + } + + /** * Checks if only one parameter of a list of parameters is set. If not, a * parameter exception is thrown. */ @Override public void test() throws ParameterException { - ArrayList<String> set = new ArrayList<String>(); - for(Parameter<?> p : parameters) { - if(p.isDefined()) { + ArrayList<String> set = new ArrayList<>(); + for (Parameter<?> p : parameters) { + if (p.isDefined()) { // FIXME: Retire the use of this constraint for Flags! - if(p instanceof Flag) { - if (((Flag)p).getValue().booleanValue()) { + if (p instanceof Flag) { + if (((Flag) p).getValue().booleanValue()) { set.add(p.getName()); } } else { @@ -73,7 +84,7 @@ public class OnlyOneIsAllowedToBeSetGlobalConstraint implements GlobalParameterC } } } - if(set.size() > 1) { + if (set.size() > 1) { throw new WrongParameterValueException("Global Parameter Constraint Error.\n" + "Only one of the parameters " + OptionUtil.optionsNamesToString(parameters) + " is allowed to be set. " + "Parameters currently set: " + set.toString()); } } @@ -82,4 +93,4 @@ public class OnlyOneIsAllowedToBeSetGlobalConstraint implements GlobalParameterC public String getDescription() { return "Only one of the parameters " + OptionUtil.optionsNamesToString(parameters) + " is allowed to be set."; } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/ParameterConstraint.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/ParameterConstraint.java index ff66ec05..2847260a 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/ParameterConstraint.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/ParameterConstraint.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/ParameterFlagGlobalConstraint.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/ParameterFlagGlobalConstraint.java index ff2e6675..ab77240a 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/ParameterFlagGlobalConstraint.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/ParameterFlagGlobalConstraint.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -49,7 +49,7 @@ public class ParameterFlagGlobalConstraint<S> implements GlobalParameterConstrai /** * Parameter possibly to be checked. */ - private Parameter<S> param; + private Parameter<? extends S> param; /** * Flag the checking of the parameter constraints is dependent on. @@ -64,7 +64,7 @@ public class ParameterFlagGlobalConstraint<S> implements GlobalParameterConstrai /** * List of parameter constraints. 
*/ - private List<ParameterConstraint<? super S>> cons; + private List<? extends ParameterConstraint<? super S>> cons; /** * Constructs a global parameter constraint specifying that the testing of the @@ -78,7 +78,7 @@ public class ParameterFlagGlobalConstraint<S> implements GlobalParameterConstrai * @param flagConstraint indicates at which status of the flag the parameter * is to be checked */ - public ParameterFlagGlobalConstraint(Parameter<S> p, List<ParameterConstraint<? super S>> c, Flag f, boolean flagConstraint) { + public ParameterFlagGlobalConstraint(Parameter<? extends S> p, List<? extends ParameterConstraint<? super S>> c, Flag f, boolean flagConstraint) { param = p; flag = f; this.flagConstraint = flagConstraint; diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/StringLengthConstraint.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/StringLengthConstraint.java index 84d8fa6d..ff42a21f 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/StringLengthConstraint.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/StringLengthConstraint.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/package-info.java index 4b8c163a..e05ca2fa 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 
+Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/package-info.java index 593c11d2..e4c5489b 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/package-info.java @@ -253,7 +253,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameterization/AbstractParameterization.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameterization/AbstractParameterization.java index 7cc0055e..75a5be86 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameterization/AbstractParameterization.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameterization/AbstractParameterization.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -46,7 +46,7 @@ public abstract class AbstractParameterization implements Parameterization { /** * Errors */ - List<ParameterException> errors = new ArrayList<ParameterException>(); + List<ParameterException> errors = new ArrayList<>(); /** * The logger of the class. 
@@ -89,7 +89,7 @@ public abstract class AbstractParameterization implements Parameterization { public synchronized void clearErrors() { // Do NOT use errors.clear(), since we might have an error report // referencing the collection! - errors = new ArrayList<ParameterException>(); + errors = new ArrayList<>(); } /** diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameterization/ChainedParameterization.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameterization/ChainedParameterization.java index a2fd168e..97cdb51f 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameterization/ChainedParameterization.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameterization/ChainedParameterization.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -44,7 +44,7 @@ public class ChainedParameterization extends AbstractParameterization { /** * Keep the list of parameterizations. 
*/ - private List<Parameterization> chain = new ArrayList<Parameterization>(); + private List<Parameterization> chain = new ArrayList<>(); /** * Error target diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameterization/EmptyParameterization.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameterization/EmptyParameterization.java index 3391ff4a..02802593 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameterization/EmptyParameterization.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameterization/EmptyParameterization.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameterization/ListParameterization.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameterization/ListParameterization.java index 7d728280..b870b57b 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameterization/ListParameterization.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameterization/ListParameterization.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -44,7 +44,7 @@ public class ListParameterization extends AbstractParameterization { /** * The actual parameters, for storage */ - LinkedList<Pair<OptionID, Object>> parameters = new LinkedList<Pair<OptionID, Object>>(); + LinkedList<Pair<OptionID, Object>> parameters = new LinkedList<>(); /** * 
Default constructor. @@ -52,7 +52,7 @@ public class ListParameterization extends AbstractParameterization { public ListParameterization() { super(); } - + /** * Constructor with an existing collection. * @@ -81,9 +81,9 @@ public class ListParameterization extends AbstractParameterization { * @param value Value */ public void addParameter(OptionID optionid, Object value) { - parameters.add(new Pair<OptionID, Object>(optionid, value)); + parameters.add(new Pair<>(optionid, value)); } - + /** * Convenience - add a Flag option directly. * @@ -94,7 +94,7 @@ public class ListParameterization extends AbstractParameterization { addFlag(flag.getOptionID()); } } - + /** * Convenience - add a Parameter for forwarding * @@ -107,11 +107,11 @@ public class ListParameterization extends AbstractParameterization { } @Override - public boolean setValueForOption(Parameter<?> opt) throws ParameterException { + public boolean setValueForOption(Parameter<?> opt) throws ParameterException { Iterator<Pair<OptionID, Object>> iter = parameters.iterator(); - while(iter.hasNext()) { + while (iter.hasNext()) { Pair<OptionID, Object> pair = iter.next(); - if(pair.first == opt.getOptionID()) { + if (pair.first == opt.getOptionID()) { iter.remove(); opt.setValue(pair.second); return true; @@ -134,8 +134,8 @@ public class ListParameterization extends AbstractParameterization { return (parameters.size() > 0); } - /** {@inheritDoc} - * Default implementation, for flat parameterizations. + /** + * {@inheritDoc} Default implementation, for flat parameterizations. 
*/ @Override public Parameterization descend(Object option) { @@ -158,11 +158,17 @@ public class ListParameterization extends AbstractParameterization { * @return Array list of parameters */ public ArrayList<String> serialize() { - ArrayList<String> params = new ArrayList<String>(); + ArrayList<String> params = new ArrayList<>(); for (Pair<OptionID, Object> pair : parameters) { params.add("-" + pair.getFirst().toString()); - params.add(pair.getSecond().toString()); + if (pair.getSecond() instanceof String) { + params.add((String) pair.getSecond()); + } else if (pair.getSecond() instanceof Class) { + params.add(((Class<?>) pair.getSecond()).getCanonicalName()); + } else { // Fallback: + params.add(pair.getSecond().toString()); + } } return params; } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameterization/MergedParameterization.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameterization/MergedParameterization.java index 580a3371..9f765963 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameterization/MergedParameterization.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameterization/MergedParameterization.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -69,7 +69,7 @@ public class MergedParameterization implements Parameterization { super(); this.inner = child; this.current = new ListParameterization(); - this.used = new ArrayList<Pair<OptionID, Object>>(); + this.used = new ArrayList<>(); } /** diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameterization/Parameterization.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameterization/Parameterization.java index 5cdff013..01c46583 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameterization/Parameterization.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameterization/Parameterization.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameterization/SerializedParameterization.java 
b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameterization/SerializedParameterization.java index b88b7dc4..b3e3f214 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameterization/SerializedParameterization.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameterization/SerializedParameterization.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -59,7 +59,7 @@ public class SerializedParameterization extends AbstractParameterization { */ public SerializedParameterization() { super(); - parameters = new LinkedList<String>(); + parameters = new LinkedList<>(); } /** diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameterization/TrackParameters.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameterization/TrackParameters.java index 424f54ee..5a20bd9e 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameterization/TrackParameters.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameterization/TrackParameters.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -54,18 +54,18 @@ public class TrackParameters implements Parameterization { /** * Tracking storage */ - List<Pair<Object, Parameter<?>>> options = new ArrayList<Pair<Object, Parameter<?>>>(); + List<Pair<Object, Parameter<?>>> options = new ArrayList<>(); /** * Tree information: parent links */ - Map<Object, Object> parents = new 
HashMap<Object, Object>(); + Map<Object, Object> parents = new HashMap<>(); /** * Tree information: child links */ // Implementation note: we need the map to support "null" keys! - Map<Object, List<Object>> children = new HashMap<Object, List<Object>>(); + Map<Object, List<Object>> children = new HashMap<>(); /** * Current parent for nested parameterization @@ -149,10 +149,10 @@ public class TrackParameters implements Parameterization { * @return Parameters given */ public Collection<Pair<OptionID, Object>> getGivenParameters() { - ArrayList<Pair<OptionID, Object>> ret = new ArrayList<Pair<OptionID, Object>>(); + ArrayList<Pair<OptionID, Object>> ret = new ArrayList<>(); for(Pair<Object, Parameter<?>> pair : options) { if(pair.second.isDefined() && pair.second.getGivenValue() != null) { - ret.add(new Pair<OptionID, Object>(pair.second.getOptionID(), pair.second.getGivenValue())); + ret.add(new Pair<>(pair.second.getOptionID(), pair.second.getGivenValue())); } } return ret; @@ -180,7 +180,7 @@ public class TrackParameters implements Parameterization { parents.put(opt, cur); List<Object> c = children.get(cur); if(c == null) { - c = new ArrayList<Object>(); + c = new ArrayList<>(); children.put(cur, c); } if(!c.contains(opt)) { diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameterization/UnParameterization.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameterization/UnParameterization.java index a04b3217..2a05a766 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameterization/UnParameterization.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameterization/UnParameterization.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development 
Team @@ -45,7 +45,7 @@ public class UnParameterization implements Parameterization { /** * Errors */ - List<ParameterException> errors = new ArrayList<ParameterException>(); + List<ParameterException> errors = new ArrayList<>(); @Override public boolean hasUnusedParameters() { diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameterization/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameterization/package-info.java index 41856ad7..ce9c89b4 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameterization/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameterization/package-info.java @@ -8,7 +8,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/AbstractParameter.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/AbstractParameter.java index ce4b094d..8e1b48c3 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/AbstractParameter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/AbstractParameter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -334,7 +334,7 @@ public abstract class AbstractParameter<T> implements Parameter<T> { @Override public void addConstraint(ParameterConstraint<? super T> constraint) { if (constraints == null) { - this.constraints = new ArrayList<ParameterConstraint<? 
super T>>(1); + this.constraints = new ArrayList<>(1); } constraints.add(constraint); } @@ -346,7 +346,7 @@ public abstract class AbstractParameter<T> implements Parameter<T> { */ public void addConstraints(Collection<? extends ParameterConstraint<? super T>> cs) { if (constraints == null) { - this.constraints = new ArrayList<ParameterConstraint<? super T>>(cs.size()); + this.constraints = new ArrayList<>(cs.size()); } constraints.addAll(cs); } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/ClassListParameter.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/ClassListParameter.java index f87e3973..35cd0573 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/ClassListParameter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/ClassListParameter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -48,11 +48,6 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz // TODO: Add missing constructors. (ObjectListParameter also!) public class ClassListParameter<C> extends ListParameter<Class<? extends C>> { /** - * Class loader - */ - protected static final ClassLoader loader = ClassLoader.getSystemClassLoader(); - - /** * The restriction class for the list of class names. */ protected Class<C> restrictionClass; @@ -88,12 +83,12 @@ public class ClassListParameter<C> extends ListParameter<Class<? extends C>> { public String getValueAsString() { StringBuilder buf = new StringBuilder(); final String defPackage = restrictionClass.getPackage().getName() + "."; - for(Class<? extends C> c : getValue()) { - if(buf.length() > 0) { + for (Class<? 
extends C> c : getValue()) { + if (buf.length() > 0) { buf.append(LIST_SEP); } String name = c.getName(); - if(name.startsWith(defPackage)) { + if (name.startsWith(defPackage)) { name = name.substring(defPackage.length()); } buf.append(name); @@ -107,57 +102,41 @@ public class ClassListParameter<C> extends ListParameter<Class<? extends C>> { try { List<?> l = List.class.cast(obj); // do extra validation: - for(Object o : l) { - if(!(o instanceof Class)) { + for (Object o : l) { + if (!(o instanceof Class)) { throw new WrongParameterValueException("Wrong parameter format for parameter \"" + getName() + "\". Given list contains objects of different type!"); } } // TODO: can we use reflection to get extra checks? // TODO: Should we copy the list? return (List<Class<? extends C>>) l; - } - catch(ClassCastException e) { + } catch (ClassCastException e) { // continue with others } // Did we get a single class? try { - if(restrictionClass.isAssignableFrom((Class<?>) obj)) { - List<Class<? extends C>> clss = new ArrayList<Class<? extends C>>(1); + if (restrictionClass.isAssignableFrom((Class<?>) obj)) { + List<Class<? extends C>> clss = new ArrayList<>(1); clss.add((Class<? extends C>) obj); return clss; } - } - catch(ClassCastException e) { + } catch (ClassCastException e) { // continue with others } - if(obj instanceof String) { + if (obj instanceof String) { String[] classes = SPLIT.split((String) obj); // TODO: allow empty lists (and list constraints) to enforce length? - if(classes.length == 0) { + if (classes.length == 0) { throw new WrongParameterValueException("Wrong parameter format! Given list of classes for parameter \"" + getName() + "\" is either empty or has the wrong format!"); } - List<Class<? extends C>> cls = new ArrayList<Class<? 
extends C>>(classes.length); - for(String cl : classes) { - try { - Class<?> c; - try { - c = loader.loadClass(cl); - } - catch(ClassNotFoundException e) { - // try in package of restriction class - c = loader.loadClass(restrictionClass.getPackage().getName() + "." + cl); - } - // Redundant check, also in validate(), but not expensive. - if(!restrictionClass.isAssignableFrom(c)) { - throw new WrongParameterValueException(this, cl, "Class \"" + cl + "\" does not extend/implement restriction class " + restrictionClass + ".\n"); - } - else { - cls.add((Class<? extends C>) c); - } - } - catch(ClassNotFoundException e) { - throw new WrongParameterValueException(this, cl, "Class \"" + cl + "\" not found.\n", e); + List<Class<? extends C>> cls = new ArrayList<>(classes.length); + for (String cl : classes) { + Class<? extends C> clz = InspectionUtil.findImplementation(restrictionClass, cl); + if (clz != null) { + cls.add(clz); + } else { + throw new WrongParameterValueException(this, (String) obj, "Class '" + cl + "' not found for given value. Must be a subclass / implementation of " + restrictionClass.getName()); } } return cls; @@ -168,8 +147,8 @@ public class ClassListParameter<C> extends ListParameter<Class<? extends C>> { @Override protected boolean validate(List<Class<? extends C>> obj) throws ParameterException { - for(Class<? extends C> cls : obj) { - if(!restrictionClass.isAssignableFrom(cls)) { + for (Class<? extends C> cls : obj) { + if (!restrictionClass.isAssignableFrom(cls)) { throw new WrongParameterValueException(this, cls.getName(), "Class \"" + cls.getName() + "\" does not extend/implement restriction class " + restrictionClass + ".\n"); } } @@ -217,20 +196,19 @@ public class ClassListParameter<C> extends ListParameter<Class<? 
extends C>> { */ public List<C> instantiateClasses(Parameterization config) { config = config.descend(this); - List<C> instances = new ArrayList<C>(); - if(getValue() == null) { + List<C> instances = new ArrayList<>(); + if (getValue() == null) { config.reportError(new UnusedParameterException("Value of parameter " + getName() + " has not been specified.")); return instances; // empty list. } - for(Class<? extends C> cls : getValue()) { + for (Class<? extends C> cls : getValue()) { // NOTE: There is a duplication of this code in ObjectListParameter - keep // in sync! try { C instance = ClassGenericsUtil.tryInstantiate(restrictionClass, cls, config); instances.add(instance); - } - catch(Exception e) { + } catch (Exception e) { config.reportError(new WrongParameterValueException(this, cls.getName(), e)); } } @@ -247,26 +225,24 @@ public class ClassListParameter<C> extends ListParameter<Class<? extends C>> { public String restrictionString() { String prefix = restrictionClass.getPackage().getName() + "."; StringBuilder info = new StringBuilder(); - if(restrictionClass.isInterface()) { + if (restrictionClass.isInterface()) { info.append("Implementing "); - } - else { + } else { info.append("Extending "); } info.append(restrictionClass.getName()); info.append(FormatUtil.NEWLINE); List<Class<?>> known = getKnownImplementations(); - if(!known.isEmpty()) { + if (!known.isEmpty()) { info.append("Known classes (default package " + prefix + "):"); info.append(FormatUtil.NEWLINE); - for(Class<?> c : known) { + for (Class<?> c : known) { info.append("->" + FormatUtil.NONBREAKING_SPACE); String name = c.getName(); - if(name.startsWith(prefix)) { + if (name.startsWith(prefix)) { info.append(name.substring(prefix.length())); - } - else { + } else { info.append(name); } info.append(FormatUtil.NEWLINE); @@ -292,9 +268,9 @@ public class ClassListParameter<C> extends ListParameter<Class<? 
extends C>> { */ @Override public String getValuesDescription() { - if(restrictionClass != null && restrictionClass != Object.class) { + if (restrictionClass != null && restrictionClass != Object.class) { return restrictionString(); } return ""; } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/ClassParameter.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/ClassParameter.java index 6d63f9b5..a0669fca 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/ClassParameter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/ClassParameter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -51,16 +51,6 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz // TODO: turn restrictionClass into a constraint? public class ClassParameter<C> extends AbstractParameter<Class<? extends C>> { /** - * Class loader. - */ - protected static final ClassLoader loader = ClassLoader.getSystemClassLoader(); - - /** - * Factory class postfix. - */ - public static final String FACTORY_POSTFIX = "$Factory"; - - /** * The restriction class for this class parameter. */ protected Class<C> restrictionClass; @@ -132,29 +122,9 @@ public class ClassParameter<C> extends AbstractParameter<Class<? extends C>> { return (Class<? extends C>) obj; } if (obj instanceof String) { - String value = (String) obj; - try { - // Try exact class factory first. - try { - return (Class<? extends C>) loader.loadClass(value + FACTORY_POSTFIX); - } catch (ClassNotFoundException e) { - // Ignore, retry - } - try { - return (Class<? extends C>) loader.loadClass(value); - } catch (ClassNotFoundException e) { - // Ignore, retry - } - // Try factory for guessed name next - try { - return (Class<? extends C>) loader.loadClass(restrictionClass.getPackage().getName() + "." 
+ value + FACTORY_POSTFIX); - } catch (ClassNotFoundException e) { - // Ignore, retry - } - // Last try: guessed name prefix only - return (Class<? extends C>) loader.loadClass(restrictionClass.getPackage().getName() + "." + value); - } catch (ClassNotFoundException e) { - throw new WrongParameterValueException(this, value, "Given class \"" + value + "\" not found.", e); + Class<? extends C> clz = InspectionUtil.findImplementation(restrictionClass, (String) obj); + if (clz != null) { + return clz; } } throw new WrongParameterValueException(this, obj.toString(), "Class not found for given value. Must be a subclass / implementation of " + restrictionClass.getName()); @@ -330,9 +300,9 @@ public class ClassParameter<C> extends AbstractParameter<Class<? extends C>> { */ public static String canonicalClassName(Class<?> c, Class<?> parent) { if (parent == null) { - return canonicalClassName(c, null, FACTORY_POSTFIX); + return canonicalClassName(c, null, InspectionUtil.FACTORY_POSTFIX); } - return canonicalClassName(c, parent.getPackage(), FACTORY_POSTFIX); + return canonicalClassName(c, parent.getPackage(), InspectionUtil.FACTORY_POSTFIX); } @Override diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/DistanceParameter.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/DistanceParameter.java index f9943610..e97b6d0e 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/DistanceParameter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/DistanceParameter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/DoubleListParameter.java 
b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/DoubleListParameter.java index 885a3bfc..89cfc345 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/DoubleListParameter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/DoubleListParameter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -81,7 +81,7 @@ public class DoubleListParameter extends ListParameter<Double> { } if(obj instanceof String) { String[] values = SPLIT.split((String) obj); - ArrayList<Double> doubleValue = new ArrayList<Double>(values.length); + ArrayList<Double> doubleValue = new ArrayList<>(values.length); for(String val : values) { doubleValue.add(Double.valueOf(val)); } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/DoubleParameter.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/DoubleParameter.java index 4aee3511..632e1f8c 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/DoubleParameter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/DoubleParameter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/EnumParameter.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/EnumParameter.java index c2e1c733..4d05753c 100644 --- 
a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/EnumParameter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/EnumParameter.java @@ -111,14 +111,16 @@ public class EnumParameter<E extends Enum<E>> extends AbstractParameter<E> { @Override
protected E parseValue(Object obj) throws ParameterException {
- if(obj == null) {
+ if (obj == null) {
throw new UnspecifiedParameterException(this);
}
- if(obj instanceof String) {
+ if (enumClass.isInstance(obj)) {
+ return enumClass.cast(obj);
+ }
+ if (obj instanceof String) {
try {
return Enum.valueOf(enumClass, (String) obj);
- }
- catch(IllegalArgumentException ex) {
+ } catch (IllegalArgumentException ex) {
throw new WrongParameterValueException("Enum parameter " + getName() + " is invalid (must be one of [" + joinEnumNames(", ") + "].");
}
}
@@ -138,8 +140,8 @@ public class EnumParameter<E extends Enum<E>> extends AbstractParameter<E> { public Collection<String> getPossibleValues() {
// Convert to string array
final E[] enums = enumClass.getEnumConstants();
- ArrayList<String> values = new ArrayList<String>(enums.length);
- for(E t : enums) {
+ ArrayList<String> values = new ArrayList<>(enums.length);
+ for (E t : enums) {
values.add(t.name());
}
return values;
@@ -155,8 +157,8 @@ public class EnumParameter<E extends Enum<E>> extends AbstractParameter<E> { private String joinEnumNames(String separator) {
E[] enumTypes = enumClass.getEnumConstants();
StringBuilder sb = new StringBuilder();
- for(int i = 0; i < enumTypes.length; ++i) {
- if(i > 0) {
+ for (int i = 0; i < enumTypes.length; ++i) {
+ if (i > 0) {
sb.append(separator);
}
sb.append(enumTypes[i].name());
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/FileListParameter.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/FileListParameter.java index 203712aa..eb638298 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/FileListParameter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/FileListParameter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -108,7 +108,7 @@ public class FileListParameter extends ListParameter<File> { } if(obj instanceof String) { String[] values = SPLIT.split((String) obj); - ArrayList<File> fileValue = new ArrayList<File>(values.length); + ArrayList<File> fileValue = new ArrayList<>(values.length); for(String val : values) { fileValue.add(new File(val)); } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/FileParameter.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/FileParameter.java index 2cc5ab37..ea3fa454 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/FileParameter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/FileParameter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -137,4 +137,13 @@ public class FileParameter extends AbstractParameter<File> { public String getSyntax() { return "<file>"; } + + /** + * Get the file type (input / output) + * + * @return file type + */ + public 
FileType getFileType() { + return fileType; + } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/Flag.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/Flag.java index 90230077..7587d2a5 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/Flag.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/Flag.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/IntListParameter.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/IntListParameter.java index f61f8842..93012955 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/IntListParameter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/IntListParameter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -90,7 +90,7 @@ public class IntListParameter extends ListParameter<Integer> { } if(obj instanceof String) { String[] values = SPLIT.split((String) obj); - ArrayList<Integer> intValue = new ArrayList<Integer>(values.length); + ArrayList<Integer> intValue = new ArrayList<>(values.length); for(String val : values) { intValue.add(Integer.valueOf(val)); } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/IntParameter.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/IntParameter.java index 22823e16..30457330 
100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/IntParameter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/IntParameter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/ListParameter.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/ListParameter.java index e71a744e..119fb121 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/ListParameter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/ListParameter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/LongParameter.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/LongParameter.java index ac8bd62c..f5d441b5 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/LongParameter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/LongParameter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git 
a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/NumberParameter.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/NumberParameter.java index 8928c292..a4448d30 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/NumberParameter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/NumberParameter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/ObjectListParameter.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/ObjectListParameter.java index 85a32085..ada6239a 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/ObjectListParameter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/ObjectListParameter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -81,8 +81,8 @@ public class ObjectListParameter<C> extends ClassListParameter<C> { } if (List.class.isInstance(obj)) { List<?> l = (List<?>) obj; - ArrayList<C> inst = new ArrayList<C>(l.size()); - ArrayList<Class<? extends C>> classes = new ArrayList<Class<? extends C>>(l.size()); + ArrayList<C> inst = new ArrayList<>(l.size()); + ArrayList<Class<? extends C>> classes = new ArrayList<>(l.size()); for (Object o : l) { // does the given objects class fit? 
if (restrictionClass.isInstance(o)) { @@ -105,7 +105,7 @@ public class ObjectListParameter<C> extends ClassListParameter<C> { // Did we get a single instance? try { C inst = restrictionClass.cast(obj); - this.instances = new ArrayList<C>(1); + this.instances = new ArrayList<>(1); this.instances.add(inst); return super.parseValue(inst.getClass()); } catch (ClassCastException e) { @@ -118,7 +118,7 @@ public class ObjectListParameter<C> extends ClassListParameter<C> { public List<C> instantiateClasses(Parameterization config) { if (instances == null) { // instantiateClasses will descend itself. - instances = new ArrayList<C>(super.instantiateClasses(config)); + instances = new ArrayList<>(super.instantiateClasses(config)); } else { Parameterization cfg = null; for (int i = 0; i < instances.size(); i++) { @@ -138,6 +138,6 @@ public class ObjectListParameter<C> extends ClassListParameter<C> { } } } - return new ArrayList<C>(instances); + return new ArrayList<>(instances); } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/ObjectParameter.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/ObjectParameter.java index eff80954..4166d0a2 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/ObjectParameter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/ObjectParameter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/Parameter.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/Parameter.java index 6dadbf8f..110633b3 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/Parameter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/Parameter.java @@ -9,7 +9,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ParameterConstra This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/PatternParameter.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/PatternParameter.java index 05c99d2b..b76edbbb 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/PatternParameter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/PatternParameter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters; This file is part of ELKI: Environment for Developing KDD-Applications 
Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/RandomParameter.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/RandomParameter.java index 34d01de5..6c0668dd 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/RandomParameter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/RandomParameter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/StringParameter.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/StringParameter.java index 06789dba..3a9bbf11 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/StringParameter.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/StringParameter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/VectorListParameter.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/VectorListParameter.java index a9d01f22..906bfbd8 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/VectorListParameter.java +++ 
b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/VectorListParameter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -29,7 +29,6 @@ import java.util.List; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.UnspecifiedParameterException; import de.lmu.ifi.dbs.elki.utilities.optionhandling.WrongParameterValueException; import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ParameterConstraint; @@ -178,11 +177,11 @@ public class VectorListParameter extends ListParameter<List<Double>> { if(vectors.length == 0) { throw new WrongParameterValueException("Wrong parameter format! 
Given list of vectors for parameter \"" + getName() + "\" is empty!"); } - ArrayList<List<Double>> vecs = new ArrayList<List<Double>>(); + ArrayList<List<Double>> vecs = new ArrayList<>(); for(String vector : vectors) { String[] coordinates = SPLIT.split(vector); - ArrayList<Double> vectorCoord = new ArrayList<Double>(); + ArrayList<Double> vectorCoord = new ArrayList<>(); for(String coordinate : coordinates) { try { vectorCoord.add(Double.valueOf(coordinate)); diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/package-info.java index b6ff5216..75d7c5a1 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/package-info.java @@ -12,7 +12,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/package-info.java index 6a8126e1..1e110d8e 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/package-info.java @@ -38,7 +38,6 @@ * <li>{@link de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionUtil}: Managing parameter settings</li> * <li>{@link de.lmu.ifi.dbs.elki.utilities.InspectionUtil}: class and classpath inspection.</li> * <li>{@link de.lmu.ifi.dbs.elki.utilities.documentation.DocumentationUtil}: documentation extraction from annotations.</li> - * <li>{@link de.lmu.ifi.dbs.elki.persistent.PageFileUtil}: reporting page file accesses.</li> * <li>{@link de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.query.RStarTreeUtil}: reporting page file accesses.</li> * </ul></li> * </ul> @@ -47,7 +46,7 @@ This file is part of 
ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/pairs/CPair.java b/src/de/lmu/ifi/dbs/elki/utilities/pairs/CPair.java index dabc623e..036eaea2 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/pairs/CPair.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/pairs/CPair.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.pairs; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/pairs/CTriple.java b/src/de/lmu/ifi/dbs/elki/utilities/pairs/CTriple.java index 2a07fc28..88ed35eb 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/pairs/CTriple.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/pairs/CTriple.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.pairs; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,8 +23,6 @@ package de.lmu.ifi.dbs.elki.utilities.pairs; along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; - /** * Triple with canonical comparison function. 
* diff --git a/src/de/lmu/ifi/dbs/elki/utilities/pairs/DoubleDoublePair.java b/src/de/lmu/ifi/dbs/elki/utilities/pairs/DoubleDoublePair.java index 143c8014..092d6635 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/pairs/DoubleDoublePair.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/pairs/DoubleDoublePair.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.pairs; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -59,6 +59,15 @@ public class DoubleDoublePair implements Comparable<DoubleDoublePair>, PairInter } /** + * Clone constructor. + * + * @param other Existing pair. + */ + public DoubleDoublePair(DoubleDoublePair other) { + this(other.first, other.second); + } + + /** * Trivial equals implementation * * @param obj Object to compare to diff --git a/src/de/lmu/ifi/dbs/elki/utilities/pairs/DoubleIntPair.java b/src/de/lmu/ifi/dbs/elki/utilities/pairs/DoubleIntPair.java index 17698454..be18c712 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/pairs/DoubleIntPair.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/pairs/DoubleIntPair.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.pairs; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/pairs/DoubleObjPair.java b/src/de/lmu/ifi/dbs/elki/utilities/pairs/DoubleObjPair.java index 3bffd979..678d4532 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/pairs/DoubleObjPair.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/pairs/DoubleObjPair.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.pairs; This file is part of ELKI: Environment for Developing KDD-Applications 
Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/pairs/FCPair.java b/src/de/lmu/ifi/dbs/elki/utilities/pairs/FCPair.java index 6724ddfb..4d2f707c 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/pairs/FCPair.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/pairs/FCPair.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.pairs; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/pairs/IntDoublePair.java b/src/de/lmu/ifi/dbs/elki/utilities/pairs/IntDoublePair.java index 368134b0..bfcf5971 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/pairs/IntDoublePair.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/pairs/IntDoublePair.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.pairs; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/pairs/IntIntPair.java b/src/de/lmu/ifi/dbs/elki/utilities/pairs/IntIntPair.java index 108afdef..5d3210a4 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/pairs/IntIntPair.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/pairs/IntIntPair.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.pairs; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git 
a/src/de/lmu/ifi/dbs/elki/utilities/pairs/Pair.java b/src/de/lmu/ifi/dbs/elki/utilities/pairs/Pair.java index f682e06f..ca8db67d 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/pairs/Pair.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/pairs/Pair.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.pairs; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/pairs/PairInterface.java b/src/de/lmu/ifi/dbs/elki/utilities/pairs/PairInterface.java index 5ea33971..fde1f3a9 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/pairs/PairInterface.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/pairs/PairInterface.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.pairs; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/pairs/PairUtil.java b/src/de/lmu/ifi/dbs/elki/utilities/pairs/PairUtil.java index 01b27ca0..387d4f79 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/pairs/PairUtil.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/pairs/PairUtil.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.pairs; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -50,7 +50,7 @@ public final class PairUtil { * @return Comparator */ public static <FIRST extends Comparable<? super FIRST>, SECOND extends Comparable<? super SECOND>> Comparator<Pair<? extends FIRST, ? 
extends SECOND>> comparator() { - return new CompareNatural<FIRST, SECOND>(); + return new CompareNatural<>(); } /** @@ -63,7 +63,7 @@ public final class PairUtil { * @return Comparator */ public static <FIRST, SECOND> Comparator<Pair<? extends FIRST, ? extends SECOND>> comparator(Comparator<? super FIRST> c1, Comparator<? super SECOND> c2) { - return new Compare<FIRST, SECOND>(c1, c2); + return new Compare<>(c1, c2); } /** @@ -75,7 +75,7 @@ public final class PairUtil { * @return Comparator */ public static <FIRST extends Comparable<? super FIRST>, SECOND> Comparator<Pair<? extends FIRST, ? extends SECOND>> comparatorFirst() { - return new CompareNaturalFirst<FIRST, SECOND>(); + return new CompareNaturalFirst<>(); } /** @@ -88,7 +88,7 @@ public final class PairUtil { * @return Comparator */ public static <FIRST, SECOND> Comparator<Pair<? extends FIRST, ? extends SECOND>> comparatorFirst(Comparator<? super FIRST> c1) { - return new CompareByFirst<FIRST, SECOND>(c1); + return new CompareByFirst<>(c1); } /** @@ -100,7 +100,7 @@ public final class PairUtil { * @return Comparator */ public static <FIRST, SECOND extends Comparable<? super SECOND>> Comparator<Pair<? extends FIRST, ? extends SECOND>> comparatorSecond() { - return new CompareNaturalSecond<FIRST, SECOND>(); + return new CompareNaturalSecond<>(); } /** @@ -113,7 +113,7 @@ public final class PairUtil { * @return Comparator */ public static <FIRST, SECOND> Comparator<Pair<? extends FIRST, ? extends SECOND>> comparatorSecond(Comparator<? super SECOND> c2) { - return new CompareBySecond<FIRST, SECOND>(c2); + return new CompareBySecond<>(c2); } /** @@ -125,7 +125,7 @@ public final class PairUtil { * @return Comparator */ public static <FIRST extends Comparable<? super FIRST>, SECOND extends Comparable<? super SECOND>> Comparator<Pair<? extends FIRST, ? 
extends SECOND>> comparatorSwapped() { - return new CompareNaturalSwapped<FIRST, SECOND>(); + return new CompareNaturalSwapped<>(); } /** @@ -139,7 +139,7 @@ public final class PairUtil { * @return Comparator */ public static <FIRST, SECOND> Comparator<Pair<? extends FIRST, ? extends SECOND>> comparatorSwapped(Comparator<? super FIRST> c1, Comparator<? super SECOND> c2) { - return new CompareSwapped<FIRST, SECOND>(c1, c2); + return new CompareSwapped<>(c1, c2); } /** diff --git a/src/de/lmu/ifi/dbs/elki/utilities/pairs/SCPair.java b/src/de/lmu/ifi/dbs/elki/utilities/pairs/SCPair.java index 585c7a57..90d7f738 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/pairs/SCPair.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/pairs/SCPair.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.pairs; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/pairs/Triple.java b/src/de/lmu/ifi/dbs/elki/utilities/pairs/Triple.java index 2f75ec5a..4cac1e9c 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/pairs/Triple.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/pairs/Triple.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.pairs; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/pairs/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/pairs/package-info.java index 2fdf3770..6a4e9501 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/pairs/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/pairs/package-info.java @@ -37,7 +37,7 @@ This file is part of ELKI: Environment for Developing 
KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/AxisBasedReferencePoints.java b/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/AxisBasedReferencePoints.java index 26a4c7ed..24829d98 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/AxisBasedReferencePoints.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/AxisBasedReferencePoints.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.referencepoints; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -88,7 +88,7 @@ public class AxisBasedReferencePoints<V extends NumberVector<?>> implements Refe delta[d] = spacescale * (minmax.second.doubleValue(d) - mean[d]); } - ArrayList<V> result = new ArrayList<V>(2 + dim); + ArrayList<V> result = new ArrayList<>(2 + dim); double[] vec = new double[dim]; // Use min and max @@ -141,7 +141,7 @@ public class AxisBasedReferencePoints<V extends NumberVector<?>> implements Refe @Override protected AxisBasedReferencePoints<V> makeInstance() { - return new AxisBasedReferencePoints<V>(spacescale); + return new AxisBasedReferencePoints<>(spacescale); } } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/FullDatabaseReferencePoints.java b/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/FullDatabaseReferencePoints.java index 335a63f8..b92d0575 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/FullDatabaseReferencePoints.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/FullDatabaseReferencePoints.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.referencepoints; This file is part of ELKI: Environment 
for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/GridBasedReferencePoints.java b/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/GridBasedReferencePoints.java index 60e47872..b94564cf 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/GridBasedReferencePoints.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/GridBasedReferencePoints.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.referencepoints; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -102,7 +102,7 @@ public class GridBasedReferencePoints<V extends NumberVector<?>> implements Refe } int gridpoints = Math.max(1, (int) Math.pow(gridres + 1, dim)); - ArrayList<V> result = new ArrayList<V>(gridpoints); + ArrayList<V> result = new ArrayList<>(gridpoints); double[] delta = new double[dim]; if (gridres > 0) { double halfgrid = gridres / 2.0; @@ -166,7 +166,7 @@ public class GridBasedReferencePoints<V extends NumberVector<?>> implements Refe @Override protected GridBasedReferencePoints<V> makeInstance() { - return new GridBasedReferencePoints<V>(gridres, gridscale); + return new GridBasedReferencePoints<>(gridres, gridscale); } } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/RandomGeneratedReferencePoints.java b/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/RandomGeneratedReferencePoints.java index 71e8e4c8..0a59d410 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/RandomGeneratedReferencePoints.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/RandomGeneratedReferencePoints.java @@ -4,7 +4,7 @@ package 
de.lmu.ifi.dbs.elki.utilities.referencepoints; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -102,7 +102,7 @@ public class RandomGeneratedReferencePoints<V extends NumberVector<?>> implement delta[d] = (minmax.second.doubleValue(d + 1) - minmax.first.doubleValue(d + 1)); } - ArrayList<V> result = new ArrayList<V>(samplesize); + ArrayList<V> result = new ArrayList<>(samplesize); double[] vec = new double[dim]; for (int i = 0; i < samplesize; i++) { for (int d = 0; d < dim; d++) { @@ -153,7 +153,7 @@ public class RandomGeneratedReferencePoints<V extends NumberVector<?>> implement @Override protected RandomGeneratedReferencePoints<V> makeInstance() { - return new RandomGeneratedReferencePoints<V>(samplesize, scale); + return new RandomGeneratedReferencePoints<>(samplesize, scale); } } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/RandomSampleReferencePoints.java b/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/RandomSampleReferencePoints.java index 2cb778b3..a2a48b30 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/RandomSampleReferencePoints.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/RandomSampleReferencePoints.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.referencepoints; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -83,14 +83,14 @@ public class RandomSampleReferencePoints<V extends NumberVector<?>> implements R if(samplesize >= db.size()) { LoggingUtil.warning("Sample size is larger than database size!"); - ArrayList<V> selection = new ArrayList<V>(db.size()); + ArrayList<V> selection = 
new ArrayList<>(db.size()); for(DBIDIter iditer = db.iterDBIDs(); iditer.valid(); iditer.advance()) { selection.add(db.get(iditer)); } return selection; } - ArrayList<V> result = new ArrayList<V>(samplesize); + ArrayList<V> result = new ArrayList<>(samplesize); int dbsize = db.size(); // Guess the memory requirements of a hashmap. @@ -119,7 +119,7 @@ public class RandomSampleReferencePoints<V extends NumberVector<?>> implements R ids = null; // dirty! } else { - HashSet<Integer> selected = new HashSet<Integer>(); + HashSet<Integer> selected = new HashSet<>(); for(int i = 0; i < samplesize; i++) { int j = (int) Math.floor(Math.random() * dbsize); // Redraw from pool. @@ -159,7 +159,7 @@ public class RandomSampleReferencePoints<V extends NumberVector<?>> implements R @Override protected RandomSampleReferencePoints<V> makeInstance() { - return new RandomSampleReferencePoints<V>(samplesize); + return new RandomSampleReferencePoints<>(samplesize); } } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/ReferencePointsHeuristic.java b/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/ReferencePointsHeuristic.java index 70114a32..672b5240 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/ReferencePointsHeuristic.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/ReferencePointsHeuristic.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.referencepoints; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/StarBasedReferencePoints.java b/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/StarBasedReferencePoints.java index 0e562770..611100b4 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/StarBasedReferencePoints.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/StarBasedReferencePoints.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.referencepoints; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -117,7 +117,7 @@ public class StarBasedReferencePoints<V extends NumberVector<?>> implements Refe max[d] = (max[d] - centroid[d]) * scale + centroid[d]; } - ArrayList<V> result = new ArrayList<V>(2 * dim + 1); + ArrayList<V> result = new ArrayList<>(2 * dim + 1); if (!nocenter) { result.add(factory.newNumberVector(centroid)); } @@ -173,7 +173,7 @@ public class StarBasedReferencePoints<V extends NumberVector<?>> implements Refe @Override protected StarBasedReferencePoints<V> makeInstance() { - return new StarBasedReferencePoints<V>(nocenter, scale); + 
return new StarBasedReferencePoints<>(nocenter, scale); } } } diff --git a/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/package-info.java index a8ce0bc5..6268d5d2 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/package-info.java @@ -7,7 +7,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/ClipScaling.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/ClipScaling.java index aad0b63a..068e1230 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/ClipScaling.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/ClipScaling.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/GammaScaling.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/GammaScaling.java index b23b9400..daff7275 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/GammaScaling.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/GammaScaling.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/IdentityScaling.java 
b/src/de/lmu/ifi/dbs/elki/utilities/scaling/IdentityScaling.java index a2202b92..7f24431d 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/IdentityScaling.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/IdentityScaling.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/LinearScaling.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/LinearScaling.java index 87f42835..ef28c434 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/LinearScaling.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/LinearScaling.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/MinusLogScaling.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/MinusLogScaling.java index b5007ad1..8e497dd9 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/MinusLogScaling.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/MinusLogScaling.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/ScalingFunction.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/ScalingFunction.java index 6bd0527f..c738aa9a 100644 --- 
a/src/de/lmu/ifi/dbs/elki/utilities/scaling/ScalingFunction.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/ScalingFunction.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/StaticScalingFunction.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/StaticScalingFunction.java index 228f5f1a..27d12db4 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/StaticScalingFunction.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/StaticScalingFunction.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/COPOutlierScaling.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/COPOutlierScaling.java new file mode 100644 index 00000000..202d1044 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/COPOutlierScaling.java @@ -0,0 +1,172 @@ +package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; + +import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.meta.BestFitEstimator; +import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayLikeUtil; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; + +/** + * CDF based outlier score scaling. + * + * Enhanced version of the scaling proposed in: + * <p> + * H.-P. Kriegel, P. Kröger, E. Schubert, A. Zimek<br /> + * Interpreting and Unifying Outlier Scores<br /> + * Proc. 11th SIAM International Conference on Data Mining (SDM), Mesa, AZ, 2011 + * </p> + * + * See also: + * <p> + * Hans-Peter Kriegel, Peer Kröger, Erich Schubert, Arthur Zimek<br /> + * Outlier Detection in Arbitrarily Oriented Subspaces<br /> + * in: Proc. 
IEEE International Conference on Data Mining (ICDM 2012) + * </p> + * + * @author Erich Schubert + */ +@Reference(authors = "H.-P. Kriegel, P. Kröger, E. Schubert, A. Zimek", title = "Interpreting and Unifying Outlier Scores", booktitle = "Proc. 11th SIAM International Conference on Data Mining (SDM), Mesa, AZ, 2011", url = "http://siam.omnibooksonline.com/2011datamining/data/papers/018.pdf") +public class COPOutlierScaling implements OutlierScalingFunction { + /** + * Phi parameter. + */ + private double phi = 0.; + + /** + * Score distribution. + */ + private Distribution dist; + + /** + * Inversion flag. + */ + private boolean inverted = false; + + /** + * Constructor. + * + * @param phi Phi parameter + */ + public COPOutlierScaling(double phi) { + super(); + this.phi = phi; + } + + /** + * Secondary reference. + */ + @Reference(authors = "Hans-Peter Kriegel, Peer Kröger, Erich Schubert, Arthur Zimek", title = "Outlier Detection in Arbitrarily Oriented Subspaces", booktitle = "Proc. IEEE International Conference on Data Mining (ICDM 2012)") + public static final void secondReference() { + // Dummy, reference attachment point only. + } + + @Override + public double getScaled(double value) { + if (dist == null) { + throw new AbortException("Programming error: outlier scaling not initialized."); + } + double s = inverted ? (1 - dist.cdf(value)) : dist.cdf(value); + return (phi > 0.) ? 
(phi * s) / (1 - s + phi) : s; + } + + @Override + public double getMin() { + return 0.; + } + + @Override + public double getMax() { + return 1.; + } + + @Override + public void prepare(OutlierResult or) { + double[] s; + { + Relation<Double> scores = or.getScores(); + s = new double[scores.size()]; + int i = 0; + for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance(), i++) { + s[i] = scores.get(id); + } + } + Arrays.sort(s); + dist = BestFitEstimator.STATIC.estimate(s, ArrayLikeUtil.DOUBLEARRAYADAPTER); + inverted = (or.getOutlierMeta() instanceof InvertedOutlierScoreMeta); + } + + @Override + public <A> void prepare(A array, NumberArrayAdapter<?, A> adapter) { + double[] s = ArrayLikeUtil.toPrimitiveDoubleArray(array, adapter); + Arrays.sort(s); + dist = BestFitEstimator.STATIC.estimate(s, ArrayLikeUtil.DOUBLEARRAYADAPTER); + inverted = false; // Not supported + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + /** + * Phi parameter. + */ + public static final OptionID PHI_ID = new OptionID("copscaling.phi", "Phi parameter, expected rate of outliers. Set to 0 to use raw CDF values."); + + /** + * Phi value. 
+ */ + private double phi = 0.; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + DoubleParameter phiP = new DoubleParameter(PHI_ID); + if (config.grab(phiP)) { + phi = phiP.doubleValue(); + } + } + + @Override + protected COPOutlierScaling makeInstance() { + return new COPOutlierScaling(phi); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/HeDESNormalizationOutlierScaling.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/HeDESNormalizationOutlierScaling.java index 4bf86f06..5e55c2e0 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/HeDESNormalizationOutlierScaling.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/HeDESNormalizationOutlierScaling.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -28,6 +28,7 @@ import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; import de.lmu.ifi.dbs.elki.math.MeanVariance; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; /** @@ -63,9 +64,29 @@ public class HeDESNormalizationOutlierScaling implements OutlierScalingFunction DoubleMinMax minmax = new DoubleMinMax(); Relation<Double> scores = or.getScores(); - for(DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { + for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { double val = scores.get(id); - if(!Double.isNaN(val) && !Double.isInfinite(val)) { + if (!Double.isNaN(val) && !Double.isInfinite(val)) { + mv.put(val); + minmax.put(val); + } + } + + mean = mv.getMean(); + stddev = 
mv.getSampleStddev(); + scaledmax = getScaled(minmax.getMax()); + scaledmin = getScaled(minmax.getMin()); + } + + @Override + public <A> void prepare(A array, NumberArrayAdapter<?, A> adapter) { + MeanVariance mv = new MeanVariance(); + DoubleMinMax minmax = new DoubleMinMax(); + + final int size = adapter.size(array); + for (int i = 0; i < size; i++) { + double val = adapter.getDouble(array, i); + if (!Double.isNaN(val) && !Double.isInfinite(val)) { mv.put(val); minmax.put(val); } @@ -90,6 +111,10 @@ public class HeDESNormalizationOutlierScaling implements OutlierScalingFunction @Override public double getScaled(double value) { assert (stddev > 0 || (value == mean)) : "prepare() was not run prior to using the scaling function."; - return (value - mean) / stddev; + if (stddev > 0.) { + return (value - mean) / stddev; + } else { + return 0.; + } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/MinusLogGammaScaling.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/MinusLogGammaScaling.java index f7cf0df0..effbcfe0 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/MinusLogGammaScaling.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/MinusLogGammaScaling.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/MinusLogStandardDeviationScaling.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/MinusLogStandardDeviationScaling.java index e34355a1..d30fbac7 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/MinusLogStandardDeviationScaling.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/MinusLogStandardDeviationScaling.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/MixtureModelOutlierScalingFunction.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/MixtureModelOutlierScalingFunction.java index e026b211..9c5ad920 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/MixtureModelOutlierScalingFunction.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/MixtureModelOutlierScalingFunction.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; This file is part of ELKI: Environment for Developing 
KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -31,6 +31,7 @@ import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.math.MathUtil; import de.lmu.ifi.dbs.elki.math.MeanVariance; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; /** @@ -45,7 +46,7 @@ public class MixtureModelOutlierScalingFunction implements OutlierScalingFunctio * The logger for this class. */ private static final Logging LOG = Logging.getLogger(MixtureModelOutlierScalingFunction.class); - + /** * Parameter mu of the gaussian distribution (outliers) */ @@ -122,14 +123,14 @@ public class MixtureModelOutlierScalingFunction implements OutlierScalingFunctio // Initial parameters - are these defaults sounds? 
MeanVariance mv = new MeanVariance(); Relation<Double> scores = or.getScores(); - for(DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { + for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { double val = scores.get(id); - if(!Double.isNaN(val) && !Double.isInfinite(val)) { + if (!Double.isNaN(val) && !Double.isInfinite(val)) { mv.put(val); } } double curMu = mv.getMean() * 2.; - if(curMu == 0) { + if (curMu == 0) { curMu = Double.MIN_NORMAL; } double curSigma = Math.max(mv.getSampleStddev(), Double.MIN_NORMAL); @@ -141,7 +142,7 @@ public class MixtureModelOutlierScalingFunction implements OutlierScalingFunctio int iter = 0; // logger.debugFine("iter #-1 mu = " + curMu + " sigma = " + curSigma + // " lambda = " + curLambda + " alpha = " + curAlpha); - while(true) { + while (true) { // E and M-Steps // Sum of weights double tisum = 0.0; @@ -149,7 +150,7 @@ public class MixtureModelOutlierScalingFunction implements OutlierScalingFunctio double wsum = 0.0; // Weighted deviation from previous mean double sqsum = 0.0; - for(int i = 0; i < ids.size(); i++) { + for (int i = 0; i < ids.size(); i++) { double val = or.getScores().get(ids.get(i)); // E-Step double ti = calcPosterior(val, curAlpha, curMu, curSigma, curLambda); @@ -158,7 +159,7 @@ public class MixtureModelOutlierScalingFunction implements OutlierScalingFunctio wsum += ti * val; sqsum += ti * val * val; // (val - curMu) * (val - curMu); } - if(tisum <= 0.0 || wsum <= 0.0) { + if (tisum <= 0.0 || wsum <= 0.0) { LOG.warning("MixtureModel Outlier Scaling converged to extreme."); break; } @@ -169,23 +170,115 @@ public class MixtureModelOutlierScalingFunction implements OutlierScalingFunctio // converged? 
{ boolean changed = false; - if(Math.abs(newMu - curMu) > DELTA) { + if (Math.abs(newMu - curMu) > DELTA) { + changed = true; + } + if (Math.abs(newSigma - curSigma) > DELTA) { + changed = true; + } + if (Math.abs(newLambda - curLambda) > DELTA) { + changed = true; + } + if (Math.abs(newAlpha - curAlpha) > DELTA) { + changed = true; + } + if (!changed) { + break; + } + } + if (newSigma <= 0.0 || newAlpha <= 0.0) { + LOG.warning("MixtureModel Outlier Scaling converged to extreme."); + break; + } + // logger.debugFine("iter #"+iter+" mu = " + newMu + " sigma = " + + // newSigma + " lambda = " + newLambda + " alpha = " + newAlpha); + curMu = newMu; + curSigma = newSigma; + curLambda = newLambda; + curAlpha = newAlpha; + + iter++; + if (iter > 100) { + LOG.warning("Max iterations met in mixture model fitting."); + break; + } + } + mu = curMu; + sigma = curSigma; + lambda = curLambda; + alpha = curAlpha; + // logger.debugFine("mu = " + mu + " sigma = " + sigma + " lambda = " + + // lambda + " alpha = " + alpha); + } + + @Override + public <A> void prepare(A array, NumberArrayAdapter<?, A> adapter) { + // Initial parameters - are these defaults sounds? + MeanVariance mv = new MeanVariance(); + final int size = adapter.size(array); + for (int i = 0; i < size; i++) { + double val = adapter.getDouble(array, i); + if (!Double.isNaN(val) && !Double.isInfinite(val)) { + mv.put(val); + } + } + double curMu = mv.getMean() * 2.; + if (curMu == 0) { + curMu = Double.MIN_NORMAL; + } + double curSigma = Math.max(mv.getSampleStddev(), Double.MIN_NORMAL); + double curLambda = Math.min(1.0 / curMu, Double.MAX_VALUE); + double curAlpha = 0.05; + + // TODO: stop condition! 
+ int iter = 0; + // logger.debugFine("iter #-1 mu = " + curMu + " sigma = " + curSigma + + // " lambda = " + curLambda + " alpha = " + curAlpha); + while (true) { + // E and M-Steps + // Sum of weights + double tisum = 0.0; + // Weighted sum + double wsum = 0.0; + // Weighted deviation from previous mean + double sqsum = 0.0; + for (int i = 0; i < size; i++) { + double val = adapter.getDouble(array, i); + // E-Step + double ti = calcPosterior(val, curAlpha, curMu, curSigma, curLambda); + // M-Step + tisum += ti; + wsum += ti * val; + sqsum += ti * val * val; // (val - curMu) * (val - curMu); + } + if (tisum <= 0.0 || wsum <= 0.0) { + LOG.warning("MixtureModel Outlier Scaling converged to extreme."); + break; + } + double newMu = wsum / tisum; + double newSigma = Math.max(Math.sqrt(sqsum / tisum - newMu * newMu), Double.MIN_NORMAL); + double newLambda = Math.min(tisum / wsum, Double.MAX_VALUE); + double newAlpha = tisum / size; + // converged? + { + boolean changed = false; + if (Math.abs(newMu - curMu) > DELTA) { changed = true; } - if(Math.abs(newSigma - curSigma) > DELTA) { + if (Math.abs(newSigma - curSigma) > DELTA) { changed = true; } - if(Math.abs(newLambda - curLambda) > DELTA) { + if (Math.abs(newLambda - curLambda) > DELTA) { changed = true; } - if(Math.abs(newAlpha - curAlpha) > DELTA) { + if (Math.abs(newAlpha - curAlpha) > DELTA) { changed = true; } - if(!changed) { + if (!changed) { break; } } - if(newSigma <= 0.0 || newAlpha <= 0.0) { + if (newSigma <= 0.0 || newAlpha <= 0.0) { LOG.warning("MixtureModel Outlier Scaling converged to extreme."); break; } @@ -197,7 +290,7 @@ public class MixtureModelOutlierScalingFunction implements OutlierScalingFunctio curAlpha = newAlpha; iter++; - if(iter > 100) { + if (iter > 100) { LOG.warning("Max iterations met in mixture model fitting."); break; } @@ -224,9 +317,9 @@ public class MixtureModelOutlierScalingFunction implements OutlierScalingFunctio public double getScaled(double value) { final double val = 1.0 - 
calcPosterior(value, alpha, mu, sigma, lambda); // Work around issues with unstable convergence. - if(Double.isNaN(val)) { + if (Double.isNaN(val)) { return 0.0; } return val; } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/MultiplicativeInverseScaling.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/MultiplicativeInverseScaling.java index 298a5853..d5cd3f40 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/MultiplicativeInverseScaling.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/MultiplicativeInverseScaling.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -26,6 +26,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; /** @@ -68,16 +69,6 @@ public class MultiplicativeInverseScaling implements OutlierScalingFunction { @Override public void prepare(OutlierResult or) { - scaleval = getScaleValue(or); - } - - /** - * Compute the scaling value in a linear scan over the annotation. - * - * @param or Outlier result - * @return Scaling value. 
- */ - private static double getScaleValue(OutlierResult or) { double max = Double.MIN_VALUE; Relation<Double> scores = or.getScores(); for(DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { @@ -87,9 +78,22 @@ public class MultiplicativeInverseScaling implements OutlierScalingFunction { max = Math.max(max, inv); } } - return max; + scaleval = max; } - + + @Override + public <A> void prepare(A array, NumberArrayAdapter<?, A> adapter) { + double max = Double.MIN_VALUE; + final int size = adapter.size(array); + for (int i = 0; i < size; i++) { + double inv = Math.abs(1.0 / adapter.getDouble(array, i)); + if(!Double.isInfinite(inv) && !Double.isNaN(inv)) { + max = Math.max(max, inv); + } + } + scaleval = max; + } + @Override public double getMin() { return 0.0; diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierGammaScaling.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierGammaScaling.java index 07e58679..7da0c933 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierGammaScaling.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierGammaScaling.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -29,6 +29,7 @@ import de.lmu.ifi.dbs.elki.math.MeanVariance; import de.lmu.ifi.dbs.elki.math.statistics.distribution.GammaDistribution; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; 
@@ -91,7 +92,7 @@ public class OutlierGammaScaling implements OutlierScalingFunction { public double getScaled(double value) { assert (theta > 0) : "prepare() was not run prior to using the scaling function."; value = preScale(value); - if(Double.isNaN(value) || Double.isInfinite(value)) { + if (Double.isNaN(value) || Double.isInfinite(value)) { return 1.0; } return Math.max(0, (GammaDistribution.regularizedGammaP(k, value / theta) - atmean) / (1 - atmean)); @@ -102,10 +103,29 @@ public class OutlierGammaScaling implements OutlierScalingFunction { meta = or.getOutlierMeta(); MeanVariance mv = new MeanVariance(); Relation<Double> scores = or.getScores(); - for(DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { + for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { double score = scores.get(id); score = preScale(score); - if(!Double.isNaN(score) && !Double.isInfinite(score)) { + if (!Double.isNaN(score) && !Double.isInfinite(score)) { + mv.put(score); + } + } + final double mean = mv.getMean(); + final double var = mv.getSampleVariance(); + k = (mean * mean) / var; + theta = var / mean; + atmean = GammaDistribution.regularizedGammaP(k, mean / theta); + // logger.warning("Mean:"+mean+" Var:"+var+" Theta: "+theta+" k: "+k+" valatmean"+atmean); + } + + @Override + public <A> void prepare(A array, NumberArrayAdapter<?, A> adapter) { + MeanVariance mv = new MeanVariance(); + final int size = adapter.size(array); + for (int i = 0; i < size; i++) { + double score = adapter.getDouble(array, i); + score = preScale(score); + if (!Double.isNaN(score) && !Double.isInfinite(score)) { mv.put(score); } } @@ -126,7 +146,7 @@ public class OutlierGammaScaling implements OutlierScalingFunction { * @return Normalized score. 
*/ protected double preScale(double score) { - if(normalize) { + if (normalize) { score = meta.normalizeScore(score); } return score; @@ -156,7 +176,7 @@ public class OutlierGammaScaling implements OutlierScalingFunction { protected void makeOptions(Parameterization config) { super.makeOptions(config); Flag normalizeF = new Flag(NORMALIZE_ID); - if(config.grab(normalizeF)) { + if (config.grab(normalizeF)) { normalize = normalizeF.getValue(); } } @@ -166,4 +186,4 @@ public class OutlierGammaScaling implements OutlierScalingFunction { return new OutlierGammaScaling(normalize); } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierLinearScaling.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierLinearScaling.java index 8f008176..dfae1068 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierLinearScaling.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierLinearScaling.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -30,6 +30,7 @@ import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; import de.lmu.ifi.dbs.elki.math.MeanVariance; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GlobalParameterConstraint; @@ -136,7 +137,7 @@ public class OutlierLinearScaling implements OutlierScalingFunction { @Override public double getScaled(double value) { assert (factor != 0) : "prepare() was not run prior to using the scaling function."; - if(value <= min) { + if (value <= min) { return 0; } return Math.min(1, ((value - min) / factor)); @@ -144,53 +145,107 @@ public class OutlierLinearScaling implements OutlierScalingFunction { @Override public void prepare(OutlierResult or) { - if(usemean) { + if (usemean) { MeanVariance mv = new MeanVariance(); DoubleMinMax mm = (max == null) ? 
new DoubleMinMax() : null; boolean skippedzeros = false; Relation<Double> scores = or.getScores(); - for(DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { + for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { double val = scores.get(id); - if(nozeros && val == 0.0) { + if (nozeros && val == 0.0) { skippedzeros = true; continue; } - if(!Double.isNaN(val) && !Double.isInfinite(val)) { + if (!Double.isNaN(val) && !Double.isInfinite(val)) { mv.put(val); } - if(max == null) { + if (max == null) { mm.put(val); } } - if(skippedzeros && mm.getMin() == mm.getMax()) { + if (skippedzeros && mm.getMin() == mm.getMax()) { mm.put(0.0); mv.put(0.0); } min = mv.getMean(); - if(max == null) { + if (max == null) { max = mm.getMax(); } - } - else { - if(min == null || max == null) { + } else { + if (min == null || max == null) { boolean skippedzeros = false; DoubleMinMax mm = new DoubleMinMax(); Relation<Double> scores = or.getScores(); - for(DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { + for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { double val = scores.get(id); - if(nozeros && val == 0.0) { + if (nozeros && val == 0.0) { + skippedzeros = true; + continue; + } + mm.put(val); + } + if (skippedzeros && mm.getMin() == mm.getMax()) { + mm.put(0.0); + } + if (min == null) { + min = mm.getMin(); + } + if (max == null) { + max = mm.getMax(); + } + } + } + factor = (max - min); + } + + @Override + public <A> void prepare(A array, NumberArrayAdapter<?, A> adapter) { + if (usemean) { + MeanVariance mv = new MeanVariance(); + DoubleMinMax mm = (max == null) ? 
new DoubleMinMax() : null; + boolean skippedzeros = false; + final int size = adapter.size(array); + for (int i = 0; i < size; i++) { + double val = adapter.getDouble(array, i); + if (nozeros && val == 0.0) { + skippedzeros = true; + continue; + } + if (!Double.isNaN(val) && !Double.isInfinite(val)) { + mv.put(val); + } + if (max == null) { + mm.put(val); + } + } + if (skippedzeros && mm.getMin() == mm.getMax()) { + mm.put(0.0); + mv.put(0.0); + } + min = mv.getMean(); + if (max == null) { + max = mm.getMax(); + } + } else { + if (min == null || max == null) { + boolean skippedzeros = false; + DoubleMinMax mm = new DoubleMinMax(); + final int size = adapter.size(array); + for (int i = 0; i < size; i++) { + double val = adapter.getDouble(array, i); + if (nozeros && val == 0.0) { skippedzeros = true; continue; } mm.put(val); } - if(skippedzeros && mm.getMin() == mm.getMax()) { + if (skippedzeros && mm.getMin() == mm.getMax()) { mm.put(0.0); } - if(min == null) { + if (min == null) { min = mm.getMin(); } - if(max == null) { + if (max == null) { max = mm.getMax(); } } @@ -241,28 +296,28 @@ public class OutlierLinearScaling implements OutlierScalingFunction { super.makeOptions(config); DoubleParameter minP = new DoubleParameter(MIN_ID); minP.setOptional(true); - if(config.grab(minP)) { + if (config.grab(minP)) { min = minP.getValue(); } DoubleParameter maxP = new DoubleParameter(MAX_ID); maxP.setOptional(true); - if(config.grab(maxP)) { + if (config.grab(maxP)) { max = maxP.getValue(); } Flag meanF = new Flag(MEAN_ID); - if(config.grab(meanF)) { + if (config.grab(meanF)) { usemean = meanF.getValue(); } Flag nozerosF = new Flag(NOZEROS_ID); - if(config.grab(nozerosF)) { + if (config.grab(nozerosF)) { nozeros = nozerosF.getValue(); } // Use-Mean and Minimum value must not be set at the same time! 
- ArrayList<Parameter<?>> minmean = new ArrayList<Parameter<?>>(); + ArrayList<Parameter<?>> minmean = new ArrayList<>(); minmean.add(minP); minmean.add(meanF); GlobalParameterConstraint gpc = new OnlyOneIsAllowedToBeSetGlobalConstraint(minmean); @@ -274,4 +329,4 @@ public class OutlierLinearScaling implements OutlierScalingFunction { return new OutlierLinearScaling(min, max, usemean, nozeros); } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierMinusLogScaling.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierMinusLogScaling.java index 45c6928b..f0e04913 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierMinusLogScaling.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierMinusLogScaling.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -27,6 +27,7 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; /** @@ -79,13 +80,27 @@ public class OutlierMinusLogScaling implements OutlierScalingFunction { public void prepare(OutlierResult or) { DoubleMinMax mm = new DoubleMinMax(); Relation<Double> scores = or.getScores(); - for(DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { + for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { double val = scores.get(id); - if(!Double.isNaN(val) && !Double.isInfinite(val)) { + if (!Double.isNaN(val) && !Double.isInfinite(val)) { mm.put(val); } } max = mm.getMax(); mlogmax = -Math.log(mm.getMin() / max); } -}
\ No newline at end of file + + @Override + public <A> void prepare(A array, NumberArrayAdapter<?, A> adapter) { + DoubleMinMax mm = new DoubleMinMax(); + final int size = adapter.size(array); + for (int i = 0; i < size; i++) { + double val = adapter.getDouble(array, i); + if (!Double.isNaN(val) && !Double.isInfinite(val)) { + mm.put(val); + } + } + max = mm.getMax(); + mlogmax = -Math.log(mm.getMin() / max); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierScalingFunction.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierScalingFunction.java index ac774745..9de05e78 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierScalingFunction.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierScalingFunction.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -24,6 +24,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; */ import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; import de.lmu.ifi.dbs.elki.utilities.scaling.ScalingFunction; /** @@ -37,9 +38,22 @@ public interface OutlierScalingFunction extends ScalingFunction { /** * Prepare is called once for each data set, before getScaled() will be * called. This function can be used to extract global parameters such as - * means, minimums or maximums from the Database, Result or Annotation. + * means, minimums or maximums from the outlier scores. * * @param or Outlier result to use */ public void prepare(OutlierResult or); + + /** + * Prepare is called once for each data set, before getScaled() will be + * called. 
This function can be used to extract global parameters such as + * means, minimums or maximums from the score array. + * + * The method using a full {@link OutlierResult} is preferred, as it will + * allow access to the metadata. + * + * @param array Data to process + * @param adapter Array adapter + */ + public <A> void prepare(A array, NumberArrayAdapter<?, A> adapter); }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierSqrtScaling.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierSqrtScaling.java index 41a4d721..302602d4 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierSqrtScaling.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/OutlierSqrtScaling.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -27,6 +27,7 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; @@ -43,30 +44,14 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; */ public class OutlierSqrtScaling implements OutlierScalingFunction { /** - * Parameter to specify the fixed minimum to use. - * <p> - * Key: {@code -sqrtscale.min} - * </p> + * Minimum and maximum values. */ - public static final OptionID MIN_ID = new OptionID("sqrtscale.min", "Fixed minimum to use in sqrt scaling."); + protected double min, max; /** - * Parameter to specify the fixed maximum to use. - * <p> - * Key: {@code -sqrtscale.max} - * </p> + * Predefined minimum and maximum values. 
*/ - public static final OptionID MAX_ID = new OptionID("sqrtscale.max", "Fixed maximum to use in sqrt scaling."); - - /** - * Field storing the minimum value - */ - protected Double min = null; - - /** - * Field storing the Maximum value - */ - protected Double max = null; + protected Double pmin = null, pmax = null; /** * Scaling factor @@ -76,19 +61,19 @@ public class OutlierSqrtScaling implements OutlierScalingFunction { /** * Constructor. * - * @param min - * @param max + * @param pmin Predefined minimum + * @param pmax Predefined maximum */ - public OutlierSqrtScaling(Double min, Double max) { + public OutlierSqrtScaling(Double pmin, Double pmax) { super(); - this.min = min; - this.max = max; + this.pmin = pmin; + this.pmax = pmax; } @Override public double getScaled(double value) { assert (factor != 0) : "prepare() was not run prior to using the scaling function."; - if(value <= min) { + if (value <= min) { return 0; } return Math.min(1, (Math.sqrt(value - min) / factor)); @@ -96,21 +81,34 @@ public class OutlierSqrtScaling implements OutlierScalingFunction { @Override public void prepare(OutlierResult or) { - if(min == null || max == null) { + if (pmin == null || pmax == null) { DoubleMinMax mm = new DoubleMinMax(); Relation<Double> scores = or.getScores(); - for(DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { + for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { double val = scores.get(id); - if(!Double.isNaN(val) && !Double.isInfinite(val)) { + if (!Double.isInfinite(val)) { mm.put(val); } } - if(min == null) { - min = mm.getMin(); - } - if(max == null) { - max = mm.getMax(); + min = (pmin == null) ? mm.getMin() : pmin; + max = (pmax == null) ? 
mm.getMax() : pmax; + } + factor = Math.sqrt(max - min); + } + + @Override + public <A> void prepare(A array, NumberArrayAdapter<?, A> adapter) { + if (pmin == null || pmax == null) { + DoubleMinMax mm = new DoubleMinMax(); + final int size = adapter.size(array); + for (int i = 0; i < size; i++) { + double val = adapter.getDouble(array, i); + if (!Double.isInfinite(val)) { + mm.put(val); + } } + min = (pmin == null) ? mm.getMin() : pmin; + max = (pmax == null) ? mm.getMax() : pmax; } factor = Math.sqrt(max - min); } @@ -133,8 +131,30 @@ public class OutlierSqrtScaling implements OutlierScalingFunction { * @apiviz.exclude */ public static class Parameterizer extends AbstractParameterizer { + /** + * Parameter to specify the fixed minimum to use. + * <p> + * Key: {@code -sqrtscale.min} + * </p> + */ + public static final OptionID MIN_ID = new OptionID("sqrtscale.min", "Fixed minimum to use in sqrt scaling."); + + /** + * Parameter to specify the fixed maximum to use. + * <p> + * Key: {@code -sqrtscale.max} + * </p> + */ + public static final OptionID MAX_ID = new OptionID("sqrtscale.max", "Fixed maximum to use in sqrt scaling."); + + /** + * Predefined minimum value. + */ protected double min; + /** + * Predefined maximum value. + */ protected double max; @Override @@ -142,12 +162,12 @@ public class OutlierSqrtScaling implements OutlierScalingFunction { super.makeOptions(config); DoubleParameter minP = new DoubleParameter(MIN_ID); minP.setOptional(true); - if(config.grab(minP)) { + if (config.grab(minP)) { min = minP.getValue(); } DoubleParameter maxP = new DoubleParameter(MAX_ID); maxP.setOptional(true); - if(config.grab(maxP)) { + if (config.grab(maxP)) { max = maxP.getValue(); } } @@ -157,4 +177,4 @@ public class OutlierSqrtScaling implements OutlierScalingFunction { return new OutlierSqrtScaling(min, max); } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/RankingPseudoOutlierScaling.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/RankingPseudoOutlierScaling.java index 91f66587..505f2002 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/RankingPseudoOutlierScaling.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/RankingPseudoOutlierScaling.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -29,6 +29,8 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayLikeUtil; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; /** @@ -69,6 +71,12 @@ public class RankingPseudoOutlierScaling implements OutlierScalingFunction { // TODO: Inverted scores! 
Arrays.sort(scores); } + + @Override + public <A> void prepare(A array, NumberArrayAdapter<?, A> adapter) { + scores = ArrayLikeUtil.toPrimitiveDoubleArray(array, adapter); + Arrays.sort(scores); + } @Override public double getMax() { diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/SigmoidOutlierScalingFunction.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/SigmoidOutlierScalingFunction.java index 16115e92..34911169 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/SigmoidOutlierScalingFunction.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/SigmoidOutlierScalingFunction.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -26,12 +26,14 @@ package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; import java.util.BitSet; import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs; +import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.math.MeanVariance; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; /** @@ -46,7 +48,7 @@ public class SigmoidOutlierScalingFunction implements OutlierScalingFunction { * The logger for this class. 
*/ private static final Logging LOG = Logging.getLogger(SigmoidOutlierScalingFunction.class); - + /** * Sigmoid parameter */ @@ -62,30 +64,31 @@ public class SigmoidOutlierScalingFunction implements OutlierScalingFunction { // Initial parameters - are these defaults sounds? MeanVariance mv = new MeanVariance(); Relation<Double> scores = or.getScores(); - for(DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { + for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { double val = scores.get(id); mv.put(val); } double a = 1.0; - double b = - mv.getMean(); + double b = -mv.getMean(); int iter = 0; ArrayDBIDs ids = DBIDUtil.ensureArray(or.getScores().getDBIDs()); + DBIDArrayIter it = ids.iter(); BitSet t = new BitSet(ids.size()); boolean changing = true; - while(changing) { + while (changing) { changing = false; // E-Step - for(int i = 0; i < ids.size(); i++) { - double val = or.getScores().get(ids.get(i)); + it.seek(0); + for (int i = 0; i < ids.size(); i++, it.advance()) { + double val = or.getScores().get(it); double targ = a * val + b; - if(targ > 0) { - if (!t.get(i)) { + if (targ > 0) { + if (!t.get(i)) { t.set(i); changing = true; } - } - else { + } else { if (t.get(i)) { t.clear(i); changing = true; @@ -95,7 +98,7 @@ public class SigmoidOutlierScalingFunction implements OutlierScalingFunction { if (!changing) { break; } - //logger.debugFine("Number of outliers in sigmoid: " + t.cardinality()); + // logger.debugFine("Number of outliers in sigmoid: " + t.cardinality()); // M-Step // Implementation based on:<br /> // H.-T. Lin, C.-J. Lin, R. C. 
Weng:<br /> @@ -107,14 +110,74 @@ public class SigmoidOutlierScalingFunction implements OutlierScalingFunction { } iter++; - if(iter > 100) { + if (iter > 100) { + LOG.warning("Max iterations met in sigmoid fitting."); + break; + } + } + Afinal = a; + Bfinal = b; + LOG.debugFine("A = " + Afinal + " B = " + Bfinal); + } + + @Override + public <A> void prepare(A array, NumberArrayAdapter<?, A> adapter) { + // Initial parameters - are these defaults sounds? + MeanVariance mv = new MeanVariance(); + final int size = adapter.size(array); + for (int i = 0; i < size; i++) { + double val = adapter.getDouble(array, i); + if (!Double.isInfinite(val)) { + mv.put(val); + } + } + double a = 1.0; + double b = -mv.getMean(); + int iter = 0; + + BitSet t = new BitSet(size); + boolean changing = true; + while (changing) { + changing = false; + // E-Step + for (int i = 0; i < size; i++) { + double val = adapter.getDouble(array, i); + double targ = a * val + b; + if (targ > 0) { + if (!t.get(i)) { + t.set(i); + changing = true; + } + } else { + if (t.get(i)) { + t.clear(i); + changing = true; + } + } + } + if (!changing) { + break; + } + // logger.debugFine("Number of outliers in sigmoid: " + t.cardinality()); + // M-Step + // Implementation based on:<br /> + // H.-T. Lin, C.-J. Lin, R. C. 
Weng:<br /> + // A Note on Platt’s Probabilistic Outputs for Support Vector Machines + { + double[] newab = MStepLevenbergMarquardt(a, b, t, array, adapter); + a = newab[0]; + b = newab[1]; + } + + iter++; + if (iter > 100) { LOG.warning("Max iterations met in sigmoid fitting."); break; } } Afinal = a; Bfinal = b; - LOG.debugFine("A = "+Afinal+" B = "+Bfinal); + LOG.debugFine("A = " + Afinal + " B = " + Bfinal); } /** @@ -136,6 +199,7 @@ public class SigmoidOutlierScalingFunction implements OutlierScalingFunction { private final double[] MStepLevenbergMarquardt(double a, double b, ArrayDBIDs ids, BitSet t, Relation<Double> scores) { final int prior1 = t.cardinality(); final int prior0 = ids.size() - prior1; + DBIDArrayIter iter = ids.iter(); final int maxiter = 10; final double minstep = 1e-8; @@ -147,38 +211,38 @@ public class SigmoidOutlierScalingFunction implements OutlierScalingFunction { // t[i] := t.get(i) ? hiTarget : loTarget. // Reset, or continue with previous values? - //a = 0.0; - //b = Math.log((prior0 + 1.0) / (prior1 + 1.0)); + // a = 0.0; + // b = Math.log((prior0 + 1.0) / (prior1 + 1.0)); double fval = 0.0; - for(int i = 0; i < ids.size(); i++) { - final double val = scores.get(ids.get(i)); + iter.seek(0); + for (int i = 0; i < ids.size(); i++, iter.advance()) { + final double val = scores.get(iter); final double fApB = val * a + b; final double ti = t.get(i) ? 
hiTarget : loTarget; - if(fApB >= 0) { + if (fApB >= 0) { fval += ti * fApB + Math.log(1 + Math.exp(-fApB)); - } - else { + } else { fval += (ti - 1) * fApB + Math.log(1 + Math.exp(fApB)); } } - for(int it = 0; it < maxiter; it++) { - //logger.debugFinest("Iter: " + it + "a: " + a + " b: " + b); + for (int it = 0; it < maxiter; it++) { + // logger.debugFinest("Iter: " + it + "a: " + a + " b: " + b); // Update Gradient and Hessian (use H’ = H + sigma I) double h11 = sigma; double h22 = sigma; double h21 = 0.0; double g1 = 0.0; double g2 = 0.0; - for(int i = 0; i < ids.size(); i++) { - final double val = scores.get(ids.get(i)); + iter.seek(0); + for (int i = 0; i < ids.size(); i++, iter.advance()) { + final double val = scores.get(iter); final double fApB = val * a + b; final double p; final double q; - if(fApB >= 0) { + if (fApB >= 0) { p = Math.exp(-fApB) / (1.0 + Math.exp(-fApB)); q = 1.0 / (1.0 + Math.exp(-fApB)); - } - else { + } else { p = 1.0 / (1.0 + Math.exp(fApB)); q = Math.exp(fApB) / (1.0 + Math.exp(fApB)); } @@ -191,7 +255,7 @@ public class SigmoidOutlierScalingFunction implements OutlierScalingFunction { g2 += d1; } // Stop condition - if(Math.abs(g1) < 1e-5 && Math.abs(g2) < 1e-5) { + if (Math.abs(g1) < 1e-5 && Math.abs(g2) < 1e-5) { break; } // Compute modified Newton directions @@ -200,36 +264,152 @@ public class SigmoidOutlierScalingFunction implements OutlierScalingFunction { final double dB = -(-h21 * g1 + h11 * g2) / det; final double gd = g1 * dA + g2 * dB; double stepsize = 1.0; - while(stepsize >= minstep) { // Line search + while (stepsize >= minstep) { // Line search final double newA = a + stepsize * dA; final double newB = b + stepsize * dB; double newf = 0.0; - for(int i = 0; i < ids.size(); i++) { - final double val = scores.get(ids.get(i)); + iter.seek(0); + for (int i = 0; i < ids.size(); i++, iter.advance()) { + final double val = scores.get(iter); final double fApB = val * newA + newB; final double ti = t.get(i) ? 
hiTarget : loTarget; - if(fApB >= 0) { + if (fApB >= 0) { newf += ti * fApB + Math.log(1 + Math.exp(-fApB)); - } - else { + } else { newf += (ti - 1) * fApB + Math.log(1 + Math.exp(fApB)); } } - if(newf < fval + 0.0001 * stepsize * gd) { + if (newf < fval + 0.0001 * stepsize * gd) { a = newA; b = newB; fval = newf; break; // Sufficient decrease satisfied + } else { + stepsize /= 2.0; + } + if (stepsize < minstep) { + LOG.debug("Minstep hit."); + break; + } + } + if (it + 1 >= maxiter) { + LOG.debug("Maximum iterations hit."); + break; + } + } + return new double[] { a, b }; + } + + /** + * M-Step using a modified Levenberg-Marquardt method. + * + * <p> + * Implementation based on:<br /> + * H.-T. Lin, C.-J. Lin, R. C. Weng:<br /> + * A Note on Platt’s Probabilistic Outputs for Support Vector Machines + * </p> + * + * @param a A parameter + * @param b B parameter + * @param t Bitset containing the assignment + * @param array Score array + * @param adapter Array adapter + * @return new values for A and B. + */ + private final <A> double[] MStepLevenbergMarquardt(double a, double b, BitSet t, A array, NumberArrayAdapter<?, A> adapter) { + final int size = adapter.size(array); + final int prior1 = t.cardinality(); + final int prior0 = size - prior1; + + final int maxiter = 10; + final double minstep = 1e-8; + final double sigma = 1e-12; + // target value for "set" objects + final double loTarget = (prior1 + 1.0) / (prior1 + 2.0); + // target value for "unset" objects + final double hiTarget = 1.0 / (prior0 + 2.0); + // t[i] := t.get(i) ? hiTarget : loTarget. + + // Reset, or continue with previous values? + // a = 0.0; + // b = Math.log((prior0 + 1.0) / (prior1 + 1.0)); + double fval = 0.0; + for (int i = 0; i < size; i++) { + final double val = adapter.getDouble(array, i); + final double fApB = val * a + b; + final double ti = t.get(i) ? 
hiTarget : loTarget; + if (fApB >= 0) { + fval += ti * fApB + Math.log(1 + Math.exp(-fApB)); + } else { + fval += (ti - 1) * fApB + Math.log(1 + Math.exp(fApB)); + } + } + for (int it = 0; it < maxiter; it++) { + // logger.debugFinest("Iter: " + it + "a: " + a + " b: " + b); + // Update Gradient and Hessian (use H’ = H + sigma I) + double h11 = sigma; + double h22 = sigma; + double h21 = 0.0; + double g1 = 0.0; + double g2 = 0.0; + for (int i = 0; i < size; i++) { + final double val = adapter.getDouble(array, i); + final double fApB = val * a + b; + final double p; + final double q; + if (fApB >= 0) { + p = Math.exp(-fApB) / (1.0 + Math.exp(-fApB)); + q = 1.0 / (1.0 + Math.exp(-fApB)); + } else { + p = 1.0 / (1.0 + Math.exp(fApB)); + q = Math.exp(fApB) / (1.0 + Math.exp(fApB)); + } + final double d2 = p * q; + h11 += val * val * d2; + h22 += d2; + h21 += val * d2; + final double d1 = (t.get(i) ? hiTarget : loTarget) - p; + g1 += val * d1; + g2 += d1; + } + // Stop condition + if (Math.abs(g1) < 1e-5 && Math.abs(g2) < 1e-5) { + break; + } + // Compute modified Newton directions + final double det = h11 * h22 - h21 * h21; + final double dA = -(h22 * g1 - h21 * g2) / det; + final double dB = -(-h21 * g1 + h11 * g2) / det; + final double gd = g1 * dA + g2 * dB; + double stepsize = 1.0; + while (stepsize >= minstep) { // Line search + final double newA = a + stepsize * dA; + final double newB = b + stepsize * dB; + double newf = 0.0; + for (int i = 0; i < size; i++) { + final double val = adapter.getDouble(array, i); + final double fApB = val * newA + newB; + final double ti = t.get(i) ? 
hiTarget : loTarget; + if (fApB >= 0) { + newf += ti * fApB + Math.log(1 + Math.exp(-fApB)); + } else { + newf += (ti - 1) * fApB + Math.log(1 + Math.exp(fApB)); + } } - else { + if (newf < fval + 0.0001 * stepsize * gd) { + a = newA; + b = newB; + fval = newf; + break; // Sufficient decrease satisfied + } else { stepsize /= 2.0; } - if(stepsize < minstep) { + if (stepsize < minstep) { LOG.debug("Minstep hit."); break; } } - if(it + 1 >= maxiter) { + if (it + 1 >= maxiter) { LOG.debug("Maximum iterations hit."); break; } @@ -251,4 +431,4 @@ public class SigmoidOutlierScalingFunction implements OutlierScalingFunction { public double getScaled(double value) { return 1.0 / (1 + Math.exp(-Afinal * value - Bfinal)); } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/SqrtStandardDeviationScaling.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/SqrtStandardDeviationScaling.java index 6c38c781..2110570e 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/SqrtStandardDeviationScaling.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/SqrtStandardDeviationScaling.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -25,11 +25,12 @@ package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.relation.Relation; -import de.lmu.ifi.dbs.elki.math.DoubleMinMax; import de.lmu.ifi.dbs.elki.math.MathUtil; -import de.lmu.ifi.dbs.elki.math.MeanVariance; +import de.lmu.ifi.dbs.elki.math.Mean; +import de.lmu.ifi.dbs.elki.math.MeanVarianceMinMax; import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; @@ -53,71 +54,42 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; @Reference(authors = "H.-P. Kriegel, P. Kröger, E. Schubert, A. Zimek", title = "Interpreting and Unifying Outlier Scores", booktitle = "Proc. 
11th SIAM International Conference on Data Mining (SDM), Mesa, AZ, 2011", url = "http://siam.omnibooksonline.com/2011datamining/data/papers/018.pdf") public class SqrtStandardDeviationScaling implements OutlierScalingFunction { /** - * Parameter to specify the fixed minimum to use. - * <p> - * Key: {@code -sqrtstddevscale.min} - * </p> + * Effective parameters. */ - public static final OptionID MIN_ID = new OptionID("sqrtstddevscale.min", "Fixed minimum to use in sqrt scaling."); + double min, mean, factor; /** - * Parameter to specify a fixed mean to use. - * <p> - * Key: {@code -sqrtstddevscale.mean} - * </p> + * Predefined parameters. */ - public static final OptionID MEAN_ID = new OptionID("sqrtstddevscale.mean", "Fixed mean to use in standard deviation scaling."); + Double pmin = null, pmean = null; /** - * Parameter to specify the lambda value - * <p> - * Key: {@code -sqrtstddevscale.lambda} - * </p> + * Predefined lambda scaling factor. */ - public static final OptionID LAMBDA_ID = new OptionID("sqrtstddevscale.lambda", "Significance level to use for error function."); - - /** - * Field storing the lambda value - */ - protected Double lambda = null; - - /** - * Min to use - */ - Double min = null; - - /** - * Mean to use - */ - Double mean = null; - - /** - * Scaling factor to use (usually: Lambda * Stddev * Sqrt(2)) - */ - double factor; + double plambda; /** * Constructor. 
* - * @param min - * @param mean - * @param lambda + * @param pmin Predefined minimum + * @param pmean Predefined mean + * @param plambda Lambda parameter */ - public SqrtStandardDeviationScaling(Double min, Double mean, Double lambda) { + public SqrtStandardDeviationScaling(Double pmin, Double pmean, double plambda) { super(); - this.min = min; - this.mean = mean; - this.lambda = lambda; + this.pmin = pmin; + this.pmean = pmean; + this.plambda = plambda; } @Override public double getScaled(double value) { assert (factor != 0) : "prepare() was not run prior to using the scaling function."; - if(value <= min) { + if (value <= min) { return 0; } value = (value <= min) ? 0 : Math.sqrt(value - min); - if(value <= mean) { + if (value <= mean) { return 0; } return Math.max(0, NormalDistribution.erf((value - mean) / factor)); @@ -125,39 +97,61 @@ public class SqrtStandardDeviationScaling implements OutlierScalingFunction { @Override public void prepare(OutlierResult or) { - if(min == null) { - DoubleMinMax mm = new DoubleMinMax(); - Relation<Double> scores = or.getScores(); - for(DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { - double val = scores.get(id); - if(!Double.isNaN(val) && !Double.isInfinite(val)) { - mm.put(val); - } - } - min = mm.getMin(); - } - if(mean == null) { - MeanVariance mv = new MeanVariance(); + if (pmean == null) { + MeanVarianceMinMax mv = new MeanVarianceMinMax(); Relation<Double> scores = or.getScores(); - for(DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { + for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { double val = scores.get(id); val = (val <= min) ? 0 : Math.sqrt(val - min); mv.put(val); } + min = (pmin == null) ? 
mv.getMin() : pmin; mean = mv.getMean(); - factor = lambda * mv.getSampleStddev() * MathUtil.SQRT2; - } - else { + factor = plambda * mv.getSampleStddev() * MathUtil.SQRT2; + } else { + mean = pmean; double sqsum = 0; int cnt = 0; Relation<Double> scores = or.getScores(); - for(DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { + double mm = Double.POSITIVE_INFINITY; + for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { double val = scores.get(id); + mm = Math.min(mm, val); val = (val <= min) ? 0 : Math.sqrt(val - min); sqsum += (val - mean) * (val - mean); cnt += 1; } - factor = lambda * Math.sqrt(sqsum / cnt) * MathUtil.SQRT2; + min = (pmin == null) ? mm : pmin; + factor = plambda * Math.sqrt(sqsum / cnt) * MathUtil.SQRT2; + } + } + + @Override + public <A> void prepare(A array, NumberArrayAdapter<?, A> adapter) { + if (pmean == null) { + MeanVarianceMinMax mv = new MeanVarianceMinMax(); + final int size = adapter.size(array); + for (int i = 0; i < size; i++) { + double val = adapter.getDouble(array, i); + val = (val <= min) ? 0 : Math.sqrt(val - min); + mv.put(val); + } + min = (pmin == null) ? mv.getMin() : pmin; + mean = mv.getMean(); + factor = plambda * mv.getSampleStddev() * MathUtil.SQRT2; + } else { + mean = pmean; + Mean sqsum = new Mean(); + double mm = Double.POSITIVE_INFINITY; + final int size = adapter.size(array); + for (int i = 0; i < size; i++) { + double val = adapter.getDouble(array, i); + mm = Math.min(mm, val); + val = (val <= min) ? 0 : Math.sqrt(val - min); + sqsum.put((val - mean) * (val - mean)); + } + min = (pmin == null) ? mm : pmin; + factor = plambda * Math.sqrt(sqsum.getMean()) * MathUtil.SQRT2; } } @@ -179,30 +173,54 @@ public class SqrtStandardDeviationScaling implements OutlierScalingFunction { * @apiviz.exclude */ public static class Parameterizer extends AbstractParameterizer { + /** + * Parameter to specify the fixed minimum to use. 
+ * <p> + * Key: {@code -sqrtstddevscale.min} + * </p> + */ + public static final OptionID MIN_ID = new OptionID("sqrtstddevscale.min", "Fixed minimum to use in sqrt scaling."); + + /** + * Parameter to specify a fixed mean to use. + * <p> + * Key: {@code -sqrtstddevscale.mean} + * </p> + */ + public static final OptionID MEAN_ID = new OptionID("sqrtstddevscale.mean", "Fixed mean to use in standard deviation scaling."); + + /** + * Parameter to specify the lambda value + * <p> + * Key: {@code -sqrtstddevscale.lambda} + * </p> + */ + public static final OptionID LAMBDA_ID = new OptionID("sqrtstddevscale.lambda", "Significance level to use for error function."); + protected Double min = null; protected Double mean = null; - protected Double lambda = null; + protected double lambda; @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); DoubleParameter minP = new DoubleParameter(MIN_ID); minP.setOptional(true); - if(config.grab(minP)) { - min = minP.getValue(); + if (config.grab(minP)) { + min = minP.doubleValue(); } DoubleParameter meanP = new DoubleParameter(MEAN_ID); meanP.setOptional(true); - if(config.grab(meanP)) { - mean = meanP.getValue(); + if (config.grab(meanP)) { + mean = meanP.doubleValue(); } DoubleParameter lambdaP = new DoubleParameter(LAMBDA_ID, 3.0); - if(config.grab(lambdaP)) { - lambda = lambdaP.getValue(); + if (config.grab(lambdaP)) { + lambda = lambdaP.doubleValue(); } } @@ -211,4 +229,4 @@ public class SqrtStandardDeviationScaling implements OutlierScalingFunction { return new SqrtStandardDeviationScaling(min, mean, lambda); } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/StandardDeviationScaling.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/StandardDeviationScaling.java index c8ec0905..b0040571 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/StandardDeviationScaling.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/StandardDeviationScaling.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -30,6 +30,7 @@ import de.lmu.ifi.dbs.elki.math.Mean; import de.lmu.ifi.dbs.elki.math.MeanVariance; import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; @@ -45,27 +46,18 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; * Where mean can be fixed to a given value, and stddev is then computed against * this mean. * + * Reference: + * <p> + * H.-P. Kriegel, P. Kröger, E. Schubert, A. Zimek<br /> + * Interpreting and Unifying Outlier Scores<br /> + * Proc. 11th SIAM International Conference on Data Mining (SDM), Mesa, AZ, 2011 + * </p> + * * @author Erich Schubert */ -@Reference(authors="H.-P. Kriegel, P. Kröger, E. Schubert, A. Zimek", title="Interpreting and Unifying Outlier Scores", booktitle="Proc. 
11th SIAM International Conference on Data Mining (SDM), Mesa, AZ, 2011", url="http://siam.omnibooksonline.com/2011datamining/data/papers/018.pdf") +@Reference(authors = "H.-P. Kriegel, P. Kröger, E. Schubert, A. Zimek", title = "Interpreting and Unifying Outlier Scores", booktitle = "Proc. 11th SIAM International Conference on Data Mining (SDM), Mesa, AZ, 2011", url = "http://siam.omnibooksonline.com/2011datamining/data/papers/018.pdf") public class StandardDeviationScaling implements OutlierScalingFunction { /** - * Parameter to specify a fixed mean to use. - * <p> - * Key: {@code -stddevscale.mean} - * </p> - */ - public static final OptionID MEAN_ID = new OptionID("stddevscale.mean", "Fixed mean to use in standard deviation scaling."); - - /** - * Parameter to specify the lambda value - * <p> - * Key: {@code -stddevscale.lambda} - * </p> - */ - public static final OptionID LAMBDA_ID = new OptionID("stddevscale.lambda", "Significance level to use for error function."); - - /** * Field storing the fixed mean to use */ protected Double fixedmean = null; @@ -73,7 +65,7 @@ public class StandardDeviationScaling implements OutlierScalingFunction { /** * Field storing the lambda value */ - protected Double lambda = null; + protected double lambda; /** * Mean to use @@ -88,10 +80,10 @@ public class StandardDeviationScaling implements OutlierScalingFunction { /** * Constructor. 
* - * @param fixedmean - * @param lambda + * @param fixedmean Fixed mean + * @param lambda Scaling factor lambda */ - public StandardDeviationScaling(Double fixedmean, Double lambda) { + public StandardDeviationScaling(Double fixedmean, double lambda) { super(); this.fixedmean = fixedmean; this.lambda = lambda; @@ -107,7 +99,7 @@ public class StandardDeviationScaling implements OutlierScalingFunction { @Override public double getScaled(double value) { assert (factor != 0) : "prepare() was not run prior to using the scaling function."; - if(value <= mean) { + if (value <= mean) { return 0; } return Math.max(0, NormalDistribution.erf((value - mean) / factor)); @@ -115,12 +107,12 @@ public class StandardDeviationScaling implements OutlierScalingFunction { @Override public void prepare(OutlierResult or) { - if(fixedmean == null) { + if (fixedmean == null) { MeanVariance mv = new MeanVariance(); Relation<Double> scores = or.getScores(); - for(DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { + for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { double val = scores.get(id); - if(!Double.isNaN(val) && !Double.isInfinite(val)) { + if (!Double.isNaN(val) && !Double.isInfinite(val)) { mv.put(val); } } @@ -129,14 +121,46 @@ public class StandardDeviationScaling implements OutlierScalingFunction { if (factor == 0.0) { factor = Double.MIN_NORMAL; } - } - else { + } else { mean = fixedmean; Mean sqsum = new Mean(); Relation<Double> scores = or.getScores(); - for(DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { + for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) { double val = scores.get(id); - if(!Double.isNaN(val) && !Double.isInfinite(val)) { + if (!Double.isNaN(val) && !Double.isInfinite(val)) { + sqsum.put((val - mean) * (val - mean)); + } + } + factor = lambda * Math.sqrt(sqsum.getMean()) * MathUtil.SQRT2; + if (factor == 0.0) { + factor = Double.MIN_NORMAL; + } + } + } + + @Override + public <A> void prepare(A array, 
NumberArrayAdapter<?, A> adapter) { + if (fixedmean == null) { + MeanVariance mv = new MeanVariance(); + final int size = adapter.size(array); + for (int i = 0; i < size; i++) { + double val = adapter.getDouble(array, i); + if (!Double.isInfinite(val)) { + mv.put(val); + } + } + mean = mv.getMean(); + factor = lambda * mv.getSampleStddev() * MathUtil.SQRT2; + if (factor == 0.0) { + factor = Double.MIN_NORMAL; + } + } else { + mean = fixedmean; + Mean sqsum = new Mean(); + final int size = adapter.size(array); + for (int i = 0; i < size; i++) { + double val = adapter.getDouble(array, i); + if (!Double.isInfinite(val)) { sqsum.put((val - mean) * (val - mean)); } } @@ -165,20 +189,36 @@ public class StandardDeviationScaling implements OutlierScalingFunction { * @apiviz.exclude */ public static class Parameterizer extends AbstractParameterizer { + /** + * Parameter to specify a fixed mean to use. + * <p> + * Key: {@code -stddevscale.mean} + * </p> + */ + public static final OptionID MEAN_ID = new OptionID("stddevscale.mean", "Fixed mean to use in standard deviation scaling."); + + /** + * Parameter to specify the lambda value + * <p> + * Key: {@code -stddevscale.lambda} + * </p> + */ + public static final OptionID LAMBDA_ID = new OptionID("stddevscale.lambda", "Significance level to use for error function."); + protected Double fixedmean = null; - protected Double lambda = null; + protected double lambda; @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); DoubleParameter meanP = new DoubleParameter(MEAN_ID); meanP.setOptional(true); - if(config.grab(meanP)) { + if (config.grab(meanP)) { fixedmean = meanP.getValue(); } DoubleParameter lambdaP = new DoubleParameter(LAMBDA_ID, 3.0); - if(config.grab(lambdaP)) { + if (config.grab(lambdaP)) { lambda = lambdaP.getValue(); } } @@ -188,4 +228,4 @@ public class StandardDeviationScaling implements OutlierScalingFunction { return new StandardDeviationScaling(fixedmean, lambda); } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/TopKOutlierScaling.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/TopKOutlierScaling.java index f8609f3b..25103dbc 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/TopKOutlierScaling.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/TopKOutlierScaling.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -26,6 +26,9 @@ package de.lmu.ifi.dbs.elki.utilities.scaling.outlier; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.logging.LoggingUtil; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayLikeUtil; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint; @@ -94,29 +97,44 @@ public class TopKOutlierScaling implements OutlierScalingFunction { @Override public void prepare(OutlierResult or) { - if(k <= 0) { + if (k <= 0) { LoggingUtil.warning("No k configured for Top-k outlier scaling!"); } DBIDIter order = or.getOrdering().iter(or.getOrdering().getDBIDs()).iter(); - for(int i = 0; i < k && order.valid(); i++, order.advance()) { + for (int i = 0; i < k && order.valid(); i++, order.advance()) { cutoff = or.getScores().get(order); } max = or.getOutlierMeta().getActualMaximum(); ground = or.getOutlierMeta().getTheoreticalBaseline(); - if(Double.isInfinite(ground) 
|| Double.isNaN(ground)) { + if (Double.isInfinite(ground) || Double.isNaN(ground)) { ground = or.getOutlierMeta().getTheoreticalMinimum(); } - if(Double.isInfinite(ground) || Double.isNaN(ground)) { + if (Double.isInfinite(ground) || Double.isNaN(ground)) { ground = or.getOutlierMeta().getActualMinimum(); } - if(Double.isInfinite(ground) || Double.isNaN(ground)) { + if (Double.isInfinite(ground) || Double.isNaN(ground)) { ground = Math.min(0.0, cutoff); } } @Override + public <A> void prepare(A array, NumberArrayAdapter<?, A> adapter) { + if (k <= 0) { + LoggingUtil.warning("No k configured for Top-k outlier scaling!"); + } + double[] scores = ArrayLikeUtil.toPrimitiveDoubleArray(array, adapter); + QuickSelect.quickSelect(scores, k); + cutoff = scores[k - 1]; + max = Double.NEGATIVE_INFINITY; + for (double v : scores) { + max = Math.max(max, v); + } + ground = Math.min(0.0, cutoff); + } + + @Override public double getMax() { - if(binary) { + if (binary) { return 1.0; } return max; @@ -124,7 +142,7 @@ public class TopKOutlierScaling implements OutlierScalingFunction { @Override public double getMin() { - if(binary) { + if (binary) { return 0.0; } return ground; @@ -132,19 +150,16 @@ public class TopKOutlierScaling implements OutlierScalingFunction { @Override public double getScaled(double value) { - if(binary) { - if(value >= cutoff) { + if (binary) { + if (value >= cutoff) { return 1; - } - else { + } else { return 0; } - } - else { - if(value >= cutoff) { + } else { + if (value >= cutoff) { return (value - ground) / (max - ground); - } - else { + } else { return 0.0; } } @@ -167,12 +182,12 @@ public class TopKOutlierScaling implements OutlierScalingFunction { super.makeOptions(config); IntParameter kP = new IntParameter(K_ID); kP.addConstraint(new GreaterConstraint(1)); - if(config.grab(kP)) { + if (config.grab(kP)) { k = kP.intValue(); } Flag binaryF = new Flag(BINARY_ID); - if(config.grab(binaryF)) { + if (config.grab(binaryF)) { binary = binaryF.isTrue(); } } 
@@ -182,4 +197,4 @@ public class TopKOutlierScaling implements OutlierScalingFunction { return new TopKOutlierScaling(k, binary); } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/package-info.java index c2adba6b..a34aac08 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/package-info.java index 430f1bf7..633aa65a 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/xml/DOMCloner.java b/src/de/lmu/ifi/dbs/elki/utilities/xml/DOMCloner.java index dc3b48a8..44116dfe 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/xml/DOMCloner.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/xml/DOMCloner.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.xml; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/xml/HTMLUtil.java b/src/de/lmu/ifi/dbs/elki/utilities/xml/HTMLUtil.java index 6c65c7f9..a68631d5 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/xml/HTMLUtil.java +++ 
b/src/de/lmu/ifi/dbs/elki/utilities/xml/HTMLUtil.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.xml; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/xml/XMLNodeIterator.java b/src/de/lmu/ifi/dbs/elki/utilities/xml/XMLNodeIterator.java index a5b7cfa6..c1decfe1 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/xml/XMLNodeIterator.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/xml/XMLNodeIterator.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.xml; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/xml/XMLNodeListIterator.java b/src/de/lmu/ifi/dbs/elki/utilities/xml/XMLNodeListIterator.java index b413d1df..1fb39663 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/xml/XMLNodeListIterator.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/xml/XMLNodeListIterator.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.xml; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/utilities/xml/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/xml/package-info.java index 9b961687..8f6d8a04 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/xml/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/xml/package-info.java @@ -6,7 +6,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures 
-Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team |