diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies')
32 files changed, 3185 insertions, 0 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/bulk/AbstractBulkSplit.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/bulk/AbstractBulkSplit.java new file mode 100644 index 00000000..4a0304f1 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/bulk/AbstractBulkSplit.java @@ -0,0 +1,91 @@ +package de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.bulk; + +import java.util.ArrayList; +import java.util.List; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * Encapsulates the required parameters for a bulk split of a spatial index. + * + * @author Elke Achtert + */ +public abstract class AbstractBulkSplit implements BulkSplit { + /** + * Constructor + */ + public AbstractBulkSplit() { + // Nothing to do + } + + /** + * Computes and returns the best split point. + * + * @param numEntries the number of entries to be split + * @param minEntries the number of minimum entries in the node to be split + * @param maxEntries number of maximum entries in the node to be split + * @return the best split point + */ + protected int chooseBulkSplitPoint(int numEntries, int minEntries, int maxEntries) { + if(numEntries < minEntries) { + throw new IllegalArgumentException("numEntries < minEntries!"); + } + + if(numEntries <= maxEntries) { + return numEntries; + } + else if(numEntries < maxEntries + minEntries) { + return (numEntries - minEntries); + } + else { + return maxEntries; + } + } + + /** + * Perform the trivial partitioning of the given list. + * + * @param objects Objects to partition + * @param minEntries Minimum number of objects per page + * @param maxEntries Maximum number of objects per page. + * @return List with partitions + */ + protected <T> List<List<T>> trivialPartition(List<T> objects, int minEntries, int maxEntries) { + // build partitions + final int size = objects.size(); + final int numberPartitions = (int) Math.ceil(((double) size) / maxEntries); + List<List<T>> partitions = new ArrayList<List<T>>(numberPartitions); + int start = 0; + for(int pnum = 0; pnum < numberPartitions; pnum++) { + int end = (int) ((pnum + 1.) * size / numberPartitions); + if(pnum == numberPartitions - 1) { + end = size; + } + assert ((end - start) >= minEntries && (end - start) <= maxEntries); + partitions.add(objects.subList(start, end)); + start = end; + } + return partitions; + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/bulk/BulkSplit.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/bulk/BulkSplit.java new file mode 100644 index 00000000..1eb88f7c --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/bulk/BulkSplit.java @@ -0,0 +1,47 @@ +package de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.bulk; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.List; + +import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable; + +/** + * Interface for a bulk split strategy. + * + * @author Erich Schubert + */ +public interface BulkSplit extends Parameterizable { + /** + * Partitions the specified feature vectors + * + * @param <T> actual type we split + * @param spatialObjects the spatial objects to be partitioned + * @param minEntries the minimum number of entries in a partition + * @param maxEntries the maximum number of entries in a partition + * @return the partition of the specified spatial objects + */ + public <T extends SpatialComparable> List<List<T>> partition(List<T> spatialObjects, int minEntries, int maxEntries); +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/bulk/FileOrderBulkSplit.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/bulk/FileOrderBulkSplit.java new file mode 100644 index 00000000..308c70cb --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/bulk/FileOrderBulkSplit.java @@ -0,0 +1,67 @@ +package de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.bulk; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import java.util.List; + +import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Trivial bulk loading - assumes that the file has been appropriately sorted + * before. + * + * @author Erich Schubert + */ +public class FileOrderBulkSplit extends AbstractBulkSplit { + /** + * Static instance + */ + public static final FileOrderBulkSplit STATIC = new FileOrderBulkSplit(); + + /** + * Constructor. + */ + protected FileOrderBulkSplit() { + super(); + } + + @Override + public <T extends SpatialComparable> List<List<T>> partition(List<T> spatialObjects, int minEntries, int maxEntries) { + return trivialPartition(spatialObjects, minEntries, maxEntries); + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected FileOrderBulkSplit makeInstance() { + return FileOrderBulkSplit.STATIC; + } + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/bulk/MaxExtensionBulkSplit.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/bulk/MaxExtensionBulkSplit.java new file mode 100644 index 00000000..90a8b622 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/bulk/MaxExtensionBulkSplit.java @@ -0,0 +1,169 @@ +package de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.bulk; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable; +import de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.util.SpatialComparator; +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Split strategy for bulk-loading a spatial tree where the split axes are the + * dimensions with maximum extension. + * + * @author Elke Achtert + * + * @apiviz.uses SpatialComparator + */ +public class MaxExtensionBulkSplit extends AbstractBulkSplit { + /** + * Logger. + */ + private static final Logging logger = Logging.getLogger(MaxExtensionBulkSplit.class); + + /** + * Static instance + */ + public static final MaxExtensionBulkSplit STATIC = new MaxExtensionBulkSplit(); + + /** + * Constructor + */ + public MaxExtensionBulkSplit() { + // Nothing to do + } + + /** + * Partitions the specified feature vectors where the split axes are the + * dimensions with maximum extension. + * + * @param spatialObjects the spatial objects to be partitioned + * @param minEntries the minimum number of entries in a partition + * @param maxEntries the maximum number of entries in a partition + * @return the partition of the specified spatial objects + */ + @Override + public <N extends SpatialComparable> List<List<N>> partition(List<N> spatialObjects, int minEntries, int maxEntries) { + List<List<N>> partitions = new ArrayList<List<N>>(); + List<N> objects = new ArrayList<N>(spatialObjects); + + while(objects.size() > 0) { + StringBuffer msg = new StringBuffer(); + + // get the split axis and split point + int splitAxis = chooseMaximalExtendedSplitAxis(objects); + int splitPoint = chooseBulkSplitPoint(objects.size(), minEntries, maxEntries); + if(logger.isDebugging()) { + msg.append("\nsplitAxis ").append(splitAxis); + msg.append("\nsplitPoint ").append(splitPoint); + } + + // sort in the right dimension + Collections.sort(objects, new SpatialComparator(splitAxis, SpatialComparator.MIN)); + + // insert into partition + List<N> partition1 = new ArrayList<N>(); + for(int i = 0; i < splitPoint; i++) { + N o = objects.remove(0); + partition1.add(o); + } + partitions.add(partition1); + + // copy array + if(logger.isDebugging()) { + msg.append("\ncurrent partition " + partition1); + msg.append("\nremaining objects # ").append(objects.size()); + logger.debugFine(msg.toString()); + } + } + + if(logger.isDebugging()) { + logger.debugFine("partitions " + partitions); + } + return partitions; + } + + /** + * Computes and returns the best split axis. The best split axis is the split + * axes with the maximal extension. + * + * @param objects the spatial objects to be split + * @return the best split axis + */ + private int chooseMaximalExtendedSplitAxis(List<? extends SpatialComparable> objects) { + // maximum and minimum value for the extension + int dimension = objects.get(0).getDimensionality(); + double[] maxExtension = new double[dimension]; + double[] minExtension = new double[dimension]; + Arrays.fill(minExtension, Double.MAX_VALUE); + + // compute min and max value in each dimension + for(SpatialComparable object : objects) { + for(int d = 1; d <= dimension; d++) { + double min, max; + min = object.getMin(d); + max = object.getMax(d); + + if(maxExtension[d - 1] < max) { + maxExtension[d - 1] = max; + } + + if(minExtension[d - 1] > min) { + minExtension[d - 1] = min; + } + } + } + + // set split axis to dim with maximal extension + int splitAxis = -1; + double max = 0; + for(int d = 1; d <= dimension; d++) { + double currentExtension = maxExtension[d - 1] - minExtension[d - 1]; + if(max < currentExtension) { + max = currentExtension; + splitAxis = d; + } + } + return splitAxis; + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected MaxExtensionBulkSplit makeInstance() { + return MaxExtensionBulkSplit.STATIC; + } + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/bulk/OneDimSortBulkSplit.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/bulk/OneDimSortBulkSplit.java new file mode 100644 index 00000000..b99ae01e --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/bulk/OneDimSortBulkSplit.java @@ -0,0 +1,86 @@ +package de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.bulk; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import java.util.Collections; +import java.util.Comparator; +import java.util.List; + +import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Simple bulk loading strategy by sorting the data along the first dimension. + * + * This is also known as Nearest-X, and attributed to: + * <p> + * Roussopoulos, N. and Leifker, D.:<br /> + * Direct spatial search on pictorial databases using packed R-trees<br /> + * In: ACM SIGMOD Record 14-4 + * </p> + * + * @author Erich Schubert + */ +@Reference(authors = "Roussopoulos, N. and Leifker, D.", title = "Direct spatial search on pictorial databases using packed R-trees", booktitle = "ACM SIGMOD Record 14-4", url = "http://dx.doi.org/10.1145/971699.318900") +public class OneDimSortBulkSplit extends AbstractBulkSplit { + /** + * Static instance + */ + public static final AbstractBulkSplit STATIC = new OneDimSortBulkSplit(); + + /** + * Constructor. + */ + protected OneDimSortBulkSplit() { + super(); + } + + @Override + public <T extends SpatialComparable> List<List<T>> partition(List<T> spatialObjects, int minEntries, int maxEntries) { + // Sort by first dimension + Collections.sort(spatialObjects, new Comparator<SpatialComparable>() { + @Override + public int compare(SpatialComparable o1, SpatialComparable o2) { + double min1 = (o1.getMax(1) + o1.getMin(1)) / 2; + double min2 = (o2.getMax(1) + o2.getMin(1)) / 2; + return Double.compare(min1, min2); + } + }); + return trivialPartition(spatialObjects, minEntries, maxEntries); + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected AbstractBulkSplit makeInstance() { + return OneDimSortBulkSplit.STATIC; + } + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/bulk/SortTileRecursiveBulkSplit.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/bulk/SortTileRecursiveBulkSplit.java new file mode 100644 index 00000000..28e96da6 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/bulk/SortTileRecursiveBulkSplit.java @@ -0,0 +1,115 @@ +package de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.bulk; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; + +import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable; +import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; + +/** + * Sort-Tile-Recursive aims at tiling the data space with a grid-like structure + * for partitioning the dataset into the required number of buckets. + * + * Reference: + * <p> + * Leutenegger, S.T. and Lopez, M.A. and Edgington, J.:<br /> + * STR: A simple and efficient algorithm for R-tree packing<br /> + * In: Proc. 13th International Conference on Data Engineering, 1997 + * </p> + * + * @author Erich Schubert + */ +@Reference(authors = "Leutenegger, S.T. and Lopez, M.A. and Edgington, J.", title = "STR: A simple and efficient algorithm for R-tree packing", booktitle = "Proc. 13th International Conference on Data Engineering, 1997", url = "http://dx.doi.org/10.1109/ICDE.1997.582015") +public class SortTileRecursiveBulkSplit extends AbstractBulkSplit { + @Override + public <T extends SpatialComparable> List<List<T>> partition(List<T> spatialObjects, int minEntries, int maxEntries) { + final int dims = spatialObjects.get(0).getDimensionality(); + final int p = (int) Math.ceil(spatialObjects.size() / (double) maxEntries); + List<List<T>> ret = new ArrayList<List<T>>(p); + strPartition(spatialObjects, 0, spatialObjects.size(), 0, dims, maxEntries, new Compare<T>(), ret); + return ret; + } + + /** + * Recursively partition. + * + * @param objs Object list + * @param start Subinterval start + * @param end Subinteval end + * @param depth Iteration depth (must be less than dimensionality!) + * @param dims Total number of dimensions + * @param maxEntries Maximum page size + * @param c Comparison helper + * @param ret Output list + */ + protected <T extends SpatialComparable> void strPartition(List<T> objs, int start, int end, int depth, int dims, int maxEntries, Compare<T> c, List<List<T>> ret) { + c.dim = depth + 1; + final int p = (int) Math.ceil((end - start) / (double) maxEntries); + final int s = (int) Math.ceil(Math.pow(p, 1.0 / (dims - depth))); + + final double len = end - start; // double intentional! + for(int i = 0; i < s; i++) { + // We don't completely sort, but only ensure the quantile is invariant. + int s2 = start + (int) ((i * len) / s); + int e2 = start + (int) (((i + 1) * len) / s); + // LoggingUtil.warning("STR " + dim + " s2:" + s2 + " e2:" + e2); + if(e2 < end) { + QuickSelect.quickSelect(objs, c, s2, end, e2); + } + if(depth + 1 == dims) { + ret.add(objs.subList(s2, e2)); + } + else { + // Descend + strPartition(objs, s2, e2, depth + 1, dims, maxEntries, c, ret); + } + } + } + + /** + * Comparison helper. + * + * @apiviz.exclude + * + * @author Erich Schubert + * + * @param <T> Type + */ + private static class Compare<T extends SpatialComparable> implements Comparator<T> { + /** + * Current dimension + */ + public int dim; + + @Override + public int compare(T o1, T o2) { + final double v1 = o1.getMin(dim) + o1.getMax(dim); + final double v2 = o2.getMin(dim) + o2.getMax(dim); + return Double.compare(v1, v2); + } + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/bulk/SpatialSortBulkSplit.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/bulk/SpatialSortBulkSplit.java new file mode 100644 index 00000000..9c3a41a1 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/bulk/SpatialSortBulkSplit.java @@ -0,0 +1,107 @@ +package de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.bulk; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import java.util.List; + +import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable; +import de.lmu.ifi.dbs.elki.math.spacefillingcurves.SpatialSorter; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; + +/** + * Bulk loading by spatially sorting the objects, then partitioning the sorted + * list appropriately. + * + * Based conceptually on: + * <p> + * On packing R-trees<br/> + * Kamel, I. and Faloutsos, C.<br/> + * Proc. 2of the second international conference on Information and knowledge + * management + * </p> + * + * @apiviz.composedOf SpatialSorter + * + * @author Erich Schubert + */ +@Reference(title = "On packing R-trees", authors = "Kamel, I. and Faloutsos, C.", booktitle = "Proc. 2of the second international conference on Information and knowledge management", url = "http://dx.doi.org/10.1145/170088.170403") +public class SpatialSortBulkSplit extends AbstractBulkSplit { + /** + * Sorting class + */ + final SpatialSorter sorter; + + /** + * Constructor. + * + * @param sorter Sorting strategy + */ + protected SpatialSortBulkSplit(SpatialSorter sorter) { + super(); + this.sorter = sorter; + } + + @Override + public <T extends SpatialComparable> List<List<T>> partition(List<T> spatialObjects, int minEntries, int maxEntries) { + sorter.sort(spatialObjects); + return super.trivialPartition(spatialObjects, minEntries, maxEntries); + } + + /** + * Parametization class + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + /** + * Option ID for spatial sorting + */ + public static final OptionID SORTER_ID = OptionID.getOrCreateOptionID("rtree.bulk.spatial-sort", "Strategy for spatial sorting in bulk loading."); + + /** + * Sorting class + */ + SpatialSorter sorter; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + + ObjectParameter<SpatialSorter> sorterP = new ObjectParameter<SpatialSorter>(SORTER_ID, SpatialSorter.class); + if(config.grab(sorterP)) { + sorter = sorterP.instantiateClass(config); + } + } + + @Override + protected SpatialSortBulkSplit makeInstance() { + return new SpatialSortBulkSplit(sorter); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/bulk/package-info.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/bulk/package-info.java new file mode 100644 index 00000000..0d01ba83 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/bulk/package-info.java @@ -0,0 +1,26 @@ +/** + * <p>Packages for bulk-loading R*-Trees.</p> + */ +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2012 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ +package de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.bulk;
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/insert/ApproximativeLeastOverlapInsertionStrategy.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/insert/ApproximativeLeastOverlapInsertionStrategy.java new file mode 100644 index 00000000..418e92c5 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/insert/ApproximativeLeastOverlapInsertionStrategy.java @@ -0,0 +1,168 @@ +package de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.insert; +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Collections; + +import de.lmu.ifi.dbs.elki.data.HyperBoundingBox; +import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable; +import de.lmu.ifi.dbs.elki.data.spatial.SpatialUtil; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; +import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleIntPair; + +/** + * The choose subtree method proposed by the R*-Tree with slightly better + * performance for large leaf sizes (linear approximation). + * + * <p> + * N. Beckmann, H.-P. Kriegel, R. Schneider, B. Seeger:<br /> + * The R*-tree: an efficient and robust access method for points and rectangles<br /> + * in: Proceedings of the 1990 ACM SIGMOD International Conference on Management + * of Data, Atlantic City, NJ, May 23-25, 1990 + * </p> + * + * @author Erich Schubert + * @author Franz Graf + * @author Marisa Petri + */ +@Reference(authors = "N. Beckmann, H.-P. Kriegel, R. Schneider, B. Seeger", title = "The R*-tree: an efficient and robust access method for points and rectangles", booktitle = "Proceedings of the 1990 ACM SIGMOD International Conference on Management of Data, Atlantic City, NJ, May 23-25, 1990", url = "http://dx.doi.org/10.1145/93597.98741") +public class ApproximativeLeastOverlapInsertionStrategy extends LeastOverlapInsertionStrategy { + /** + * Number of candidates to consider + */ + private int numCandidates = 32; + + /** + * Constructor. + */ + public ApproximativeLeastOverlapInsertionStrategy(int candidates) { + super(); + this.numCandidates = candidates; + } + + @Override + public <A> int choose(A options, ArrayAdapter<? extends SpatialComparable, A> getter, SpatialComparable obj, int height, int depth) { + final int size = getter.size(options); + assert (size > 0) : "Choose from empty set?"; + if(size <= numCandidates) { + // Skip building the heap. + return super.choose(options, getter, obj, height, depth); + } + + // Heap of candidates + TopBoundedHeap<DoubleIntPair> candidates = new TopBoundedHeap<DoubleIntPair>(numCandidates, Collections.reverseOrder()); + for(int i = 0; i < size; i++) { + // Existing object and extended rectangle: + SpatialComparable entry = getter.get(options, i); + HyperBoundingBox mbr = SpatialUtil.union(entry, obj); + // Area increase + final double inc_area = SpatialUtil.volume(mbr) - SpatialUtil.volume(entry); + candidates.add(new DoubleIntPair(inc_area, i)); + } + + // R*-Tree: overlap increase for leaves. + int best = -1; + double least_overlap = Double.POSITIVE_INFINITY; + double least_areainc = Double.POSITIVE_INFINITY; + double least_area = Double.POSITIVE_INFINITY; + // least overlap increase, on reduced candidate set: + while(!candidates.isEmpty()) { + DoubleIntPair pair = candidates.poll(); + final double inc_area = pair.first; + + // Existing object and extended rectangle: + SpatialComparable entry = getter.get(options, pair.second); + HyperBoundingBox mbr = SpatialUtil.union(entry, obj); + // Compute relative overlap increase. + double overlap_wout = 0.0; + double overlap_with = 0.0; + for(int k = 0; k < size; k++) { + if(pair.second != k) { + SpatialComparable other = getter.get(options, k); + overlap_wout += SpatialUtil.relativeOverlap(entry, other); + overlap_with += SpatialUtil.relativeOverlap(mbr, other); + } + } + double inc_overlap = overlap_with - overlap_wout; + if(inc_overlap < least_overlap) { + final double area = SpatialUtil.volume(entry); + // Volume increase and overlap increase: + least_overlap = inc_overlap; + least_areainc = inc_area; + least_area = area; + best = pair.second; + } + else if(inc_overlap == least_overlap) { + final double area = SpatialUtil.volume(entry); + if(inc_area < least_areainc || (inc_area == least_areainc && area < least_area)) { + least_overlap = inc_overlap; + least_areainc = inc_area; + least_area = area; + best = pair.second; + } + } + } + assert (best > -1) : "No split found? Volume outside of double precision?"; + return best; + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + /** + * Fast-insertion parameter. Optional. + */ + public static OptionID INSERTION_CANDIDATES_ID = OptionID.getOrCreateOptionID("rtree.insertion-candidates", "defines how many children are tested for finding the child generating the least overlap when inserting an object."); + + /** + * The number of candidates to use + */ + int numCandidates = 32; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + IntParameter insertionCandidatesP = new IntParameter(INSERTION_CANDIDATES_ID, new GreaterConstraint(0), numCandidates); + if(config.grab(insertionCandidatesP)) { + numCandidates = insertionCandidatesP.getValue(); + } + } + + @Override + protected ApproximativeLeastOverlapInsertionStrategy makeInstance() { + return new ApproximativeLeastOverlapInsertionStrategy(numCandidates); + } + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/insert/CombinedInsertionStrategy.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/insert/CombinedInsertionStrategy.java new file mode 100644 index 00000000..c90d99b2 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/insert/CombinedInsertionStrategy.java @@ -0,0 +1,127 @@ +package de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.insert; +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ClassParameter; + +/** + * Use two different insertion strategies for directory and leaf nodes. + * + * Using two different strategies was likely first suggested in: + * <p> + * N. Beckmann, H.-P. Kriegel, R. Schneider, B. Seeger:<br /> + * The R*-tree: an efficient and robust access method for points and rectangles<br /> + * in: Proceedings of the 1990 ACM SIGMOD International Conference on Management + * of Data, Atlantic City, NJ, May 23-25, 1990 + * </p> + * + * @author Erich Schubert + */ +@Reference(authors = "N. Beckmann, H.-P. Kriegel, R. Schneider, B. Seeger", title = "The R*-tree: an efficient and robust access method for points and rectangles", booktitle = "Proceedings of the 1990 ACM SIGMOD International Conference on Management of Data, Atlantic City, NJ, May 23-25, 1990", url = "http://dx.doi.org/10.1145/93597.98741") +public class CombinedInsertionStrategy implements InsertionStrategy { + /** + * Strategy when inserting into directory nodes + */ + InsertionStrategy dirStrategy; + + /** + * Strategy when inserting into leaf nodes. + */ + InsertionStrategy leafStrategy; + + /** + * Constructor. + * + * @param dirStrategy Strategy for directory nodes + * @param leafStrategy Strategy for leaf nodes + */ + public CombinedInsertionStrategy(InsertionStrategy dirStrategy, InsertionStrategy leafStrategy) { + super(); + this.dirStrategy = dirStrategy; + this.leafStrategy = leafStrategy; + } + + @Override + public <A> int choose(A options, ArrayAdapter<? extends SpatialComparable, A> getter, SpatialComparable obj, int height, int depth) { + if(depth + 1 >= height) { + return leafStrategy.choose(options, getter, obj, height, depth); + } + else { + return dirStrategy.choose(options, getter, obj, height, depth); + } + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + /** + * Insertion strategy for directory nodes. + */ + public static final OptionID DIR_STRATEGY_ID = OptionID.getOrCreateOptionID("rtree.insert-directory", "Insertion strategy for directory nodes."); + + /** + * Insertion strategy for leaf nodes. + */ + public static final OptionID LEAF_STRATEGY_ID = OptionID.getOrCreateOptionID("rtree.insert-leaf", "Insertion strategy for leaf nodes."); + + /** + * Strategy when inserting into directory nodes + */ + InsertionStrategy dirStrategy; + + /** + * Strategy when inserting into leaf nodes. + */ + InsertionStrategy leafStrategy; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + ClassParameter<InsertionStrategy> dirP = new ClassParameter<InsertionStrategy>(DIR_STRATEGY_ID, InsertionStrategy.class, LeastEnlargementWithAreaInsertionStrategy.class); + if(config.grab(dirP)) { + dirStrategy = dirP.instantiateClass(config); + } + + ClassParameter<InsertionStrategy> leafP = new ClassParameter<InsertionStrategy>(LEAF_STRATEGY_ID, InsertionStrategy.class, LeastOverlapInsertionStrategy.class); + if(config.grab(leafP)) { + leafStrategy = leafP.instantiateClass(config); + } + } + + @Override + protected CombinedInsertionStrategy makeInstance() { + return new CombinedInsertionStrategy(dirStrategy, leafStrategy); + } + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/insert/InsertionStrategy.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/insert/InsertionStrategy.java new file mode 100644 index 00000000..96294514 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/insert/InsertionStrategy.java @@ -0,0 +1,46 @@ +package de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.insert; +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable; + +/** + * RTree insertion strategy interface. + * + * @author Erich Schubert + */ +public interface InsertionStrategy extends Parameterizable { + /** + * Choose insertion rectangle. + * + * @param options Options to choose from + * @param getter Array adapter for options + * @param obj Insertion object + * @param height Tree height + * @param depth Insertion depth (depth == height - 1 indicates leaf level) + * @return Subtree index in array. + */ + public <A> int choose(A options, ArrayAdapter<? extends SpatialComparable, A> getter, SpatialComparable obj, int height, int depth); +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/insert/LeastEnlargementInsertionStrategy.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/insert/LeastEnlargementInsertionStrategy.java new file mode 100644 index 00000000..eb211cb6 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/insert/LeastEnlargementInsertionStrategy.java @@ -0,0 +1,89 @@ +package de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.insert; +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable; +import de.lmu.ifi.dbs.elki.data.spatial.SpatialUtil; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * The default R-Tree insertion strategy: find rectangle with least volume + * enlargement. + * + * <p> + * Antonin Guttman:<br/> + * R-Trees: A Dynamic Index Structure For Spatial Searching<br /> + * in Proceedings of the 1984 ACM SIGMOD international conference on Management + * of data. + * </p> + * + * @author Erich Schubert + */ +@Reference(authors = "Antonin Guttman", title = "R-Trees: A Dynamic Index Structure For Spatial Searching", booktitle = "Proceedings of the 1984 ACM SIGMOD international conference on Management of data", url = "http://dx.doi.org/10.1145/971697.602266") +public class LeastEnlargementInsertionStrategy implements InsertionStrategy { + /** + * Static instance. + */ + public static final LeastEnlargementInsertionStrategy STATIC = new LeastEnlargementInsertionStrategy(); + + /** + * Constructor. + */ + public LeastEnlargementInsertionStrategy() { + super(); + } + + @Override + public <A> int choose(A options, ArrayAdapter<? extends SpatialComparable, A> getter, SpatialComparable obj, int height, int depth) { + final int size = getter.size(options); + assert (size > 0) : "Choose from empty set?"; + double leastEnlargement = Double.POSITIVE_INFINITY; + int best = -1; + for(int i = 0; i < size; i++) { + SpatialComparable entry = getter.get(options, i); + double enlargement = SpatialUtil.enlargement(entry, obj); + if(enlargement < leastEnlargement) { + leastEnlargement = enlargement; + best = i; + } + } + assert (best > -1); + return best; + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected LeastEnlargementInsertionStrategy makeInstance() { + return LeastEnlargementInsertionStrategy.STATIC; + } + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/insert/LeastEnlargementWithAreaInsertionStrategy.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/insert/LeastEnlargementWithAreaInsertionStrategy.java new file mode 100644 index 00000000..977a132b --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/insert/LeastEnlargementWithAreaInsertionStrategy.java @@ -0,0 +1,102 @@ +package de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.insert; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable; +import de.lmu.ifi.dbs.elki.data.spatial.SpatialUtil; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * A slight modification of the default R-Tree insertion strategy: find + * rectangle with least volume enlargement, but choose least area on ties. + * + * Proposed for non-leaf entries in: + * <p> + * N. Beckmann, H.-P. Kriegel, R. Schneider, B. Seeger:<br /> + * The R*-tree: an efficient and robust access method for points and rectangles<br /> + * in: Proceedings of the 1990 ACM SIGMOD International Conference on Management + * of Data, Atlantic City, NJ, May 23-25, 1990 + * </p> + * + * @author Erich Schubert + */ +@Reference(authors = "N. Beckmann, H.-P. Kriegel, R. Schneider, B. Seeger", title = "The R*-tree: an efficient and robust access method for points and rectangles", booktitle = "Proceedings of the 1990 ACM SIGMOD International Conference on Management of Data, Atlantic City, NJ, May 23-25, 1990", url = "http://dx.doi.org/10.1145/93597.98741") +public class LeastEnlargementWithAreaInsertionStrategy implements InsertionStrategy { + /** + * Static instance. + */ + public static final LeastEnlargementWithAreaInsertionStrategy STATIC = new LeastEnlargementWithAreaInsertionStrategy(); + + /** + * Constructor. + */ + public LeastEnlargementWithAreaInsertionStrategy() { + super(); + } + + @Override + public <A> int choose(A options, ArrayAdapter<? extends SpatialComparable, A> getter, SpatialComparable obj, int height, int depth) { + final int size = getter.size(options); + assert (size > 0) : "Choose from empty set?"; + // As in R-Tree, with a slight modification for ties + double leastEnlargement = Double.POSITIVE_INFINITY; + double minArea = -1; + int best = -1; + for(int i = 0; i < size; i++) { + SpatialComparable entry = getter.get(options, i); + double enlargement = SpatialUtil.enlargement(entry, obj); + if(enlargement < leastEnlargement) { + leastEnlargement = enlargement; + best = i; + minArea = SpatialUtil.volume(entry); + } + else if(enlargement == leastEnlargement) { + final double area = SpatialUtil.volume(entry); + if(area < minArea) { + // Tie handling proposed by R*: + best = i; + minArea = area; + } + } + } + assert (best > -1); + return best; + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected LeastEnlargementWithAreaInsertionStrategy makeInstance() { + return LeastEnlargementWithAreaInsertionStrategy.STATIC; + } + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/insert/LeastOverlapInsertionStrategy.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/insert/LeastOverlapInsertionStrategy.java new file mode 100644 index 00000000..2eba8912 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/insert/LeastOverlapInsertionStrategy.java @@ -0,0 +1,120 @@ +package de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.insert; +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.data.HyperBoundingBox; +import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable; +import de.lmu.ifi.dbs.elki.data.spatial.SpatialUtil; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * The choose subtree method proposed by the R*-Tree for leaf nodes. + * + * <p> + * N. Beckmann, H.-P. Kriegel, R. Schneider, B. Seeger:<br /> + * The R*-tree: an efficient and robust access method for points and rectangles<br /> + * in: Proceedings of the 1990 ACM SIGMOD International Conference on Management + * of Data, Atlantic City, NJ, May 23-25, 1990 + * </p> + * + * @author Erich Schubert + */ +@Reference(authors = "N. Beckmann, H.-P. Kriegel, R. Schneider, B. Seeger", title = "The R*-tree: an efficient and robust access method for points and rectangles", booktitle = "Proceedings of the 1990 ACM SIGMOD International Conference on Management of Data, Atlantic City, NJ, May 23-25, 1990", url = "http://dx.doi.org/10.1145/93597.98741") +public class LeastOverlapInsertionStrategy implements InsertionStrategy { + /** + * Static instance. + */ + public static final LeastOverlapInsertionStrategy STATIC = new LeastOverlapInsertionStrategy(); + + /** + * Constructor. + */ + public LeastOverlapInsertionStrategy() { + super(); + } + + @Override + public <A> int choose(A options, ArrayAdapter<? extends SpatialComparable, A> getter, SpatialComparable obj, int height, int depth) { + final int size = getter.size(options); + assert (size > 0) : "Choose from empty set?"; + // R*-Tree: overlap increase for leaves. + int best = -1; + double least_overlap = Double.POSITIVE_INFINITY; + double least_areainc = Double.POSITIVE_INFINITY; + double least_area = Double.POSITIVE_INFINITY; + // least overlap increase, on reduced candidate set: + for(int i = 0; i < size; i++) { + // Existing object and extended rectangle: + SpatialComparable entry = getter.get(options, i); + HyperBoundingBox mbr = SpatialUtil.union(entry, obj); + // Compute relative overlap increase. + double overlap_wout = 0.0; + double overlap_with = 0.0; + for(int k = 0; k < size; k++) { + if(i != k) { + SpatialComparable other = getter.get(options, k); + overlap_wout += SpatialUtil.relativeOverlap(entry, other); + overlap_with += SpatialUtil.relativeOverlap(mbr, other); + } + } + double inc_overlap = overlap_with - overlap_wout; + if(inc_overlap < least_overlap) { + final double area = SpatialUtil.volume(entry); + final double inc_area = SpatialUtil.volume(mbr) - area; + // Volume increase and overlap increase: + least_overlap = inc_overlap; + least_areainc = inc_area; + least_area = area; + best = i; + } + else if(inc_overlap == least_overlap) { + final double area = SpatialUtil.volume(entry); + final double inc_area = SpatialUtil.volume(mbr) - area; + if(inc_area < least_areainc || (inc_area == least_areainc && area < least_area)) { + least_overlap = inc_overlap; + least_areainc = inc_area; + least_area = area; + best = i; + } + } + } + assert (best > -1) : "No split found? Volume outside of double precision?"; + return best; + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected LeastOverlapInsertionStrategy makeInstance() { + return LeastOverlapInsertionStrategy.STATIC; + } + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/insert/package-info.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/insert/package-info.java new file mode 100644 index 00000000..ad54e1e1 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/insert/package-info.java @@ -0,0 +1,26 @@ +/** + * <p>Insertion strategies for R-Trees</p> + */ +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +package de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.insert;
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/overflow/LimitedReinsertOverflowTreatment.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/overflow/LimitedReinsertOverflowTreatment.java new file mode 100644 index 00000000..2532f351 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/overflow/LimitedReinsertOverflowTreatment.java @@ -0,0 +1,135 @@ +package de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.overflow; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.BitSet; + +import de.lmu.ifi.dbs.elki.distance.distancefunction.SquaredEuclideanDistanceFunction; +import de.lmu.ifi.dbs.elki.index.tree.IndexTreePath; +import de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialEntry; +import de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.AbstractRStarTree; +import de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.AbstractRStarTreeNode; +import de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.reinsert.CloseReinsert; +import de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.reinsert.ReinsertStrategy; +import de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.util.NodeArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; + +/** + * Limited reinsertions, as proposed by the R*-Tree: For each real insert, allow + * reinsertions to happen only once per level. + * + * @author Erich Schubert + */ +@Reference(authors = "N. Beckmann, H.-P. Kriegel, R. Schneider, B. Seeger", title = "The R*-tree: an efficient and robust access method for points and rectangles", booktitle = "Proceedings of the 1990 ACM SIGMOD International Conference on Management of Data, Atlantic City, NJ, May 23-25, 1990", url = "http://dx.doi.org/10.1145/93597.98741") +public class LimitedReinsertOverflowTreatment implements OverflowTreatment { + /** + * Default insert strategy used by R*-tree + */ + public static final LimitedReinsertOverflowTreatment RSTAR_OVERFLOW = new LimitedReinsertOverflowTreatment(new CloseReinsert(0.3, SquaredEuclideanDistanceFunction.STATIC)); + + /** + * Bitset to keep track of levels a reinsert has been performed at. + */ + private BitSet reinsertions = new BitSet(); + + /** + * Strategy for the actual reinsertions + */ + private final ReinsertStrategy reinsertStrategy; + + /** + * Constructor. + * + * @param reinsertStrategy Reinsertion strategy + */ + public LimitedReinsertOverflowTreatment(ReinsertStrategy reinsertStrategy) { + super(); + this.reinsertStrategy = reinsertStrategy; + } + + @Override + public <N extends AbstractRStarTreeNode<N, E>, E extends SpatialEntry> boolean handleOverflow(AbstractRStarTree<N, E> tree, N node, IndexTreePath<E> path) { + final int level = /* tree.getHeight() - */(path.getPathCount() - 1); + // No reinsertions at root level + if(path.getPathCount() == 1) { + return false; + } + // Earlier reinsertions at the same level + if(reinsertions.get(level)) { + return false; + } + + reinsertions.set(level); + final E entry = path.getLastPathComponent().getEntry(); + assert (!entry.isLeafEntry()) : "Unexpected leaf entry"; + int[] cands = reinsertStrategy.computeReinserts(node, NodeArrayAdapter.STATIC, entry); + if(cands == null || cands.length == 0) { + return false; + } + tree.reInsert(node, path, cands); + return true; + } + + @Override + public void reinitialize() { + reinsertions.clear(); + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + /** + * Fast-insertion parameter. Optional. + */ + public static OptionID REINSERT_STRATEGY_ID = OptionID.getOrCreateOptionID("rtree.reinsertion-strategy", "The strategy to select candidates for reinsertion."); + + /** + * The actual reinsertion strategy + */ + ReinsertStrategy reinsertStrategy = null; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + ObjectParameter<ReinsertStrategy> strategyP = new ObjectParameter<ReinsertStrategy>(REINSERT_STRATEGY_ID, ReinsertStrategy.class, CloseReinsert.class); + if(config.grab(strategyP)) { + reinsertStrategy = strategyP.instantiateClass(config); + } + } + + @Override + protected LimitedReinsertOverflowTreatment makeInstance() { + return new LimitedReinsertOverflowTreatment(reinsertStrategy); + } + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/overflow/OverflowTreatment.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/overflow/OverflowTreatment.java new file mode 100644 index 00000000..87d09038 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/overflow/OverflowTreatment.java @@ -0,0 +1,53 @@ +package de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.overflow; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.index.tree.IndexTreePath; +import de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialEntry; +import de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.AbstractRStarTree; +import de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.AbstractRStarTreeNode; + +/** + * Reinsertion strategy to resolve overflows in the RStarTree. + * + * @author Erich Schubert + */ +public interface OverflowTreatment { + /** + * Reinitialize the reinsertion treatment (for a new primary insertion). + */ + public void reinitialize(); + + /** + * Handle overflow in the given node. + * + * @param <N> Node + * @param <E> Entry + * @param tree Tree + * @param node Node + * @param path Path + * @return true when already handled (e.g. by reinserting) + */ + <N extends AbstractRStarTreeNode<N, E>, E extends SpatialEntry> boolean handleOverflow(AbstractRStarTree<N, E> tree, N node, IndexTreePath<E> path); +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/overflow/SplitOnlyOverflowTreatment.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/overflow/SplitOnlyOverflowTreatment.java new file mode 100644 index 00000000..cfbcf6a9 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/overflow/SplitOnlyOverflowTreatment.java @@ -0,0 +1,73 @@ +package de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.overflow; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.index.tree.IndexTreePath; +import de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialEntry; +import de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.AbstractRStarTree; +import de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.AbstractRStarTreeNode; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Always split, as in the original R-Tree + * + * @author Erich Schubert + */ +public class SplitOnlyOverflowTreatment implements OverflowTreatment { + /** + * Static instance + */ + public static final SplitOnlyOverflowTreatment STATIC = new SplitOnlyOverflowTreatment(); + + /** + * Constructor + */ + public SplitOnlyOverflowTreatment() { + super(); + } + + @Override + public <N extends AbstractRStarTreeNode<N, E>, E extends SpatialEntry> boolean handleOverflow(AbstractRStarTree<N, E> tree, N node, IndexTreePath<E> path) { + return false; + } + + @Override + public void reinitialize() { + // Nothing to do + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected SplitOnlyOverflowTreatment makeInstance() { + return SplitOnlyOverflowTreatment.STATIC; + } + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/overflow/package-info.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/overflow/package-info.java new file mode 100644 index 00000000..30899736 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/overflow/package-info.java @@ -0,0 +1,26 @@ +/** + * <p>Overflow treatment strategies for R-Trees</p> + */ +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +package de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.overflow;
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/package-info.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/package-info.java new file mode 100644 index 00000000..41e2eb0b --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/package-info.java @@ -0,0 +1,26 @@ +/** + * <p>Various strategies for R-Trees and variants.</p> + */ +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +package de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies;
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/reinsert/AbstractPartialReinsert.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/reinsert/AbstractPartialReinsert.java new file mode 100644 index 00000000..e0277606 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/reinsert/AbstractPartialReinsert.java @@ -0,0 +1,105 @@ +package de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.reinsert; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDoubleDistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distancefunction.SquaredEuclideanDistanceFunction; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.IntervalConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; + +/** + * Abstract base class for reinsertion strategies that have a "relative amount" + * parameter to partially reinsert entries. + * + * @author Erich Schubert + */ +public abstract class AbstractPartialReinsert implements ReinsertStrategy { + /** + * Amount of entries to reinsert + */ + protected double reinsertAmount = 0.3; + + /** + * Distance function to use for measuring + */ + SpatialPrimitiveDoubleDistanceFunction<?> distanceFunction; + + /** + * Constructor. + * + * @param reinsertAmount Relative amount of objects to reinsert. + * @param distanceFunction Distance function to use + */ + public AbstractPartialReinsert(double reinsertAmount, SpatialPrimitiveDoubleDistanceFunction<?> distanceFunction) { + super(); + this.reinsertAmount = reinsertAmount; + this.distanceFunction = distanceFunction; + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static abstract class Parameterizer extends AbstractParameterizer { + /** + * Reinsertion share + */ + public static OptionID REINSERT_AMOUNT_ID = OptionID.getOrCreateOptionID("rtree.reinsertion-amount", "The amount of entries to reinsert."); + + /** + * Reinsertion share + */ + public static OptionID REINSERT_DISTANCE_ID = OptionID.getOrCreateOptionID("rtree.reinsertion-distancce", "The distance function to compute reinsertion candidates by."); + + /** + * The actual reinsertion strategy + */ + double reinsertAmount = 0.3; + + /** + * Distance function to use for measuring + */ + SpatialPrimitiveDoubleDistanceFunction<?> distanceFunction; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + DoubleParameter reinsertAmountP = new DoubleParameter(REINSERT_AMOUNT_ID, new IntervalConstraint(0.0, IntervalConstraint.IntervalBoundary.OPEN, 0.5, IntervalConstraint.IntervalBoundary.OPEN), 0.3); + if(config.grab(reinsertAmountP)) { + reinsertAmount = reinsertAmountP.getValue(); + } + ObjectParameter<SpatialPrimitiveDoubleDistanceFunction<?>> distanceP = new ObjectParameter<SpatialPrimitiveDoubleDistanceFunction<?>>(REINSERT_DISTANCE_ID, SpatialPrimitiveDoubleDistanceFunction.class, SquaredEuclideanDistanceFunction.class); + if(config.grab(distanceP)) { + distanceFunction = distanceP.instantiateClass(config); + } + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/reinsert/CloseReinsert.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/reinsert/CloseReinsert.java new file mode 100644 index 00000000..12a4ed0f --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/reinsert/CloseReinsert.java @@ -0,0 +1,88 @@ +package de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.reinsert; + +import java.util.Arrays; +import java.util.Collections; + +import de.lmu.ifi.dbs.elki.data.DoubleVector; +import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable; +import de.lmu.ifi.dbs.elki.data.spatial.SpatialUtil; +import de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDoubleDistanceFunction; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleIntPair; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * Reinsert objects on page overflow, starting with close objects first (even + * when they will likely be inserted into the same page again!) + * + * The strategy preferred by the R*-Tree + * + * @author Erich Schubert + */ +@Reference(authors = "N. Beckmann, H.-P. Kriegel, R. Schneider, B. Seeger", title = "The R*-tree: an efficient and robust access method for points and rectangles", booktitle = "Proceedings of the 1990 ACM SIGMOD International Conference on Management of Data, Atlantic City, NJ, May 23-25, 1990", url = "http://dx.doi.org/10.1145/93597.98741") +public class CloseReinsert extends AbstractPartialReinsert { + /** + * Constructor. + * + * @param reinsertAmount Amount of objects to reinsert + * @param distanceFunction Distance function to use for reinsertion + */ + public CloseReinsert(double reinsertAmount, SpatialPrimitiveDoubleDistanceFunction<?> distanceFunction) { + super(reinsertAmount, distanceFunction); + } + + @Override + public <A> int[] computeReinserts(A entries, ArrayAdapter<? extends SpatialComparable, ? super A> getter, SpatialComparable page) { + DoubleIntPair[] order = new DoubleIntPair[getter.size(entries)]; + DoubleVector centroid = new DoubleVector(SpatialUtil.centroid(page)); + for(int i = 0; i < order.length; i++) { + double distance = distanceFunction.doubleMinDist(new DoubleVector(SpatialUtil.centroid(getter.get(entries, i))), centroid); + order[i] = new DoubleIntPair(distance, i); + } + Arrays.sort(order, Collections.reverseOrder()); + + int num = (int) (reinsertAmount * order.length); + int[] re = new int[num]; + for(int i = 0; i < num; i++) { + re[i] = order[num - 1 - i].second; + } + return re; + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractPartialReinsert.Parameterizer { + @Override + protected Object makeInstance() { + return new CloseReinsert(reinsertAmount, distanceFunction); + } + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/reinsert/FarReinsert.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/reinsert/FarReinsert.java new file mode 100644 index 00000000..771f56fb --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/reinsert/FarReinsert.java @@ -0,0 +1,88 @@ +package de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.reinsert; + +import java.util.Arrays; +import java.util.Collections; + +import de.lmu.ifi.dbs.elki.data.DoubleVector; +import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable; +import de.lmu.ifi.dbs.elki.data.spatial.SpatialUtil; +import de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDoubleDistanceFunction; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleIntPair; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * Reinsert objects on page overflow, starting with farther objects first (even + * when they will likely be inserted into the same page again!) + * + * Alternative strategy mentioned in the R*-tree + * + * @author Erich Schubert + */ +@Reference(authors = "N. Beckmann, H.-P. Kriegel, R. Schneider, B. Seeger", title = "The R*-tree: an efficient and robust access method for points and rectangles", booktitle = "Proceedings of the 1990 ACM SIGMOD International Conference on Management of Data, Atlantic City, NJ, May 23-25, 1990", url = "http://dx.doi.org/10.1145/93597.98741") +public class FarReinsert extends AbstractPartialReinsert { + /** + * Constructor. + * + * @param reinsertAmount Amount to reinsert + * @param distanceFunction Distance function + */ + public FarReinsert(double reinsertAmount, SpatialPrimitiveDoubleDistanceFunction<?> distanceFunction) { + super(reinsertAmount, distanceFunction); + } + + @Override + public <A> int[] computeReinserts(A entries, ArrayAdapter<? extends SpatialComparable, ? super A> getter, SpatialComparable page) { + DoubleIntPair[] order = new DoubleIntPair[getter.size(entries)]; + DoubleVector centroid = new DoubleVector(SpatialUtil.centroid(page)); + for(int i = 0; i < order.length; i++) { + double distance = distanceFunction.doubleMinDist(new DoubleVector(SpatialUtil.centroid(getter.get(entries, i))), centroid); + order[i] = new DoubleIntPair(distance, i); + } + Arrays.sort(order, Collections.reverseOrder()); + + int num = (int) (reinsertAmount * order.length); + int[] re = new int[num]; + for(int i = 0; i < num; i++) { + re[i] = order[i].second; + } + return re; + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractPartialReinsert.Parameterizer { + @Override + protected Object makeInstance() { + return new CloseReinsert(reinsertAmount, distanceFunction); + } + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/reinsert/ReinsertStrategy.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/reinsert/ReinsertStrategy.java new file mode 100644 index 00000000..cba96367 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/reinsert/ReinsertStrategy.java @@ -0,0 +1,44 @@ +package de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.reinsert; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayAdapter; + +/** + * Reinsertion strategy to resolve overflows in the RStarTree. + * + * @author Erich Schubert + */ +public interface ReinsertStrategy { + /** + * Perform reinsertions. + * + * @param entries Entries in overflowing node + * @param getter Adapter for the entries array + * @param page Spatial extend of the page + * @return index of pages to reinsert. + */ + public <A> int[] computeReinserts(A entries, ArrayAdapter<? extends SpatialComparable, ? super A> getter, SpatialComparable page); +} diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/reinsert/package-info.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/reinsert/package-info.java new file mode 100644 index 00000000..f6a6f6e9 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/reinsert/package-info.java @@ -0,0 +1,26 @@ +/** + * <p>Reinsertion strategies for R-Trees</p> + */ +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +package de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.reinsert;
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/split/AngTanLinearSplit.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/split/AngTanLinearSplit.java new file mode 100644 index 00000000..e59fe10e --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/split/AngTanLinearSplit.java @@ -0,0 +1,193 @@ +package de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.split; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.BitSet; +import java.util.Random; + +import de.lmu.ifi.dbs.elki.data.ModifiableHyperBoundingBox; +import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable; +import de.lmu.ifi.dbs.elki.data.spatial.SpatialUtil; +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.utilities.Util; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Line-time complexity split proposed by Ang and Tan. + * + * This split strategy tries to minimize overlap only, which can however + * degenerate to "slices". + * + * <p> + * C. H. Ang and T. C. Tan:<br /> + * New linear node splitting algorithm for R-trees<br /> + * In: Proceedings of the 5th International Symposium on Advances in Spatial + * Databases + * </p> + * + * @author Erich Schubert + */ +@Reference(authors = "C. H. Ang and T. C. Tan", title = "New linear node splitting algorithm for R-trees", booktitle = "Proceedings of the 5th International Symposium on Advances in Spatial Databases", url = "http://dx.doi.org/10.1007/3-540-63238-7_38") +public class AngTanLinearSplit implements SplitStrategy { + /** + * Logger class + */ + private static final Logging logger = Logging.getLogger(AngTanLinearSplit.class); + + /** + * Static instance. + */ + public static final AngTanLinearSplit STATIC = new AngTanLinearSplit(); + + @Override + public <E extends SpatialComparable, A> BitSet split(A entries, ArrayAdapter<E, A> getter, int minEntries) { + final int num = getter.size(entries); + // We need the overall MBR for computing edge preferences + ModifiableHyperBoundingBox total = new ModifiableHyperBoundingBox(getter.get(entries, 0)); + { + for(int i = 1; i < num; i++) { + total.extend(getter.get(entries, i)); + } + } + final int dim = total.getDimensionality(); + // Prepare the axis lists (we use bitsets) + BitSet[] closer = new BitSet[dim]; + { + for(int d = 0; d < dim; d++) { + closer[d] = new BitSet(); + } + for(int i = 0; i < num; i++) { + E e = getter.get(entries, i); + for(int d = 1; d <= dim; d++) { + double low = e.getMin(d) - total.getMin(d); + double hig = total.getMax(d) - e.getMax(d); + if(low >= hig) { + closer[d - 1].set(i); + } + } + } + } + // Find the most even split + { + int axis = -1; + int bestcard = Integer.MAX_VALUE; + BitSet bestset = null; + double bestover = Double.NaN; + for(int d = 0; d < dim; d++) { + BitSet cand = closer[d]; + int card = cand.cardinality(); + card = Math.max(card, num - card); + if(card == num) { + continue; + } + if(card < bestcard) { + axis = d + 1; + bestcard = card; + bestset = cand; + bestover = Double.NaN; + } + else if(card == bestcard) { + // Tie handling + if(Double.isNaN(bestover)) { + bestover = computeOverlap(entries, getter, bestset); + } + double overlap = computeOverlap(entries, getter, cand); + if(overlap < bestover) { + axis = d + 1; + bestcard = card; + bestset = cand; + bestover = overlap; + } + else if(overlap == bestover) { + double bestlen = total.getMax(axis) - total.getMin(axis); + double candlen = total.getMax(d + 1) - total.getMin(d + 1); + if(candlen < bestlen) { + axis = d + 1; + bestcard = card; + bestset = cand; + bestover = overlap; + } + } + } + } + if(bestset == null) { + logger.warning("No Ang-Tan-Split found. Probably all points are the same? Returning random split."); + return Util.randomBitSet(num / 2, num, new Random()); + } + return bestset; + } + } + + /** + * Compute overlap of assignment + * + * @param entries Entries + * @param getter Entry accessor + * @param assign Assignment + * @return Overlap amount + */ + protected <E extends SpatialComparable, A> double computeOverlap(A entries, ArrayAdapter<E, A> getter, BitSet assign) { + ModifiableHyperBoundingBox mbr1 = null, mbr2 = null; + for(int i = 0; i < getter.size(entries); i++) { + E e = getter.get(entries, i); + if(assign.get(i)) { + if(mbr1 == null) { + mbr1 = new ModifiableHyperBoundingBox(e); + } + else { + mbr1.extend(e); + } + } + else { + if(mbr2 == null) { + mbr2 = new ModifiableHyperBoundingBox(e); + } + else { + mbr2.extend(e); + } + } + } + if(mbr1 == null || mbr2 == null) { + throw new AbortException("Invalid state in split: one of the sets is empty."); + } + return SpatialUtil.overlap(mbr1, mbr2); + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected AngTanLinearSplit makeInstance() { + return AngTanLinearSplit.STATIC; + } + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/split/GreeneSplit.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/split/GreeneSplit.java new file mode 100644 index 00000000..7401fbe5 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/split/GreeneSplit.java @@ -0,0 +1,158 @@ +package de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.split; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; +import java.util.BitSet; + +import de.lmu.ifi.dbs.elki.data.ModifiableHyperBoundingBox; +import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable; +import de.lmu.ifi.dbs.elki.data.spatial.SpatialUtil; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; +import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleIntPair; + +/** + * Quadratic-time complexity split as used by Diane Greene for the R-Tree. + * + * Seed selection is quadratic, distribution is O(n log n). + * + * This contains a slight modification to improve performance with point data: + * with points as seeds, the normalized separation is always 1, so we choose the + * raw separation then. + * + * <p> + * Diane Greene:<br /> + * An implementation and performance analysis of spatial data access methods<br /> + * In: Proceedings of the Fifth International Conference on Data Engineering + * </p> + * + * @author Erich Schubert + */ +@Reference(authors = "Diane Greene", title = "An implementation and performance analysis of spatial data access methods", booktitle = "Proceedings of the Fifth International Conference on Data Engineering", url = "http://dx.doi.org/10.1109/ICDE.1989.47268") +public class GreeneSplit implements SplitStrategy { + /** + * Static instance. + */ + public static final GreeneSplit STATIC = new GreeneSplit(); + + @Override + public <E extends SpatialComparable, A> BitSet split(A entries, ArrayAdapter<E, A> getter, int minEntries) { + final int num = getter.size(entries); + // Choose axis by best normalized separation + int axis = -1; + { + // PickSeeds - find the two most distant rectangles + double worst = Double.NEGATIVE_INFINITY; + int w1 = 0, w2 = 0; + + // Compute individual areas + double[] areas = new double[num]; + for(int e1 = 0; e1 < num - 1; e1++) { + final E e1i = getter.get(entries, e1); + areas[e1] = SpatialUtil.volume(e1i); + } + // Compute area increase + for(int e1 = 0; e1 < num - 1; e1++) { + final E e1i = getter.get(entries, e1); + for(int e2 = e1 + 1; e2 < num; e2++) { + final E e2i = getter.get(entries, e2); + final double areaJ = SpatialUtil.volumeUnion(e1i, e2i); + final double d = areaJ - areas[e1] - areas[e2]; + if(d > worst) { + worst = d; + w1 = e1; + w2 = e2; + } + } + } + // Data to keep + // Initial mbrs and areas + E m1 = getter.get(entries, w1); + E m2 = getter.get(entries, w2); + + double bestsep = Double.NEGATIVE_INFINITY; + double bestsep2 = Double.NEGATIVE_INFINITY; + for(int d = 1; d <= m1.getDimensionality(); d++) { + final double s1 = m1.getMin(d) - m2.getMax(d); + final double s2 = m2.getMin(d) - m1.getMax(d); + final double sm = Math.max(s1, s2); + final double no = Math.max(m1.getMax(d), m2.getMax(d)) - Math.min(m1.getMin(d), m2.getMin(d)); + final double sep = sm / no; + if(sep > bestsep || (sep == bestsep && sm > bestsep2)) { + bestsep = sep; + bestsep2 = sm; + axis = d; + } + } + } + // Sort by minimum value + DoubleIntPair[] data = new DoubleIntPair[num]; + for(int i = 0; i < num; i++) { + data[i] = new DoubleIntPair(getter.get(entries, i).getMin(axis), i); + } + Arrays.sort(data); + // Object assignment + final BitSet assignment = new BitSet(num); + final int half = (num + 1) / 2; + // Put the first half into second node + for(int i = 0; i < half; i++) { + assignment.set(data[i].second); + } + // Tie handling + if(num % 2 == 0) { + // We need to compute the bounding boxes + ModifiableHyperBoundingBox mbr1 = new ModifiableHyperBoundingBox(getter.get(entries, data[0].second)); + for(int i = 1; i < half; i++) { + mbr1.extend(getter.get(entries, data[i].second)); + } + ModifiableHyperBoundingBox mbr2 = new ModifiableHyperBoundingBox(getter.get(entries, data[num - 1].second)); + for(int i = half + 1; i < num - 1; i++) { + mbr2.extend(getter.get(entries, data[i].second)); + } + E e = getter.get(entries, data[half].second); + double inc1 = SpatialUtil.volumeUnion(mbr1, e) - SpatialUtil.volume(mbr1); + double inc2 = SpatialUtil.volumeUnion(mbr2, e) - SpatialUtil.volume(mbr2); + if(inc1 < inc2) { + assignment.set(data[half].second); + } + } + return assignment; + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected GreeneSplit makeInstance() { + return GreeneSplit.STATIC; + } + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/split/RTreeLinearSplit.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/split/RTreeLinearSplit.java new file mode 100644 index 00000000..296f6b3b --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/split/RTreeLinearSplit.java @@ -0,0 +1,219 @@ +package de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.split; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.BitSet; + +import de.lmu.ifi.dbs.elki.data.ModifiableHyperBoundingBox; +import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable; +import de.lmu.ifi.dbs.elki.data.spatial.SpatialUtil; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Linear-time complexity greedy split as used by the original R-Tree. + * + * <p> + * Antonin Guttman:<br/> + * R-Trees: A Dynamic Index Structure For Spatial Searching<br /> + * in Proceedings of the 1984 ACM SIGMOD international conference on Management + * of data. + * </p> + * + * @author Erich Schubert + */ +@Reference(authors = "Antonin Guttman", title = "R-Trees: A Dynamic Index Structure For Spatial Searching", booktitle = "Proceedings of the 1984 ACM SIGMOD international conference on Management of data", url = "http://dx.doi.org/10.1145/971697.602266") +public class RTreeLinearSplit implements SplitStrategy { + /** + * Static instance. + */ + public static final RTreeLinearSplit STATIC = new RTreeLinearSplit(); + + @Override + public <E extends SpatialComparable, A> BitSet split(A entries, ArrayAdapter<E, A> getter, int minEntries) { + final int num = getter.size(entries); + // Object assignment, and processed objects + BitSet assignment = new BitSet(num); + BitSet assigned = new BitSet(num); + // MBRs and Areas of current assignments + ModifiableHyperBoundingBox mbr1, mbr2; + double area1 = 0, area2 = 0; + // LinearPickSeeds - find worst pair + { + final int dim = getter.get(entries, 0).getDimensionality(); + // Best candidates + double bestsep = Double.NEGATIVE_INFINITY; + int w1 = -1, w2 = -1; + // LPS1: find extreme rectangles + for(int d = 1; d <= dim; d++) { + // We need to find two candidates each, in case of el==eh! + double minlow = Double.POSITIVE_INFINITY; + double maxlow = Double.NEGATIVE_INFINITY, maxlow2 = Double.NEGATIVE_INFINITY; + double minhig = Double.POSITIVE_INFINITY, minhig2 = Double.POSITIVE_INFINITY; + double maxhig = Double.NEGATIVE_INFINITY; + int el = -1, el2 = -1; + int eh = -1, eh2 = -1; + for(int i = 0; i < num; i++) { + E ei = getter.get(entries, i); + final double low = ei.getMin(d); + final double hig = ei.getMax(d); + minlow = Math.min(minlow, low); + maxhig = Math.max(maxhig, hig); + if(low >= maxlow) { + maxlow2 = maxlow; + maxlow = low; + el2 = el; + el = i; + } + else if(low > maxlow2) { + maxlow2 = low; + el2 = i; + } + if(hig <= minhig) { + minhig2 = minhig; + minhig = hig; + eh2 = eh; + eh = i; + } + else if(hig < minhig2) { + minhig2 = hig; + eh2 = i; + } + } + // Compute normalized separation + final double normsep; + if(el != eh) { + normsep = minhig - maxlow / (maxhig - minlow); + } + else { + // Resolve tie. + double normsep1 = minhig - maxlow2 / (maxhig - minlow); + double normsep2 = minhig2 - maxlow / (maxhig - minlow); + if(normsep1 > normsep2) { + el = el2; + normsep = normsep1; + } + else { + eh = eh2; + normsep = normsep2; + } + } + assert (eh != -1 && el != -1 && (eh != el)); + if(normsep > bestsep) { + bestsep = normsep; + w1 = el; + w2 = eh; + } + } + + // Data to keep + // Mark both as used + assigned.set(w1); + assigned.set(w2); + // Assign second to second set + assignment.set(w2); + // Initial mbrs and areas + final E w1i = getter.get(entries, w1); + final E w2i = getter.get(entries, w2); + area1 = SpatialUtil.volume(w1i); + area2 = SpatialUtil.volume(w2i); + mbr1 = new ModifiableHyperBoundingBox(w1i); + mbr2 = new ModifiableHyperBoundingBox(w2i); + } + // Second phase, QS2+QS3 + { + int in1 = 1, in2 = 1; + int remaining = num - 2; + // Choose any element, for example the next. + for(int next = assigned.nextClearBit(0); remaining > 0 && next < num; next = assigned.nextClearBit(next + 1)) { + // Shortcut when minEntries must be fulfilled + if(in1 + remaining <= minEntries) { + // No need to updated assigned, no changes to assignment. + break; + } + if(in2 + remaining <= minEntries) { + // Mark unassigned for second. + // Don't bother to update assigned, though + for(; next < num; next = assigned.nextClearBit(next + 1)) { + assignment.set(next); + } + break; + } + // PickNext + boolean preferSecond = false; + + // Cost of putting object into both mbrs + final E next_i = getter.get(entries, next); + final double d1 = SpatialUtil.volumeUnion(mbr1, next_i) - area1; + final double d2 = SpatialUtil.volumeUnion(mbr2, next_i) - area2; + // Prefer smaller increase + preferSecond = (d2 < d1); + // QS3: tie handling + if(d1 == d2) { + // Prefer smaller area + if(area1 != area2) { + preferSecond = (area2 < area1); + } + else { + // Prefer smaller group size + preferSecond = (in2 < in1); + } + } + // Mark as used. + assigned.set(next); + remaining--; + // Assign + if(!preferSecond) { + in1++; + mbr1.extend(next_i); + area1 = SpatialUtil.volume(mbr1); + } + else { + in2++; + assignment.set(next); + mbr2.extend(next_i); + area2 = SpatialUtil.volume(mbr2); + } + // Loop from QS2 + } + // Note: "assigned" and "remaining" likely not updated! + } + return assignment; + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected RTreeLinearSplit makeInstance() { + return RTreeLinearSplit.STATIC; + } + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/split/RTreeQuadraticSplit.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/split/RTreeQuadraticSplit.java new file mode 100644 index 00000000..8f61771d --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/split/RTreeQuadraticSplit.java @@ -0,0 +1,183 @@ +package de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.split; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.BitSet; + +import de.lmu.ifi.dbs.elki.data.ModifiableHyperBoundingBox; +import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable; +import de.lmu.ifi.dbs.elki.data.spatial.SpatialUtil; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Quadratic-time complexity greedy split as used by the original R-Tree. + * + * <p> + * Antonin Guttman:<br/> + * R-Trees: A Dynamic Index Structure For Spatial Searching<br /> + * in Proceedings of the 1984 ACM SIGMOD international conference on Management + * of data. + * </p> + * + * @author Erich Schubert + */ +@Reference(authors = "Antonin Guttman", title = "R-Trees: A Dynamic Index Structure For Spatial Searching", booktitle = "Proceedings of the 1984 ACM SIGMOD international conference on Management of data", url = "http://dx.doi.org/10.1145/971697.602266") +public class RTreeQuadraticSplit implements SplitStrategy { + /** + * Static instance. + */ + public static final RTreeQuadraticSplit STATIC = new RTreeQuadraticSplit(); + + @Override + public <E extends SpatialComparable, A> BitSet split(A entries, ArrayAdapter<E, A> getter, int minEntries) { + final int num = getter.size(entries); + // Object assignment, and processed objects + BitSet assignment = new BitSet(num); + BitSet assigned = new BitSet(num); + // MBRs and Areas of current assignments + ModifiableHyperBoundingBox mbr1, mbr2; + double area1 = 0, area2 = 0; + // PickSeeds - find worst pair + { + double worst = Double.NEGATIVE_INFINITY; + int w1 = 0, w2 = 0; + + // Compute individual areas + double[] areas = new double[num]; + for(int e1 = 0; e1 < num - 1; e1++) { + final E e1i = getter.get(entries, e1); + areas[e1] = SpatialUtil.volume(e1i); + } + // Compute area increase + for(int e1 = 0; e1 < num - 1; e1++) { + final E e1i = getter.get(entries, e1); + for(int e2 = e1 + 1; e2 < num; e2++) { + final E e2i = getter.get(entries, e2); + final double areaJ = SpatialUtil.volumeUnion(e1i, e2i); + final double d = areaJ - areas[e1] - areas[e2]; + if(d > worst) { + worst = d; + w1 = e1; + w2 = e2; + } + } + } + // Data to keep + // Mark both as used + assigned.set(w1); + assigned.set(w2); + // Assign second to second set + assignment.set(w2); + // Initial mbrs and areas + area1 = areas[w1]; + area2 = areas[w2]; + mbr1 = new ModifiableHyperBoundingBox(getter.get(entries, w1)); + mbr2 = new ModifiableHyperBoundingBox(getter.get(entries, w2)); + } + // Second phase, QS2+QS3 + { + int in1 = 1, in2 = 1; + int remaining = num - 2; + while(remaining > 0) { + // Shortcut when minEntries must be fulfilled + if(in1 + remaining <= minEntries) { + // No need to updated assigned, no changes to assignment. + break; + } + if(in2 + remaining <= minEntries) { + // Mark unassigned for second. + // Don't bother to update assigned, though + for(int pos = assigned.nextClearBit(0); pos < num; pos = assigned.nextClearBit(pos + 1)) { + assignment.set(pos); + } + break; + } + // PickNext + double greatestPreference = Double.NEGATIVE_INFINITY; + int best = -1; + E best_i = null; + boolean preferSecond = false; + for(int pos = assigned.nextClearBit(0); pos < num; pos = assigned.nextClearBit(pos + 1)) { + // Cost of putting object into both mbrs + final E pos_i = getter.get(entries, pos); + final double d1 = SpatialUtil.volumeUnion(mbr1, pos_i) - area1; + final double d2 = SpatialUtil.volumeUnion(mbr2, pos_i) - area2; + // Preference + final double preference = Math.abs(d1 - d2); + if(preference > greatestPreference) { + greatestPreference = preference; + best = pos; + best_i = pos_i; + // Prefer smaller increase + preferSecond = (d2 < d1); + } + } + // QS3: tie handling + if(greatestPreference == 0) { + // Prefer smaller area + if(area1 != area2) { + preferSecond = (area2 < area1); + } + else { + // Prefer smaller group size + preferSecond = (in2 < in1); + } + } + // Mark as used. + assigned.set(best); + remaining--; + if(!preferSecond) { + in1++; + mbr1.extend(best_i); + area1 = SpatialUtil.volume(mbr1); + } + else { + in2++; + assignment.set(best); + mbr2.extend(best_i); + area2 = SpatialUtil.volume(mbr2); + } + // Loop from QS2 + } + // Note: "assigned" and "remaining" likely not updated! + } + return assignment; + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected RTreeQuadraticSplit makeInstance() { + return RTreeQuadraticSplit.STATIC; + } + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/split/SplitStrategy.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/split/SplitStrategy.java new file mode 100644 index 00000000..658a63da --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/split/SplitStrategy.java @@ -0,0 +1,47 @@ +package de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.split; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.BitSet; + +import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable; + +/** + * Generic interface for split strategies. + * + * @author Erich Schubert + */ +public interface SplitStrategy extends Parameterizable { + /** + * Split a page + * + * @param entries Entries to split + * @param getter Adapter for the entries array + * @param minEntries Minimum number of entries in each part + * @return BitSet containing the assignment. + */ + public <E extends SpatialComparable, A> BitSet split(A entries, ArrayAdapter<E, A> getter, int minEntries); +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/split/TopologicalSplitter.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/split/TopologicalSplitter.java new file mode 100644 index 00000000..1789ab22 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/split/TopologicalSplitter.java @@ -0,0 +1,309 @@ +package de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.split; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; +import java.util.BitSet; + +import de.lmu.ifi.dbs.elki.data.HyperBoundingBox; +import de.lmu.ifi.dbs.elki.data.ModifiableHyperBoundingBox; +import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable; +import de.lmu.ifi.dbs.elki.data.spatial.SpatialUtil; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; +import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleIntPair; + +/** + * Encapsulates the required parameters for a topological split of a R*-Tree. + * + * @author Elke Achtert + * + * @apiviz.has Split + */ +@Reference(authors = "N. Beckmann, H.-P. Kriegel, R. Schneider, B. Seeger", title = "The R*-tree: an efficient and robust access method for points and rectangles", booktitle = "Proceedings of the 1990 ACM SIGMOD International Conference on Management of Data, Atlantic City, NJ, May 23-25, 1990", url = "http://dx.doi.org/10.1145/93597.98741") +public class TopologicalSplitter implements SplitStrategy { + /** + * Static instance. + */ + public static final TopologicalSplitter STATIC = new TopologicalSplitter(); + + /** + * constructor. + */ + public TopologicalSplitter() { + // Nothing to do. + } + + @Override + public <E extends SpatialComparable, A> BitSet split(A entries, ArrayAdapter<E, A> getter, int minEntries) { + Split<A, E> split = new Split<A, E>(entries, getter); + split.chooseSplitAxis(minEntries); + split.chooseSplitPoint(minEntries); + + assert (split.splitPoint < split.size) : "Invalid split produced. Size: " + getter.size(entries) + " minEntries: " + minEntries + " split.size: " + split.size; + BitSet assignment = new BitSet(split.size); + for(int i = split.splitPoint; i < split.size; i++) { + assignment.set(split.bestSorting[i].second); + } + return assignment; + } + + /** + * Internal data for an actual split. + * + * @author Erich Schubert + * + * @param <E> Actual entry type + */ + private class Split<A, E extends SpatialComparable> { + /** + * The index of the split point. + */ + int splitPoint = -1; + + /** + * Indicates whether the sorting according to maximal or to minimal value + * has been used for choosing the split axis and split point. + */ + DoubleIntPair[] bestSorting; + + /** + * The entries sorted according to their max values of their MBRs. + */ + DoubleIntPair[] maxSorting; + + /** + * The entries sorted according to their min values of their MBRs. + */ + DoubleIntPair[] minSorting; + + /** + * The entries we process. + */ + private A entries; + + /** + * The getter class for the entries + */ + private ArrayAdapter<E, A> getter; + + /** + * List size + */ + private int size; + + /** + * Dimensionality + */ + private int dimensionality; + + /** + * Constructor. + */ + public Split(A entries, ArrayAdapter<E, A> getter) { + this.entries = entries; + this.getter = getter; + this.size = getter.size(entries); + this.dimensionality = getter.get(entries, 0).getDimensionality(); + initMinMaxArrays(); + } + + /** + * Chooses a split axis. + * + * @param minEntries number of minimum entries in the node to be split + */ + void chooseSplitAxis(int minEntries) { + // best value for the surface + double minSurface = Double.MAX_VALUE; + int splitAxis = -1; + + for(int d = 1; d <= dimensionality; d++) { + double sumOfAllMargins = 0; + fillAndSort(d); + + // Note: this has a somewhat surprising evaluation order. + // We compute the sum as in the original paper: + // it says "sum of all margin-values". + // Except that we don't match them as you would do in a split, but + // Iterate over all possible splits from both sides (as well as min and + // max) in parallel, since union can be computed incrementally. + ModifiableHyperBoundingBox mbr_min_left = new ModifiableHyperBoundingBox(get(minSorting[0])); + ModifiableHyperBoundingBox mbr_min_right = new ModifiableHyperBoundingBox(get(minSorting[size - 1])); + ModifiableHyperBoundingBox mbr_max_left = new ModifiableHyperBoundingBox(get(maxSorting[0])); + ModifiableHyperBoundingBox mbr_max_right = new ModifiableHyperBoundingBox(get(maxSorting[size - 1])); + + for(int k = 1; k < size - minEntries; k++) { + mbr_min_left.extend(get(minSorting[k])); + mbr_min_right.extend(get(minSorting[size - 1 - k])); + mbr_max_left.extend(get(maxSorting[k])); + mbr_max_right.extend(get(maxSorting[size - 1 - k])); + if(k >= minEntries - 1) { + // Yes, build the sum. This value is solely used for finding the + // preferred split axis! + // Note that mbr_min_left and mbr_max_left do not add up to a + // complete split, but when the sum is complete, it will also + // include their proper counterpart. + sumOfAllMargins += SpatialUtil.perimeter(mbr_min_left) + SpatialUtil.perimeter(mbr_min_right) + SpatialUtil.perimeter(mbr_max_left) + SpatialUtil.perimeter(mbr_max_right); + } + } + if(sumOfAllMargins < minSurface) { + splitAxis = d; + minSurface = sumOfAllMargins; + } + } + if(splitAxis != dimensionality) { + fillAndSort(splitAxis); + } + } + + /** + * Init the arrays we use + */ + protected void initMinMaxArrays() { + maxSorting = new DoubleIntPair[size]; + minSorting = new DoubleIntPair[size]; + // Prefill + for(int j = 0; j < size; j++) { + minSorting[j] = new DoubleIntPair(0, -1); + maxSorting[j] = new DoubleIntPair(0, -1); + } + } + + /** + * Fill the array with the dimension projection needed for sorting. + * + * @param dim Relevant dimension. + */ + protected void fillAndSort(final int dim) { + // sort the entries according to their minimal and according to their + // maximal value in the current dimension. + for(int j = 0; j < size; j++) { + E e = get(j); + minSorting[j].first = e.getMin(dim); + minSorting[j].second = j; + maxSorting[j].first = e.getMax(dim); + maxSorting[j].second = j; + } + Arrays.sort(minSorting); + Arrays.sort(maxSorting); + } + + /** + * Chooses a split axis. + * + * @param minEntries number of minimum entries in the node to be split + */ + void chooseSplitPoint(int minEntries) { + // the split point (first set to minimum entries in the node) + splitPoint = size; + // best value for the overlap + double minOverlap = Double.POSITIVE_INFINITY; + // the volume of mbr1 and mbr2 + double volume = Double.POSITIVE_INFINITY; + // indicates whether the sorting according to maximal or to minimal value + // is best for the split axis + bestSorting = null; + + assert (size - 2 * minEntries > 0) : "Cannot split underfull nodes."; + // test the sorting with respect to the minimal values + { + ModifiableHyperBoundingBox mbr1 = mbr(minSorting, 0, minEntries - 1); + for(int i = 0; i <= size - 2 * minEntries; i++) { + mbr1.extend(getter.get(entries, minSorting[minEntries + i - 1].second)); + HyperBoundingBox mbr2 = mbr(minSorting, minEntries + i, size); + double currentOverlap = SpatialUtil.relativeOverlap(mbr1, mbr2); + if(currentOverlap <= minOverlap) { + double vol = SpatialUtil.volume(mbr1) + SpatialUtil.volume(mbr2); + if(currentOverlap < minOverlap || vol < volume) { + minOverlap = currentOverlap; + volume = vol; + splitPoint = minEntries + i; + bestSorting = minSorting; + } + } + } + } + // test the sorting with respect to the maximal values + { + ModifiableHyperBoundingBox mbr1 = mbr(maxSorting, 0, minEntries - 1); + for(int i = 0; i <= size - 2 * minEntries; i++) { + mbr1.extend(getter.get(entries, maxSorting[minEntries + i - 1].second)); + HyperBoundingBox mbr2 = mbr(maxSorting, minEntries + i, size); + double currentOverlap = SpatialUtil.relativeOverlap(mbr1, mbr2); + if(currentOverlap <= minOverlap) { + double vol = SpatialUtil.volume(mbr1) + SpatialUtil.volume(mbr2); + if(currentOverlap < minOverlap || vol < volume) { + minOverlap = currentOverlap; + volume = vol; + splitPoint = minEntries + i; + bestSorting = maxSorting; + } + } + } + } + assert (splitPoint < size) : "No split found? Volume outside of double precision?"; + } + + private E get(int off) { + return getter.get(entries, off); + } + + private E get(DoubleIntPair pair) { + return getter.get(entries, pair.second); + } + + /** + * Computes and returns the mbr of the specified nodes, only the nodes + * between from and to index are considered. + * + * @param sorting the array of nodes + * @param from the start index + * @param to the end index + * @return the mbr of the specified nodes + */ + private ModifiableHyperBoundingBox mbr(final DoubleIntPair[] sorting, final int from, final int to) { + ModifiableHyperBoundingBox mbr = new ModifiableHyperBoundingBox(get(sorting[from])); + for(int i = from + 1; i < to; i++) { + mbr.extend(get(sorting[i])); + } + return mbr; + } + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected TopologicalSplitter makeInstance() { + return TopologicalSplitter.STATIC; + } + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/split/package-info.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/split/package-info.java new file mode 100644 index 00000000..9e8291be --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/split/package-info.java @@ -0,0 +1,26 @@ +/** + * <p>Splitting strategies for R-Trees</p> + */ +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +package de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.split;
\ No newline at end of file |