summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrej Shadura <andrewsh@debian.org>2019-03-09 22:30:38 +0000
committerAndrej Shadura <andrewsh@debian.org>2019-03-09 22:30:38 +0000
commit14a486343aef55f97f54082d6b542dedebf6f3ba (patch)
tree000fcc4968578771ad265079eef7617d66de2cda
parent8300861dc4c62c5567a4e654976072f854217544 (diff)
Import Upstream version 0.6.0
-rw-r--r--src/META-INF/elki/de.lmu.ifi.dbs.elki.algorithm.AbstractPrimitiveDistanceBasedAlgorithm2
-rw-r--r--src/META-INF/elki/de.lmu.ifi.dbs.elki.algorithm.Algorithm10
-rw-r--r--src/META-INF/elki/de.lmu.ifi.dbs.elki.algorithm.DistanceBasedAlgorithm4
-rw-r--r--src/META-INF/elki/de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm7
-rw-r--r--src/META-INF/elki/de.lmu.ifi.dbs.elki.algorithm.clustering.affinitypropagation.AffinityPropagationInitialization2
-rw-r--r--src/META-INF/elki/de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeans2
-rw-r--r--src/META-INF/elki/de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.SubspaceClusteringAlgorithm2
-rw-r--r--src/META-INF/elki/de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm3
-rw-r--r--src/META-INF/elki/de.lmu.ifi.dbs.elki.data.NumberVector$Factory7
-rw-r--r--src/META-INF/elki/de.lmu.ifi.dbs.elki.data.SparseNumberVector$Factory5
-rw-r--r--src/META-INF/elki/de.lmu.ifi.dbs.elki.datasource.filter.ObjectFilter3
-rw-r--r--src/META-INF/elki/de.lmu.ifi.dbs.elki.datasource.filter.StreamFilter3
-rw-r--r--src/META-INF/elki/de.lmu.ifi.dbs.elki.datasource.parser.Parser1
-rw-r--r--src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction9
-rw-r--r--src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.DoubleNorm3
-rw-r--r--src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunction6
-rw-r--r--src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction9
-rw-r--r--src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction6
-rw-r--r--src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDistanceFunction3
-rw-r--r--src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDoubleDistanceFunction3
-rw-r--r--src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.LPNormDistanceFunction4
-rw-r--r--src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.DimensionSelectingSubspaceDistanceFunction1
-rw-r--r--src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.similarityfunction.NormalizedSimilarityFunction1
-rw-r--r--src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.similarityfunction.PrimitiveSimilarityFunction12
-rw-r--r--src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.similarityfunction.SimilarityFunction10
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/APRIORI.java11
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/AbstractAlgorithm.java88
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/AbstractPrimitiveDistanceBasedAlgorithm.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/DependencyDerivator.java16
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/KNNDistanceOrder.java9
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/KNNJoin.java93
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/benchmark/ValidateApproximativeKNNIndex.java97
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedClustering.java14
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedDBSCAN.java21
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/DBSCAN.java51
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/DeLiClu.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/EM.java364
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICS.java11
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSXi.java11
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/SNNClustering.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/affinitypropagation/AffinityPropagationClusteringAlgorithm.java350
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/affinitypropagation/AffinityPropagationInitialization.java59
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/affinitypropagation/DistanceBasedInitializationWithMedian.java148
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/affinitypropagation/SimilarityBasedInitializationWithMedian.java153
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/affinitypropagation/package-info.java27
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/biclustering/AbstractBiclustering.java302
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/biclustering/ChengAndChurch.java900
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/biclustering/package-info.java28
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/CASH.java10
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/COPAC.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/HiCO.java25
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/LMCLUS.java96
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ORCLUS.java89
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/EpsilonNeighborPredicate.java11
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/MinPtsCorePredicate.java5
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/ExtractFlatClusteringFromHierarchy.java217
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeans.java195
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/BestOfMultipleKMeans.java27
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FarthestPointsInitialMeans.java33
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansBatchedLloyd.java346
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansBisecting.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansHybridLloydMacQueen.java155
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java8
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansMacQueen.java11
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansPlusPlusInitialMeans.java8
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMediansLloyd.java8
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsEM.java53
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsPAM.java75
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyGeneratedInitialMeans.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/SampleKMeansInitialization.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/package-info.java23
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/onedimensional/KNNKernelDensityMinimaClustering.java384
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/onedimensional/package-info.java27
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java9
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DOC.java605
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java160
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/HiSC.java7
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/P3C.java1000
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PROCLUS.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java544
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java9
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java137
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/COP.java93
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/DWOF.java407
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java24
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/FastABOD.java219
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/LBABOD.java288
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/ODIN.java16
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java71
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java10
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleCOP.java14
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/ALOCI.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/FlexibleLOF.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/INFLO.java32
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDF.java66
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDOF.java15
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOF.java203
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LoOP.java93
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimpleKernelDensityLOF.java9
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimplifiedLOF.java50
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java3
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java59
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java116
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java61
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java3
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java371
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAverageCoordinateOutlier.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java44
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/statistics/AddSingleScale.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/statistics/AveragePrecisionAtK.java46
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/statistics/DistanceStatisticsWithClasses.java90
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/statistics/EvaluateRankingQuality.java24
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/statistics/RankingQualityHistogram.java20
-rw-r--r--src/de/lmu/ifi/dbs/elki/application/AbstractApplication.java55
-rw-r--r--src/de/lmu/ifi/dbs/elki/application/ConvertToBundleApplication.java13
-rw-r--r--src/de/lmu/ifi/dbs/elki/application/ELKILauncher.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/application/cache/CacheDoubleDistanceKNNLists.java34
-rw-r--r--src/de/lmu/ifi/dbs/elki/application/cache/CacheDoubleDistanceRangeQueries.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/application/greedyensemble/ComputeKNNOutlierScores.java94
-rw-r--r--src/de/lmu/ifi/dbs/elki/application/jsmap/JSONResultHandler.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/data/Bit.java36
-rw-r--r--src/de/lmu/ifi/dbs/elki/data/BitVector.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/data/ByteVector.java273
-rw-r--r--src/de/lmu/ifi/dbs/elki/data/DoubleVector.java69
-rw-r--r--src/de/lmu/ifi/dbs/elki/data/FloatVector.java88
-rw-r--r--src/de/lmu/ifi/dbs/elki/data/IntegerVector.java47
-rw-r--r--src/de/lmu/ifi/dbs/elki/data/LabelList.java78
-rw-r--r--src/de/lmu/ifi/dbs/elki/data/RationalNumber.java10
-rw-r--r--src/de/lmu/ifi/dbs/elki/data/ShortVector.java290
-rw-r--r--src/de/lmu/ifi/dbs/elki/data/SparseByteVector.java459
-rw-r--r--src/de/lmu/ifi/dbs/elki/data/SparseDoubleVector.java121
-rw-r--r--src/de/lmu/ifi/dbs/elki/data/SparseFeatureVector.java54
-rw-r--r--src/de/lmu/ifi/dbs/elki/data/SparseFloatVector.java125
-rw-r--r--src/de/lmu/ifi/dbs/elki/data/SparseIntegerVector.java460
-rw-r--r--src/de/lmu/ifi/dbs/elki/data/SparseNumberVector.java125
-rw-r--r--src/de/lmu/ifi/dbs/elki/data/SparseShortVector.java459
-rw-r--r--src/de/lmu/ifi/dbs/elki/data/Subspace.java1
-rw-r--r--src/de/lmu/ifi/dbs/elki/data/VectorUtil.java195
-rw-r--r--src/de/lmu/ifi/dbs/elki/data/images/ComputeHSBColorHistogram.java5
-rw-r--r--src/de/lmu/ifi/dbs/elki/data/model/Bicluster.java194
-rw-r--r--src/de/lmu/ifi/dbs/elki/data/model/BiclusterModel.java54
-rw-r--r--src/de/lmu/ifi/dbs/elki/data/model/BiclusterWithInversionsModel.java66
-rw-r--r--src/de/lmu/ifi/dbs/elki/data/model/BiclusterWithInverted.java110
-rw-r--r--src/de/lmu/ifi/dbs/elki/data/projection/FeatureSelection.java15
-rw-r--r--src/de/lmu/ifi/dbs/elki/data/projection/NumericalFeatureSelection.java17
-rw-r--r--src/de/lmu/ifi/dbs/elki/data/projection/RandomProjection.java10
-rw-r--r--src/de/lmu/ifi/dbs/elki/data/spatial/SpatialUtil.java205
-rw-r--r--src/de/lmu/ifi/dbs/elki/data/synthetic/bymodel/GeneratorSingleCluster.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/data/type/SimpleTypeInformation.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/data/type/TypeUtil.java13
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/AbstractDatabase.java46
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/Database.java11
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/HashmapDatabase.java5
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/QueryUtil.java21
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/StaticArrayDatabase.java60
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/datastore/memory/ArrayDBIDStore.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/datastore/memory/ArrayDoubleDistanceStore.java11
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/datastore/memory/ArrayDoubleStore.java11
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/datastore/memory/ArrayIntegerStore.java13
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/datastore/memory/ArrayRecordStore.java15
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/datastore/memory/ArrayStore.java15
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/datastore/memory/MapIntegerDBIDRecordStore.java16
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/datastore/memory/MapRecordStore.java16
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/ids/ArrayDBIDs.java9
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/ids/DBIDFactory.java9
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/ids/DBIDUtil.java83
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/ids/EmptyDBIDs.java5
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/ids/distance/DoubleDistanceDBIDPairList.java5
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/ids/distance/DoubleDistanceKNNHeap.java9
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/ids/distance/KNNHeap.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/ids/distance/ModifiableDoubleDistanceDBIDList.java5
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/ids/generic/AbstractKNNHeap.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/ids/generic/DistanceDBIDPairKNNHeap.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/ids/generic/DoubleDistanceDBIDPairKNNHeap.java44
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/ids/generic/DoubleDistanceDBIDPairKNNListHeap.java289
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/ids/generic/GenericDistanceDBIDList.java46
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/ids/generic/UnmodifiableArrayDBIDs.java9
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/ids/integer/AbstractIntegerDBIDFactory.java43
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/ids/integer/ArrayModifiableIntegerDBIDs.java208
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/ids/integer/ArrayStaticIntegerDBIDs.java214
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/ids/integer/DoubleDistanceIntegerDBIDKNNHeap.java127
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/ids/integer/DoubleDistanceIntegerDBIDKNNList.java226
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/ids/integer/DoubleDistanceIntegerDBIDList.java (renamed from src/de/lmu/ifi/dbs/elki/database/ids/integer/DoubleDistanceIntegerDBIDKNNListHeap.java)218
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/ids/integer/DoubleDistanceIntegerDBIDPairKNNListHeap.java339
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/ids/integer/DoubleDistanceIntegerDBIDPairList.java234
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/ids/integer/DoubleDistanceIntegerDBIDSortedKNNList.java157
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerArrayDBIDs.java3
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerDBID.java38
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerDBIDArrayQuickSort.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerDBIDIter.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerDBIDPair.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerDBIDRange.java167
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerDBIDRef.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerDBIDVar.java81
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerDBIDs.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/ids/integer/TroveArrayDBIDs.java192
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/ids/integer/TroveArrayModifiableDBIDs.java155
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/ids/integer/TroveHashSetModifiableDBIDs.java45
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/ids/integer/UnmodifiableIntegerArrayDBIDs.java9
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/query/distance/PrimitiveDistanceSimilarityQuery.java9
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/query/knn/DoubleOptimizedDistanceKNNQuery.java91
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/query/knn/DoubleOptimizedKNNQuery.java309
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/query/knn/LinearScanDistanceKNNQuery.java (renamed from src/de/lmu/ifi/dbs/elki/database/query/knn/LinearScanKNNQuery.java)47
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/query/knn/LinearScanPrimitiveDistanceKNNQuery.java21
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/query/range/AbstractDistanceRangeQuery.java5
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/query/range/DoubleOptimizedDistanceRangeQuery.java (renamed from src/de/lmu/ifi/dbs/elki/database/query/range/DoubleOptimizedRangeQuery.java)41
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/query/range/LinearScanDistanceRangeQuery.java (renamed from src/de/lmu/ifi/dbs/elki/database/query/range/LinearScanRangeQuery.java)4
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/query/range/LinearScanPrimitiveDistanceRangeQuery.java28
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/query/range/RangeQuery.java11
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/query/similarity/PrimitiveSimilarityQuery.java11
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/query/similarity/SimilarityQuery.java22
-rw-r--r--src/de/lmu/ifi/dbs/elki/database/relation/RelationUtil.java28
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/AbstractDatabaseConnection.java32
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/ConcatenateFilesDatabaseConnection.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/FileBasedDatabaseConnection.java41
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/GeneratorXMLDatabaseConnection.java27
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/LabelJoinDatabaseConnection.java9
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/RandomDoubleVectorDatabaseConnection.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/bundle/MultipleObjectsBundle.java16
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/ByLabelFilter.java5
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/ClassLabelFilter.java69
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/ClassLabelFromPatternFilter.java5
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/DropNaNFilter.java (renamed from src/de/lmu/ifi/dbs/elki/datasource/filter/NaNFilter.java)10
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/ExternalIDFilter.java50
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/FixedDBIDsFilter.java15
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/HistogramJitterFilter.java16
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/RandomSamplingStreamFilter.java19
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/ReplaceNaNWithRandomFilter.java220
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/ShuffleObjectsFilter.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/SplitNumberVectorFilter.java33
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseCDFNormalization.java28
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseMinMaxNormalization.java48
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseVarianceNormalization.java16
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/InverseDocumentFrequencyNormalization.java23
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/TFIDFNormalization.java13
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/transform/AbstractSupervisedProjectionVectorFilter.java37
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorFeatureSelectionFilter.java9
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorRandomFeatureSelectionFilter.java10
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/parser/AbstractParser.java151
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/parser/AbstractStreamingParser.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/parser/ArffParser.java24
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/parser/BitVectorLabelParser.java50
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/parser/CategorialDataAsNumberVectorParser.java161
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/parser/DoubleVectorLabelParser.java10
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/parser/FloatVectorLabelParser.java8
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/parser/NumberVectorLabelParser.java167
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/parser/SimplePolygonParser.java90
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/parser/SparseBitVectorLabelParser.java43
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/parser/SparseFloatVectorLabelParser.java9
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/parser/SparseNumberVectorLabelParser.java83
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/parser/StringParser.java5
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/parser/TermFrequencyParser.java71
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/parser/Tokenizer.java230
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/distancefunction/AbstractVectorDoubleDistanceFunction.java41
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/distancefunction/MinKDistance.java18
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/distancefunction/adapter/AbstractSimilarityAdapter.java16
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/distancefunction/adapter/ArccosSimilarityAdapter.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/distancefunction/adapter/LinearAdapterLinear.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/distancefunction/adapter/LnSimilarityAdapter.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/distancefunction/colorhistogram/HSBHistogramQuadraticDistanceFunction.java11
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/distancefunction/correlation/ERiCDistanceFunction.java34
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/distancefunction/correlation/PCABasedCorrelationDistanceFunction.java24
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/distancefunction/external/NumberDistanceParser.java80
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/distancefunction/geo/DimensionSelectingLatLngDistanceFunction.java44
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/distancefunction/histogram/package-info.java23
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/EuclideanDistanceFunction.java166
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/LPIntegerNormDistanceFunction.java215
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/LPNormDistanceFunction.java214
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/ManhattanDistanceFunction.java138
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/MaximumDistanceFunction.java135
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/SparseEuclideanDistanceFunction.java64
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/SparseLPNormDistanceFunction.java84
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/SparseManhattanDistanceFunction.java70
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/SparseMaximumDistanceFunction.java87
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/WeightedEuclideanDistanceFunction.java155
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/WeightedLPNormDistanceFunction.java136
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/WeightedManhattanDistanceFunction.java128
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/WeightedMaximumDistanceFunction.java193
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/distancefunction/probabilistic/package-info.java23
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/distancefunction/subspace/AbstractDimensionsSelectingDoubleDistanceFunction.java13
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/distancefunction/subspace/AbstractPreferenceVectorBasedCorrelationDistanceFunction.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/distancefunction/subspace/DimensionSelectingDistanceFunction.java26
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/distancefunction/subspace/SubspaceLPNormDistanceFunction.java7
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/distancefunction/subspace/SubspaceMaximumDistanceFunction.java149
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/distancefunction/timeseries/AbstractEditDistanceFunction.java13
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/distancefunction/timeseries/EDRDistanceFunction.java34
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/distancefunction/timeseries/ERPDistanceFunction.java34
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/distancefunction/timeseries/LCSSDistanceFunction.java11
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/distancevalue/BitDistance.java35
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/distancevalue/DoubleDistance.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/distancevalue/PCACorrelationDistance.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/distancevalue/PreferenceVectorBasedCorrelationDistance.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/distancevalue/SubspaceDistance.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/similarityfunction/AbstractDBIDSimilarityFunction.java5
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/similarityfunction/AbstractPrimitiveSimilarityFunction.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/similarityfunction/AbstractVectorDoubleSimilarityFunction.java50
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/similarityfunction/FractionalSharedNearestNeighborSimilarityFunction.java21
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/similarityfunction/JaccardPrimitiveSimilarityFunction.java205
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/similarityfunction/Kulczynski1SimilarityFunction.java36
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/similarityfunction/Kulczynski2SimilarityFunction.java36
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/similarityfunction/NormalizedPrimitiveSimilarityFunction.java5
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/similarityfunction/NormalizedSimilarityFunction.java5
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/similarityfunction/PrimitiveDoubleSimilarityFunction.java49
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/similarityfunction/PrimitiveSimilarityFunction.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/similarityfunction/SharedNearestNeighborSimilarityFunction.java33
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/FooKernelFunction.java137
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/KernelMatrix.java203
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/LaplaceKernelFunction.java100
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/LinearKernelFunction.java84
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/PolynomialKernelFunction.java94
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/RadialBasisFunctionKernelFunction.java102
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/RationalQuadraticKernelFunction.java101
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/SigmoidKernelFunction.java110
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/clustering/PairCounting.java59
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/histogram/ComputeOutlierHistogram.java38
-rw-r--r--src/de/lmu/ifi/dbs/elki/gui/util/LogPanel.java10
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/lsh/InMemoryLSHIndex.java72
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/lsh/hashfamilies/AbstractHashFunctionFamily.java16
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/AbstractMaterializeKNNPreprocessor.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MaterializeKNNAndRKNNPreprocessor.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MaterializeKNNPreprocessor.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MetricalIndexApproximationMaterializeKNNPreprocessor.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/PartitionApproximationMaterializeKNNPreprocessor.java59
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/RandomSampleKNNPreprocessor.java21
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/SpatialApproximationMaterializeKNNPreprocessor.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/preprocessed/localpca/KNNQueryFilteredPCAIndex.java8
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/DiSHPreferenceVectorIndex.java8
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/HiSCPreferenceVectorIndex.java38
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/preprocessed/snn/SharedNearestNeighborPreprocessor.java8
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/preprocessed/subspaceproj/AbstractSubspaceProjectionIndex.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/preprocessed/subspaceproj/FourCSubspaceIndex.java23
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/preprocessed/subspaceproj/PreDeConSubspaceIndex.java37
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/projected/PINN.java20
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/projected/ProjectedIndex.java29
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/AbstractMkTreeUnifiedFactory.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkapp/MkAppTreeFactory.java12
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkapp/PolynomialApproximation.java14
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkcop/MkCopTreeFactory.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkmax/MkMaxTree.java3
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/query/DoubleDistanceMetricalIndexKNNQuery.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/query/MetricalIndexKNNQuery.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/strategies/split/RandomSplit.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/tree/spatial/kd/MinimalisticMemoryKDTree.java8
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/AbstractRStarTreeFactory.java21
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/query/DoubleDistanceRStarTreeKNNQuery.java58
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/query/DoubleDistanceRStarTreeRangeQuery.java55
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/query/GenericRStarTreeKNNQuery.java140
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/insert/ApproximativeLeastOverlapInsertionStrategy.java5
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/reinsert/AbstractPartialReinsert.java11
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/vafile/PartialVAFile.java10
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/vafile/VAFile.java62
-rw-r--r--src/de/lmu/ifi/dbs/elki/logging/progress/MutableProgress.java106
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/MathUtil.java74
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/Mean.java21
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/MeanVariance.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/HiCSDimensionSimilarity.java39
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/geometry/PrimsMinimumSpanningTree.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/linearalgebra/CovarianceMatrix.java58
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/linearalgebra/EigenvalueDecomposition.java9
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/linearalgebra/Matrix.java8
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/linearalgebra/SingularValueDecomposition.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/linearalgebra/VMath.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/DropEigenPairFilter.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/FirstNEigenPairFilter.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/LimitEigenPairFilter.java39
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredRunner.java17
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PercentageEigenPairFilter.java29
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/ProgressiveEigenPairFilter.java28
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/RANSACCovarianceMatrixBuilder.java20
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/RelativeEigenPairFilter.java14
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/SignificantEigenPairFilter.java18
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/WeakEigenPairFilter.java17
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/linearalgebra/randomprojections/AbstractRandomProjectionFamily.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/linearalgebra/randomprojections/AchlioptasRandomProjectionFamily.java20
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/linearalgebra/randomprojections/RandomSubsetProjectionFamily.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/scales/Scales.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/KernelDensityEstimator.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/MultipleLinearRegression.java3
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/PolynomialRegression.java3
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/ProbabilityWeightedMoments.java10
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/AbstractDistribution.java115
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/BetaDistribution.java79
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/CauchyDistribution.java67
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java70
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java63
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ConstantDistribution.java41
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ExponentialDistribution.java71
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ExponentiallyModifiedGaussianDistribution.java75
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GammaDistribution.java78
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GeneralizedExtremeValueDistribution.java73
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GeneralizedLogisticAlternateDistribution.java69
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GeneralizedLogisticDistribution.java66
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GumbelDistribution.java59
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/HaltonUniformDistribution.java51
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/KappaDistribution.java147
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LaplaceDistribution.java72
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LogGammaAlternateDistribution.java74
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LogGammaDistribution.java74
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LogLogisticDistribution.java65
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LogNormalDistribution.java85
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LogisticDistribution.java60
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java56
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/PoissonDistribution.java224
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/RayleighDistribution.java67
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/SkewGeneralizedNormalDistribution.java81
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/StudentsTDistribution.java64
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/UniformDistribution.java79
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/WaldDistribution.java58
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/WeibullDistribution.java74
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GammaChoiWetteEstimator.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LaplaceLMMEstimator.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LaplaceMADEstimator.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LaplaceMLEEstimator.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/UniformMinMaxEstimator.java3
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/BestFitEstimator.java10
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/TrimmedEstimator.java15
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/WinsorisingEstimator.java17
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/package-info.java23
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/package-info.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/statistics/kernelfunctions/package-info.java23
-rw-r--r--src/de/lmu/ifi/dbs/elki/persistent/AbstractPageFileFactory.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/persistent/LRUCachePageFileFactory.java8
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/BitsUtil.java177
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/FormatUtil.java531
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/InspectionUtil.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/RandomFactory.java14
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/UnsafeRandom.java81
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/Util.java23
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/QuickSelect.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayLikeUtil.java5
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FlatMatrixAdapter.java85
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMaxHeap.java220
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMinHeap.java220
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMaxHeap.java220
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMinHeap.java220
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleHeap.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerHeap.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMaxHeap.java258
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMinHeap.java258
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMaxHeap.java258
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMinHeap.java258
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMaxHeap.java220
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMinHeap.java220
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectHeap.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMaxHeap.java258
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMinHeap.java258
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerHeap.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMaxHeap.java220
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMinHeap.java220
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectHeap.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMaxHeap.java258
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMinHeap.java258
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ObjectHeap.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntStaticHistogram.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/documentation/Description.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/documentation/Title.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/ensemble/EnsembleVotingMedian.java11
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/AllOrNoneMustBeSetGlobalConstraint.java5
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/CommonConstraints.java98
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/EqualSizeGlobalConstraint.java8
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/GlobalListSizeConstraint.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/LessEqualGlobalConstraint.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/LessGlobalConstraint.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/NoDuplicateValueGlobalConstraint.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/optionhandling/package-info.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/AbstractParameter.java64
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/ClassListParameter.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/ClassParameter.java53
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/DistanceParameter.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/DoubleListParameter.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/DoubleParameter.java57
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/EnumParameter.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/FileListParameter.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/FileParameter.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/Flag.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/IntListParameter.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/IntParameter.java71
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/ListParameter.java3
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/LongParameter.java20
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/NumberParameter.java5
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/Parameter.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/PatternParameter.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/RandomParameter.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/StringParameter.java34
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/VectorListParameter.java33
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/referencepoints/AxisBasedReferencePoints.java21
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/referencepoints/GridBasedReferencePoints.java26
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/referencepoints/RandomGeneratedReferencePoints.java16
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/referencepoints/RandomSampleReferencePoints.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/referencepoints/StarBasedReferencePoints.java24
-rw-r--r--src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/TopKOutlierScaling.java41
-rw-r--r--src/de/lmu/ifi/dbs/elki/visualization/ExportVisualizations.java60
-rw-r--r--src/de/lmu/ifi/dbs/elki/visualization/VisualizerParameterizer.java95
-rw-r--r--src/de/lmu/ifi/dbs/elki/visualization/gui/SelectionTableWindow.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/visualization/projections/AffineProjection.java3
-rw-r--r--src/de/lmu/ifi/dbs/elki/visualization/projector/HistogramFactory.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/visualization/projector/ScatterPlotFactory.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/visualization/style/PropertiesBasedStyleLibrary.java9
-rw-r--r--src/de/lmu/ifi/dbs/elki/visualization/visualizers/histogram/ColoredHistogramVisualizer.java77
-rw-r--r--src/de/lmu/ifi/dbs/elki/visualization/visualizers/pairsegments/CircleSegmentsVisualizer.java3
-rw-r--r--src/de/lmu/ifi/dbs/elki/visualization/visualizers/parallel/LineVisualization.java69
-rw-r--r--src/de/lmu/ifi/dbs/elki/visualization/visualizers/parallel/selection/SelectionLineVisualization.java46
-rw-r--r--src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/AbstractTooltipVisualization.java3
-rw-r--r--src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/MarkerVisualization.java3
-rw-r--r--src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/TooltipScoreVisualization.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/cluster/ClusterHullVisualization.java9
-rw-r--r--src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/cluster/ClusterOrderVisualization.java11
-rw-r--r--src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/outlier/BubbleVisualization.java60
-rw-r--r--src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/outlier/COPVectorVisualization.java3
-rw-r--r--src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/selection/DistanceFunctionVisualization.java3
-rw-r--r--src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/selection/MoveObjectsToolVisualization.java5
-rw-r--r--src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/selection/SelectionConvexHullVisualization.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/selection/SelectionDotVisualization.java3
-rw-r--r--src/de/lmu/ifi/dbs/elki/workflow/AlgorithmStep.java2
-rw-r--r--src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering1.java4
-rw-r--r--src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering2.java4
-rw-r--r--src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering3.java4
-rw-r--r--src/tutorial/clustering/SameSizeKMeansAlgorithm.java82
-rw-r--r--src/tutorial/outlier/DistanceStddevOutlier.java4
-rw-r--r--src/tutorial/outlier/ODIN.java4
-rw-r--r--test/de/lmu/ifi/dbs/elki/algorithm/AbstractSimpleAlgorithmTest.java6
-rw-r--r--test/de/lmu/ifi/dbs/elki/algorithm/TestKNNJoin.java18
-rw-r--r--test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestDBSCANResults.java16
-rw-r--r--test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestDeLiCluResults.java2
-rw-r--r--test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestEMResults.java8
-rw-r--r--test/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/TestCOPACResults.java8
-rw-r--r--test/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/TestERiCResults.java8
-rw-r--r--test/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/TestKMeansResults.java6
-rw-r--r--test/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/TestP3C.java84
-rw-r--r--test/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/TestPreDeConResults.java2
-rw-r--r--test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestABOD.java10
-rw-r--r--test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestFastABOD.java60
-rw-r--r--test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestLBABOD.java62
-rw-r--r--test/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/TestOnlineLOF.java2
-rw-r--r--test/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/TestSOD.java2
-rw-r--r--test/de/lmu/ifi/dbs/elki/database/TestRelationSorting.java2
-rw-r--r--test/de/lmu/ifi/dbs/elki/datasource/parser/TestTermFrequencyParser.java8
-rw-r--r--test/de/lmu/ifi/dbs/elki/datasource/parser/TestTokenizer.java133
-rw-r--r--test/de/lmu/ifi/dbs/elki/evaluation/paircounting/TestClusterContingencyTable.java2
-rw-r--r--test/de/lmu/ifi/dbs/elki/index/TestIndexStructures.java26
-rw-r--r--test/de/lmu/ifi/dbs/elki/index/preprocessed/TestMaterializedKNNAndRKNNPreprocessor.java10
-rw-r--r--test/de/lmu/ifi/dbs/elki/math/TestKernelDensityFitting.java2
-rw-r--r--test/de/lmu/ifi/dbs/elki/math/TestWeightFunctions.java10
-rw-r--r--test/de/lmu/ifi/dbs/elki/math/statistics/distribution/AbstractDistributionTest.java23
-rw-r--r--test/de/lmu/ifi/dbs/elki/utilities/TestFormatUtil.java84
554 files changed, 21883 insertions, 11561 deletions
diff --git a/src/META-INF/elki/de.lmu.ifi.dbs.elki.algorithm.AbstractPrimitiveDistanceBasedAlgorithm b/src/META-INF/elki/de.lmu.ifi.dbs.elki.algorithm.AbstractPrimitiveDistanceBasedAlgorithm
index 32a6b535..8f43e0f7 100644
--- a/src/META-INF/elki/de.lmu.ifi.dbs.elki.algorithm.AbstractPrimitiveDistanceBasedAlgorithm
+++ b/src/META-INF/elki/de.lmu.ifi.dbs.elki.algorithm.AbstractPrimitiveDistanceBasedAlgorithm
@@ -2,4 +2,6 @@ de.lmu.ifi.dbs.elki.algorithm.DependencyDerivator
de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansLloyd
de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansMacQueen
de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMediansLloyd
+de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansBatchedLloyd
+de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansHybridLloydMacQueen
tutorial.clustering.SameSizeKMeansAlgorithm
diff --git a/src/META-INF/elki/de.lmu.ifi.dbs.elki.algorithm.Algorithm b/src/META-INF/elki/de.lmu.ifi.dbs.elki.algorithm.Algorithm
index de720fef..5fc2ff24 100644
--- a/src/META-INF/elki/de.lmu.ifi.dbs.elki.algorithm.Algorithm
+++ b/src/META-INF/elki/de.lmu.ifi.dbs.elki.algorithm.Algorithm
@@ -3,6 +3,7 @@ de.lmu.ifi.dbs.elki.algorithm.clustering.CanopyPreClustering
de.lmu.ifi.dbs.elki.algorithm.clustering.DBSCAN
de.lmu.ifi.dbs.elki.algorithm.clustering.DeLiClu
de.lmu.ifi.dbs.elki.algorithm.clustering.EM
+de.lmu.ifi.dbs.elki.algorithm.clustering.affinitypropagation.AffinityPropagationClusteringAlgorithm
de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan.GeneralizedDBSCAN
de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical.ExtractFlatClusteringFromHierarchy
de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical.NaiveAgglomerativeHierarchicalClustering
@@ -14,10 +15,13 @@ de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMedoidsPAM
de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMedoidsEM
de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.BestOfMultipleKMeans
de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansBisecting
+de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansBatchedLloyd
+de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansHybridLloydMacQueen
de.lmu.ifi.dbs.elki.algorithm.clustering.NaiveMeanShiftClustering
de.lmu.ifi.dbs.elki.algorithm.clustering.OPTICSXi
de.lmu.ifi.dbs.elki.algorithm.clustering.OPTICS
de.lmu.ifi.dbs.elki.algorithm.clustering.SNNClustering
+de.lmu.ifi.dbs.elki.algorithm.clustering.biclustering.ChengAndChurch
de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.CASH
de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.COPAC
de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.ERiC
@@ -25,9 +29,12 @@ de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.FourC
de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.HiCO
de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.LMCLUS
de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.ORCLUS
+de.lmu.ifi.dbs.elki.algorithm.clustering.onedimensional.KNNKernelDensityMinimaClustering
de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.CLIQUE
de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.DiSH
+de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.DOC
de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.HiSC
+de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.P3C
de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.PreDeCon
de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.PROCLUS
de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.SUBCLU
@@ -38,11 +45,14 @@ de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.TrivialAllInOne
de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.TrivialAllNoise
de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelOrAllInOneClustering
de.lmu.ifi.dbs.elki.algorithm.outlier.ABOD
+de.lmu.ifi.dbs.elki.algorithm.outlier.FastABOD
+de.lmu.ifi.dbs.elki.algorithm.outlier.LBABOD
de.lmu.ifi.dbs.elki.algorithm.outlier.AggarwalYuEvolutionary
de.lmu.ifi.dbs.elki.algorithm.outlier.AggarwalYuNaive
de.lmu.ifi.dbs.elki.algorithm.outlier.COP
de.lmu.ifi.dbs.elki.algorithm.outlier.DBOutlierDetection
de.lmu.ifi.dbs.elki.algorithm.outlier.DBOutlierScore
+de.lmu.ifi.dbs.elki.algorithm.outlier.DWOF
de.lmu.ifi.dbs.elki.algorithm.outlier.EMOutlier
de.lmu.ifi.dbs.elki.algorithm.outlier.GaussianModel
de.lmu.ifi.dbs.elki.algorithm.outlier.GaussianUniformMixture
diff --git a/src/META-INF/elki/de.lmu.ifi.dbs.elki.algorithm.DistanceBasedAlgorithm b/src/META-INF/elki/de.lmu.ifi.dbs.elki.algorithm.DistanceBasedAlgorithm
index 8189b637..fceac21e 100644
--- a/src/META-INF/elki/de.lmu.ifi.dbs.elki.algorithm.DistanceBasedAlgorithm
+++ b/src/META-INF/elki/de.lmu.ifi.dbs.elki.algorithm.DistanceBasedAlgorithm
@@ -15,12 +15,14 @@ de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMedoidsPAM
de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMedoidsEM
de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.BestOfMultipleKMeans
de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansBisecting
+de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansBatchedLloyd
+de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansHybridLloydMacQueen
de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.HiCO
de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.HiSC
-de.lmu.ifi.dbs.elki.algorithm.outlier.ABOD
de.lmu.ifi.dbs.elki.algorithm.outlier.COP
de.lmu.ifi.dbs.elki.algorithm.outlier.DBOutlierDetection
de.lmu.ifi.dbs.elki.algorithm.outlier.DBOutlierScore
+de.lmu.ifi.dbs.elki.algorithm.outlier.DWOF
de.lmu.ifi.dbs.elki.algorithm.outlier.HilOut
de.lmu.ifi.dbs.elki.algorithm.outlier.KNNOutlier
de.lmu.ifi.dbs.elki.algorithm.outlier.KNNWeightOutlier
diff --git a/src/META-INF/elki/de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm b/src/META-INF/elki/de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm
index 6d96c265..f0fc5b55 100644
--- a/src/META-INF/elki/de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm
+++ b/src/META-INF/elki/de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm
@@ -1,6 +1,7 @@
de.lmu.ifi.dbs.elki.algorithm.clustering.CanopyPreClustering
de.lmu.ifi.dbs.elki.algorithm.clustering.DBSCAN
de.lmu.ifi.dbs.elki.algorithm.clustering.EM
+de.lmu.ifi.dbs.elki.algorithm.clustering.affinitypropagation.AffinityPropagationClusteringAlgorithm
de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan.GeneralizedDBSCAN
de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical.ExtractFlatClusteringFromHierarchy
de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansLloyd
@@ -10,16 +11,22 @@ de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMedoidsPAM
de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMedoidsEM
de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.BestOfMultipleKMeans
de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansBisecting
+de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansBatchedLloyd
+de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansHybridLloydMacQueen
de.lmu.ifi.dbs.elki.algorithm.clustering.NaiveMeanShiftClustering
de.lmu.ifi.dbs.elki.algorithm.clustering.OPTICSXi
de.lmu.ifi.dbs.elki.algorithm.clustering.SNNClustering
+de.lmu.ifi.dbs.elki.algorithm.clustering.biclustering.ChengAndChurch
de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.CASH
de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.COPAC
de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.ERiC
de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.FourC
de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.ORCLUS
+de.lmu.ifi.dbs.elki.algorithm.clustering.onedimensional.KNNKernelDensityMinimaClustering
de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.CLIQUE
de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.DiSH
+de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.DOC
+de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.P3C
de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.PreDeCon
de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.PROCLUS
de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.SUBCLU
diff --git a/src/META-INF/elki/de.lmu.ifi.dbs.elki.algorithm.clustering.affinitypropagation.AffinityPropagationInitialization b/src/META-INF/elki/de.lmu.ifi.dbs.elki.algorithm.clustering.affinitypropagation.AffinityPropagationInitialization
new file mode 100644
index 00000000..67b158dd
--- /dev/null
+++ b/src/META-INF/elki/de.lmu.ifi.dbs.elki.algorithm.clustering.affinitypropagation.AffinityPropagationInitialization
@@ -0,0 +1,2 @@
+de.lmu.ifi.dbs.elki.algorithm.clustering.affinitypropagation.DistanceBasedInitializationWithMedian
+de.lmu.ifi.dbs.elki.algorithm.clustering.affinitypropagation.SimilarityBasedInitializationWithMedian
diff --git a/src/META-INF/elki/de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeans b/src/META-INF/elki/de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeans
index 3fe49ec6..783a9264 100644
--- a/src/META-INF/elki/de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeans
+++ b/src/META-INF/elki/de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeans
@@ -3,4 +3,6 @@ de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansMacQueen
de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMediansLloyd
de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.BestOfMultipleKMeans
de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansBisecting
+de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansBatchedLloyd
+de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansHybridLloydMacQueen
tutorial.clustering.SameSizeKMeansAlgorithm \ No newline at end of file
diff --git a/src/META-INF/elki/de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.SubspaceClusteringAlgorithm b/src/META-INF/elki/de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.SubspaceClusteringAlgorithm
index c71dd241..b01dc68b 100644
--- a/src/META-INF/elki/de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.SubspaceClusteringAlgorithm
+++ b/src/META-INF/elki/de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.SubspaceClusteringAlgorithm
@@ -1,4 +1,6 @@
de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.CLIQUE
de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.DiSH
+de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.DOC
+de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.P3C
de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.PROCLUS
de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.SUBCLU \ No newline at end of file
diff --git a/src/META-INF/elki/de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm b/src/META-INF/elki/de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm
index f9924515..fbb5c6b2 100644
--- a/src/META-INF/elki/de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm
+++ b/src/META-INF/elki/de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm
@@ -1,9 +1,12 @@
de.lmu.ifi.dbs.elki.algorithm.outlier.ABOD
+de.lmu.ifi.dbs.elki.algorithm.outlier.FastABOD
+de.lmu.ifi.dbs.elki.algorithm.outlier.LBABOD
de.lmu.ifi.dbs.elki.algorithm.outlier.AggarwalYuEvolutionary
de.lmu.ifi.dbs.elki.algorithm.outlier.AggarwalYuNaive
de.lmu.ifi.dbs.elki.algorithm.outlier.COP
de.lmu.ifi.dbs.elki.algorithm.outlier.DBOutlierDetection
de.lmu.ifi.dbs.elki.algorithm.outlier.DBOutlierScore
+de.lmu.ifi.dbs.elki.algorithm.outlier.DWOF
de.lmu.ifi.dbs.elki.algorithm.outlier.EMOutlier
de.lmu.ifi.dbs.elki.algorithm.outlier.GaussianModel
de.lmu.ifi.dbs.elki.algorithm.outlier.GaussianUniformMixture
diff --git a/src/META-INF/elki/de.lmu.ifi.dbs.elki.data.NumberVector$Factory b/src/META-INF/elki/de.lmu.ifi.dbs.elki.data.NumberVector$Factory
index c5e5f3d8..0ae7fa7f 100644
--- a/src/META-INF/elki/de.lmu.ifi.dbs.elki.data.NumberVector$Factory
+++ b/src/META-INF/elki/de.lmu.ifi.dbs.elki.data.NumberVector$Factory
@@ -2,6 +2,11 @@ de.lmu.ifi.dbs.elki.data.DoubleVector$Factory
de.lmu.ifi.dbs.elki.data.BitVector$Factory
de.lmu.ifi.dbs.elki.data.FloatVector$Factory
de.lmu.ifi.dbs.elki.data.IntegerVector$Factory
+de.lmu.ifi.dbs.elki.data.ShortVector$Factory
+de.lmu.ifi.dbs.elki.data.ByteVector$Factory
de.lmu.ifi.dbs.elki.data.OneDimensionalDoubleVector$Factory
de.lmu.ifi.dbs.elki.data.SparseFloatVector$Factory
-de.lmu.ifi.dbs.elki.data.SparseDoubleVector$Factory \ No newline at end of file
+de.lmu.ifi.dbs.elki.data.SparseDoubleVector$Factory
+de.lmu.ifi.dbs.elki.data.SparseByteVector$Factory
+de.lmu.ifi.dbs.elki.data.SparseIntegerVector$Factory
+de.lmu.ifi.dbs.elki.data.SparseShortVector$Factory \ No newline at end of file
diff --git a/src/META-INF/elki/de.lmu.ifi.dbs.elki.data.SparseNumberVector$Factory b/src/META-INF/elki/de.lmu.ifi.dbs.elki.data.SparseNumberVector$Factory
index 959360e1..9d6b1ed0 100644
--- a/src/META-INF/elki/de.lmu.ifi.dbs.elki.data.SparseNumberVector$Factory
+++ b/src/META-INF/elki/de.lmu.ifi.dbs.elki.data.SparseNumberVector$Factory
@@ -1,2 +1,5 @@
de.lmu.ifi.dbs.elki.data.SparseFloatVector$Factory
-de.lmu.ifi.dbs.elki.data.SparseDoubleVector$Factory \ No newline at end of file
+de.lmu.ifi.dbs.elki.data.SparseDoubleVector$Factory
+de.lmu.ifi.dbs.elki.data.SparseByteVector$Factory
+de.lmu.ifi.dbs.elki.data.SparseIntegerVector$Factory
+de.lmu.ifi.dbs.elki.data.SparseShortVector$Factory \ No newline at end of file
diff --git a/src/META-INF/elki/de.lmu.ifi.dbs.elki.datasource.filter.ObjectFilter b/src/META-INF/elki/de.lmu.ifi.dbs.elki.datasource.filter.ObjectFilter
index 3ac4a5ac..a5257291 100644
--- a/src/META-INF/elki/de.lmu.ifi.dbs.elki.datasource.filter.ObjectFilter
+++ b/src/META-INF/elki/de.lmu.ifi.dbs.elki.datasource.filter.ObjectFilter
@@ -8,7 +8,8 @@ de.lmu.ifi.dbs.elki.datasource.filter.ByLabelFilter
de.lmu.ifi.dbs.elki.datasource.filter.RandomSamplingStreamFilter
de.lmu.ifi.dbs.elki.datasource.filter.ShuffleObjectsFilter
de.lmu.ifi.dbs.elki.datasource.filter.SortByLabelFilter
-de.lmu.ifi.dbs.elki.datasource.filter.NaNFilter
+de.lmu.ifi.dbs.elki.datasource.filter.DropNaNFilter
+de.lmu.ifi.dbs.elki.datasource.filter.ReplaceNaNWithRandomFilter
de.lmu.ifi.dbs.elki.datasource.filter.NoMissingValuesFilter
de.lmu.ifi.dbs.elki.datasource.filter.HistogramJitterFilter
de.lmu.ifi.dbs.elki.datasource.filter.SplitNumberVectorFilter
diff --git a/src/META-INF/elki/de.lmu.ifi.dbs.elki.datasource.filter.StreamFilter b/src/META-INF/elki/de.lmu.ifi.dbs.elki.datasource.filter.StreamFilter
index 00e42cc7..8ab5827f 100644
--- a/src/META-INF/elki/de.lmu.ifi.dbs.elki.datasource.filter.StreamFilter
+++ b/src/META-INF/elki/de.lmu.ifi.dbs.elki.datasource.filter.StreamFilter
@@ -2,7 +2,8 @@ de.lmu.ifi.dbs.elki.datasource.filter.NoOpFilter
de.lmu.ifi.dbs.elki.datasource.filter.FixedDBIDsFilter
de.lmu.ifi.dbs.elki.datasource.filter.ByLabelFilter
de.lmu.ifi.dbs.elki.datasource.filter.ClassLabelFromPatternFilter
-de.lmu.ifi.dbs.elki.datasource.filter.NaNFilter
+de.lmu.ifi.dbs.elki.datasource.filter.DropNaNFilter
+de.lmu.ifi.dbs.elki.datasource.filter.ReplaceNaNWithRandomFilter
de.lmu.ifi.dbs.elki.datasource.filter.NoMissingValuesFilter
de.lmu.ifi.dbs.elki.datasource.filter.RandomSamplingStreamFilter
de.lmu.ifi.dbs.elki.datasource.filter.HistogramJitterFilter
diff --git a/src/META-INF/elki/de.lmu.ifi.dbs.elki.datasource.parser.Parser b/src/META-INF/elki/de.lmu.ifi.dbs.elki.datasource.parser.Parser
index f8f352f1..57a71694 100644
--- a/src/META-INF/elki/de.lmu.ifi.dbs.elki.datasource.parser.Parser
+++ b/src/META-INF/elki/de.lmu.ifi.dbs.elki.datasource.parser.Parser
@@ -1,6 +1,7 @@
de.lmu.ifi.dbs.elki.datasource.parser.NumberVectorLabelParser
de.lmu.ifi.dbs.elki.datasource.parser.ArffParser
de.lmu.ifi.dbs.elki.datasource.parser.SparseNumberVectorLabelParser
+de.lmu.ifi.dbs.elki.datasource.parser.CategorialDataAsNumberVectorParser
de.lmu.ifi.dbs.elki.datasource.parser.SparseBitVectorLabelParser
de.lmu.ifi.dbs.elki.datasource.parser.TermFrequencyParser
de.lmu.ifi.dbs.elki.datasource.parser.BitVectorLabelParser
diff --git a/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction b/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction
index 14892064..3f1c0e2c 100644
--- a/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction
+++ b/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction
@@ -1,6 +1,7 @@
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.ManhattanDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.LPNormDistanceFunction
+# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.LPIntegerNormDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.MaximumDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.MinimumDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.ArcCosineDistanceFunction
@@ -21,6 +22,7 @@ de.lmu.ifi.dbs.elki.distance.distancefunction.LocallyWeightedDistanceFunction
# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.WeightedLPNormDistanceFunction
# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.WeightedEuclideanDistanceFunction
# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.WeightedManhattanDistanceFunction
+# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.WeightedMaximumDistanceFunction
# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.WeightedSquaredEuclideanDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SparseEuclideanDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SparseManhattanDistanceFunction
@@ -55,6 +57,7 @@ de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.DimensionSelectingDistanc
de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceLPNormDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceManhattanDistanceFunction
+de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceMaximumDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.DiSHDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.HiSCDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.LocalSubspaceDistanceFunction
@@ -66,8 +69,8 @@ de.lmu.ifi.dbs.elki.distance.distancefunction.external.DiskCacheBasedDoubleDista
de.lmu.ifi.dbs.elki.distance.distancefunction.external.DiskCacheBasedFloatDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.external.FileBasedDoubleDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.external.FileBasedFloatDistanceFunction
-# de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.FooKernelFunction
-# de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.LinearKernelFunction
-# de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.PolynomialKernelFunction
+de.lmu.ifi.dbs.elki.distance.similarityfunction.JaccardPrimitiveSimilarityFunction
+de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.LinearKernelFunction
+de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.PolynomialKernelFunction
# tutorial.distancefunction.MultiLPNorm
# tutorial.distancefunction.TutorialDistanceFunction
diff --git a/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.DoubleNorm b/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.DoubleNorm
index 69aec8b7..2b2a8e5c 100644
--- a/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.DoubleNorm
+++ b/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.DoubleNorm
@@ -3,10 +3,12 @@ de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.ManhattanDistanceFunctio
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.MaximumDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.MinimumDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.LPNormDistanceFunction
+# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.LPIntegerNormDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction
# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.WeightedLPNormDistanceFunction
# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.WeightedEuclideanDistanceFunction
# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.WeightedManhattanDistanceFunction
+# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.WeightedMaximumDistanceFunction
# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.WeightedSquaredEuclideanDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SparseEuclideanDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SparseManhattanDistanceFunction
@@ -15,5 +17,6 @@ de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SparseMaximumDistanceFun
de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceLPNormDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceManhattanDistanceFunction
+de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceMaximumDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.DimensionSelectingDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.LorentzianDistanceFunction
diff --git a/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunction b/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunction
index 67a6ccf1..51c1f64f 100644
--- a/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunction
+++ b/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunction
@@ -1,6 +1,7 @@
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.ManhattanDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.LPNormDistanceFunction
+# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.LPIntegerNormDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.MaximumDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.MinimumDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.ArcCosineDistanceFunction
@@ -16,6 +17,7 @@ de.lmu.ifi.dbs.elki.distance.distancefunction.LorentzianDistanceFunction
# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.WeightedLPNormDistanceFunction
# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.WeightedEuclideanDistanceFunction
# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.WeightedManhattanDistanceFunction
+# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.WeightedMaximumDistanceFunction
# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.WeightedSquaredEuclideanDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.colorhistogram.HSBHistogramQuadraticDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.colorhistogram.HistogramIntersectionDistanceFunction
@@ -35,6 +37,10 @@ de.lmu.ifi.dbs.elki.distance.distancefunction.probabilistic.JensenShannonDiverge
de.lmu.ifi.dbs.elki.distance.distancefunction.probabilistic.KullbackLeiblerDivergenceAsymmetricDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.probabilistic.KullbackLeiblerDivergenceReverseAsymmetricDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.probabilistic.SqrtJensenShannonDivergenceDistanceFunction
+de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction
+de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceLPNormDistanceFunction
+de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceManhattanDistanceFunction
+de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceMaximumDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.DimensionSelectingDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.timeseries.DTWDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.timeseries.EDRDistanceFunction
diff --git a/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction b/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction
index 917ac511..38a019c0 100644
--- a/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction
+++ b/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction
@@ -1,6 +1,7 @@
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.ManhattanDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.LPNormDistanceFunction
+# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.LPIntegerNormDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.ArcCosineDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.CosineDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.MaximumDistanceFunction
@@ -16,6 +17,7 @@ de.lmu.ifi.dbs.elki.distance.distancefunction.LorentzianDistanceFunction
# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.WeightedLPNormDistanceFunction
# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.WeightedEuclideanDistanceFunction
# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.WeightedManhattanDistanceFunction
+# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.WeightedMaximumDistanceFunction
# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.WeightedSquaredEuclideanDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SparseEuclideanDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SparseManhattanDistanceFunction
@@ -45,12 +47,13 @@ de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.DimensionSelectingDistanc
de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceLPNormDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceManhattanDistanceFunction
+de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceMaximumDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.timeseries.DTWDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.timeseries.EDRDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.timeseries.ERPDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.timeseries.LCSSDistanceFunction
-# de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.FooKernelFunction
-# de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.LinearKernelFunction
-# de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.PolynomialKernelFunction
+de.lmu.ifi.dbs.elki.distance.similarityfunction.JaccardPrimitiveSimilarityFunction
+de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.LinearKernelFunction
+de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.PolynomialKernelFunction
# tutorial.distancefunction.MultiLPNorm
# tutorial.distancefunction.TutorialDistanceFunction
diff --git a/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction b/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction
index a22e3dc0..1cae5172 100644
--- a/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction
+++ b/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction
@@ -1,6 +1,7 @@
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.ManhattanDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.LPNormDistanceFunction
+# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.LPIntegerNormDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.ArcCosineDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.CosineDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.MaximumDistanceFunction
@@ -16,6 +17,7 @@ de.lmu.ifi.dbs.elki.distance.distancefunction.LorentzianDistanceFunction
# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.WeightedLPNormDistanceFunction
# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.WeightedEuclideanDistanceFunction
# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.WeightedManhattanDistanceFunction
+# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.WeightedMaximumDistanceFunction
# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.WeightedSquaredEuclideanDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SparseEuclideanDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SparseManhattanDistanceFunction
@@ -43,9 +45,13 @@ de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.DimensionSelectingDistanc
de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceLPNormDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceManhattanDistanceFunction
+de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceMaximumDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.timeseries.DTWDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.timeseries.EDRDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.timeseries.ERPDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.timeseries.LCSSDistanceFunction
+de.lmu.ifi.dbs.elki.distance.similarityfunction.JaccardPrimitiveSimilarityFunction
+de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.LinearKernelFunction
+de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.PolynomialKernelFunction
# tutorial.distancefunction.MultiLPNorm
# tutorial.distancefunction.TutorialDistanceFunction
diff --git a/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDistanceFunction b/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDistanceFunction
index 383bfde1..138541c0 100644
--- a/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDistanceFunction
+++ b/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDistanceFunction
@@ -1,6 +1,7 @@
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.ManhattanDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.LPNormDistanceFunction
+# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.LPIntegerNormDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.MaximumDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.MinimumDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.ArcCosineDistanceFunction
@@ -15,6 +16,7 @@ de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistance
# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.WeightedLPNormDistanceFunction
# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.WeightedEuclideanDistanceFunction
# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.WeightedManhattanDistanceFunction
+# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.WeightedMaximumDistanceFunction
# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.WeightedSquaredEuclideanDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.colorhistogram.HistogramIntersectionDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.geo.LatLngDistanceFunction
@@ -27,3 +29,4 @@ de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.DimensionSelectingDistanc
de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceLPNormDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceManhattanDistanceFunction
+de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceMaximumDistanceFunction
diff --git a/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDoubleDistanceFunction b/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDoubleDistanceFunction
index c8ca0d10..ac3a8344 100644
--- a/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDoubleDistanceFunction
+++ b/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDoubleDistanceFunction
@@ -1,6 +1,7 @@
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.ManhattanDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.LPNormDistanceFunction
+# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.LPIntegerNormDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.MaximumDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.MinimumDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.ArcCosineDistanceFunction
@@ -14,6 +15,7 @@ de.lmu.ifi.dbs.elki.distance.distancefunction.LorentzianDistanceFunction
# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.WeightedLPNormDistanceFunction
# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.WeightedEuclideanDistanceFunction
# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.WeightedManhattanDistanceFunction
+# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.WeightedMaximumDistanceFunction
# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.WeightedSquaredEuclideanDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.colorhistogram.HistogramIntersectionDistanceFunction
@@ -27,3 +29,4 @@ de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.DimensionSelectingDistanc
de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceLPNormDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceManhattanDistanceFunction
+de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceMaximumDistanceFunction
diff --git a/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.LPNormDistanceFunction b/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.LPNormDistanceFunction
index 95441a56..9bcd3af8 100644
--- a/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.LPNormDistanceFunction
+++ b/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.LPNormDistanceFunction
@@ -1,7 +1,9 @@
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.ManhattanDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.LPNormDistanceFunction
+# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.LPIntegerNormDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.MaximumDistanceFunction
# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.WeightedLPNormDistanceFunction
# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.WeightedEuclideanDistanceFunction
-# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.WeightedManhattanDistanceFunction \ No newline at end of file
+# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.WeightedManhattanDistanceFunction
+# de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.WeightedMaximumDistanceFunction
diff --git a/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.DimensionSelectingSubspaceDistanceFunction b/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.DimensionSelectingSubspaceDistanceFunction
index 51b11e3b..a683d421 100644
--- a/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.DimensionSelectingSubspaceDistanceFunction
+++ b/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.DimensionSelectingSubspaceDistanceFunction
@@ -1,4 +1,5 @@
de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceLPNormDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceManhattanDistanceFunction
+de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceMaximumDistanceFunction
de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.DimensionSelectingDistanceFunction \ No newline at end of file
diff --git a/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.similarityfunction.NormalizedSimilarityFunction b/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.similarityfunction.NormalizedSimilarityFunction
index 6d8add0c..8289c203 100644
--- a/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.similarityfunction.NormalizedSimilarityFunction
+++ b/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.similarityfunction.NormalizedSimilarityFunction
@@ -1 +1,2 @@
de.lmu.ifi.dbs.elki.distance.similarityfunction.FractionalSharedNearestNeighborSimilarityFunction
+de.lmu.ifi.dbs.elki.distance.similarityfunction.JaccardPrimitiveSimilarityFunction
diff --git a/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.similarityfunction.PrimitiveSimilarityFunction b/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.similarityfunction.PrimitiveSimilarityFunction
index 55b1755e..65a44a1c 100644
--- a/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.similarityfunction.PrimitiveSimilarityFunction
+++ b/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.similarityfunction.PrimitiveSimilarityFunction
@@ -1,6 +1,10 @@
-de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.FooKernelFunction
-de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.LinearKernelFunction
-de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.PolynomialKernelFunction
de.lmu.ifi.dbs.elki.distance.similarityfunction.InvertedDistanceSimilarityFunction
+de.lmu.ifi.dbs.elki.distance.similarityfunction.JaccardPrimitiveSimilarityFunction
de.lmu.ifi.dbs.elki.distance.similarityfunction.Kulczynski1SimilarityFunction
-de.lmu.ifi.dbs.elki.distance.similarityfunction.Kulczynski2SimilarityFunction \ No newline at end of file
+de.lmu.ifi.dbs.elki.distance.similarityfunction.Kulczynski2SimilarityFunction
+de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.LinearKernelFunction
+de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.PolynomialKernelFunction
+de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.RadialBasisFunctionKernelFunction
+de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.SigmoidKernelFunction
+de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.LaplaceKernelFunction
+de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.RationalQuadraticKernelFunction \ No newline at end of file
diff --git a/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.similarityfunction.SimilarityFunction b/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.similarityfunction.SimilarityFunction
index c8b5c993..2d464325 100644
--- a/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.similarityfunction.SimilarityFunction
+++ b/src/META-INF/elki/de.lmu.ifi.dbs.elki.distance.similarityfunction.SimilarityFunction
@@ -3,6 +3,10 @@ de.lmu.ifi.dbs.elki.distance.similarityfunction.SharedNearestNeighborSimilarityF
de.lmu.ifi.dbs.elki.distance.similarityfunction.Kulczynski1SimilarityFunction
de.lmu.ifi.dbs.elki.distance.similarityfunction.Kulczynski2SimilarityFunction
de.lmu.ifi.dbs.elki.distance.similarityfunction.InvertedDistanceSimilarityFunction
-# de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.FooKernelFunction
-# de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.LinearKernelFunction
-# de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.PolynomialKernelFunction
+de.lmu.ifi.dbs.elki.distance.similarityfunction.JaccardPrimitiveSimilarityFunction
+de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.LinearKernelFunction
+de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.PolynomialKernelFunction
+de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.RadialBasisFunctionKernelFunction
+de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.SigmoidKernelFunction
+de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.LaplaceKernelFunction
+de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.RationalQuadraticKernelFunction \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/APRIORI.java b/src/de/lmu/ifi/dbs/elki/algorithm/APRIORI.java
index 07aaf3fc..a2f32989 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/APRIORI.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/APRIORI.java
@@ -44,8 +44,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.OneMustBeSetGlobalConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.OnlyOneIsAllowedToBeSetGlobalConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
@@ -296,7 +295,7 @@ public class APRIORI extends AbstractAlgorithm<AprioriResult> {
public TypeInformation[] getInputTypeRestriction() {
return TypeUtil.array(TypeUtil.BIT_VECTOR_FIELD);
}
-
+
@Override
protected Logging getLogger() {
return LOG;
@@ -325,15 +324,15 @@ public class APRIORI extends AbstractAlgorithm<AprioriResult> {
super.makeOptions(config);
DoubleParameter minfreqP = new DoubleParameter(MINFREQ_ID);
minfreqP.setOptional(true);
- minfreqP.addConstraint(new GreaterEqualConstraint(0));
- minfreqP.addConstraint(new LessEqualConstraint(1));
+ minfreqP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
+ minfreqP.addConstraint(CommonConstraints.LESS_EQUAL_ONE_DOUBLE);
if(config.grab(minfreqP)) {
minfreq = minfreqP.getValue();
}
IntParameter minsuppP = new IntParameter(MINSUPP_ID);
minsuppP.setOptional(true);
- minsuppP.addConstraint(new GreaterEqualConstraint(0));
+ minsuppP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_INT);
if(config.grab(minsuppP)) {
minsupp = minsuppP.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/AbstractAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/AbstractAlgorithm.java
index 68ac9595..65b86633 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/AbstractAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/AbstractAlgorithm.java
@@ -85,72 +85,48 @@ public abstract class AbstractAlgorithm<R extends Result> implements Algorithm {
}
// Find appropriate run method.
- Method runmethod1 = null;
- Method runmethod2 = null;
try {
- runmethod1 = this.getClass().getMethod("run", signature1);
- runmethod2 = null;
- }
- catch(SecurityException e) {
- throw new APIViolationException("Security exception finding an appropriate 'run' method.", e);
+ Method runmethod1 = this.getClass().getMethod("run", signature1);
+ return (R) runmethod1.invoke(this, relations1);
}
catch(NoSuchMethodException e) {
- runmethod1 = null;
- // Try without "database" parameter.
- try {
- runmethod2 = this.getClass().getMethod("run", signature2);
- }
- catch(NoSuchMethodException e2) {
- runmethod2 = null;
+ // continue below.
+ }
+ catch(IllegalArgumentException | IllegalAccessException | SecurityException e) {
+ throw new APIViolationException("Invoking the real 'run' method failed.", e);
+ }
+ catch(InvocationTargetException e) {
+ final Throwable cause = e.getTargetException();
+ if(cause instanceof RuntimeException) {
+ throw (RuntimeException) cause;
}
- catch(SecurityException e2) {
- throw new APIViolationException("Security exception finding an appropriate 'run' method.", e2);
+ if(cause instanceof Error) {
+ throw (Error) cause;
}
+ throw new APIViolationException("Invoking the real 'run' method failed: " + cause.toString(), cause);
}
- if(runmethod1 != null) {
- try {
- return (R) runmethod1.invoke(this, relations1);
- }
- catch(IllegalArgumentException e) {
- throw new APIViolationException("Invoking the real 'run' method failed.", e);
- }
- catch(IllegalAccessException e) {
- throw new APIViolationException("Invoking the real 'run' method failed.", e);
- }
- catch(InvocationTargetException e) {
- if(e.getTargetException() instanceof RuntimeException) {
- throw (RuntimeException) e.getTargetException();
- }
- if(e.getTargetException() instanceof AssertionError) {
- throw (AssertionError) e.getTargetException();
- }
- throw new APIViolationException("Invoking the real 'run' method failed: " + e.getTargetException().toString(), e.getTargetException());
- }
+ try {
+ Method runmethod2 = this.getClass().getMethod("run", signature2);
+ return (R) runmethod2.invoke(this, relations2);
}
- else if(runmethod2 != null) {
- try {
- return (R) runmethod2.invoke(this, relations2);
- }
- catch(IllegalArgumentException e) {
- throw new APIViolationException("Invoking the real 'run' method failed.", e);
- }
- catch(IllegalAccessException e) {
- throw new APIViolationException("Invoking the real 'run' method failed.", e);
+ catch(NoSuchMethodException e) {
+ // continue below.
+ }
+ catch(IllegalArgumentException | IllegalAccessException | SecurityException e) {
+ throw new APIViolationException("Invoking the real 'run' method failed.", e);
+ }
+ catch(InvocationTargetException e) {
+ final Throwable cause = e.getTargetException();
+ if(cause instanceof RuntimeException) {
+ throw (RuntimeException) cause;
}
- catch(InvocationTargetException e) {
- if(e.getTargetException() instanceof RuntimeException) {
- throw (RuntimeException) e.getTargetException();
- }
- if(e.getTargetException() instanceof AssertionError) {
- throw (AssertionError) e.getTargetException();
- }
- throw new APIViolationException("Invoking the real 'run' method failed: " + e.getTargetException().toString(), e.getTargetException());
+ if(cause instanceof Error) {
+ throw (Error) cause;
}
+ throw new APIViolationException("Invoking the real 'run' method failed: " + cause.toString(), cause);
}
- else {
- throw new APIViolationException("No appropriate 'run' method found.");
- }
+ throw new APIViolationException("No appropriate 'run' method found.");
}
/**
@@ -177,6 +153,6 @@ public abstract class AbstractAlgorithm<R extends Result> implements Algorithm {
* @return Parameter object
*/
public static <F extends DistanceFunction<?, ?>> ObjectParameter<F> makeParameterDistanceFunction(Class<?> defaultDistanceFunction, Class<?> restriction) {
- return new ObjectParameter<>(AbstractDistanceBasedAlgorithm.DISTANCE_FUNCTION_ID, restriction, defaultDistanceFunction);
+ return new ObjectParameter<>(DistanceBasedAlgorithm.DISTANCE_FUNCTION_ID, restriction, defaultDistanceFunction);
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/AbstractPrimitiveDistanceBasedAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/AbstractPrimitiveDistanceBasedAlgorithm.java
index 40fe67c3..5d4b24c1 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/AbstractPrimitiveDistanceBasedAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/AbstractPrimitiveDistanceBasedAlgorithm.java
@@ -48,7 +48,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
public abstract class AbstractPrimitiveDistanceBasedAlgorithm<O, D extends Distance<?>, R extends Result> extends AbstractAlgorithm<R> {
/**
* Holds the instance of the distance function specified by
- * {@link AbstractDistanceBasedAlgorithm#DISTANCE_FUNCTION_ID}.
+ * {@link DistanceBasedAlgorithm#DISTANCE_FUNCTION_ID}.
*/
protected PrimitiveDistanceFunction<? super O, D> distanceFunction;
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/DependencyDerivator.java b/src/de/lmu/ifi/dbs/elki/algorithm/DependencyDerivator.java
index cc40d13b..dca3649e 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/DependencyDerivator.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/DependencyDerivator.java
@@ -51,8 +51,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -68,7 +67,8 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
* E. Achtert, C. Böhm, H.-P. Kriegel, P. Kröger, A. Zimek: Deriving
* Quantitative Dependencies for Correlation Clusters. <br>
* In Proc. 12th Int. Conf. on Knowledge Discovery and Data Mining (KDD '06),
- * Philadelphia, PA 2006. </p>
+ * Philadelphia, PA 2006.
+ * </p>
*
* @author Arthur Zimek
* @param <V> the type of FeatureVector handled by this Algorithm
@@ -303,20 +303,20 @@ public class DependencyDerivator<V extends NumberVector<?>, D extends Distance<D
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
-
+
IntParameter outputAccuracyP = new IntParameter(OUTPUT_ACCURACY_ID, 4);
- outputAccuracyP.addConstraint(new GreaterEqualConstraint(0));
+ outputAccuracyP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_INT);
if(config.grab(outputAccuracyP)) {
outputAccuracy = outputAccuracyP.getValue();
}
-
+
IntParameter sampleSizeP = new IntParameter(SAMPLE_SIZE_ID);
sampleSizeP.setOptional(true);
- sampleSizeP.addConstraint(new GreaterConstraint(0));
+ sampleSizeP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
if(config.grab(sampleSizeP)) {
sampleSize = sampleSizeP.getValue();
}
-
+
Flag randomSampleF = new Flag(DEPENDENCY_DERIVATOR_RANDOM_SAMPLE);
if(config.grab(randomSampleF)) {
randomSample = randomSampleF.getValue();
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/KNNDistanceOrder.java b/src/de/lmu/ifi/dbs/elki/algorithm/KNNDistanceOrder.java
index b696ed36..46cf2246 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/KNNDistanceOrder.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/KNNDistanceOrder.java
@@ -43,8 +43,7 @@ import de.lmu.ifi.dbs.elki.result.KNNDistanceOrderResult;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -164,14 +163,14 @@ public class KNNDistanceOrder<O, D extends Distance<D>> extends AbstractDistance
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
IntParameter kP = new IntParameter(K_ID, 1);
- kP.addConstraint(new GreaterConstraint(0));
+ kP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
if(config.grab(kP)) {
k = kP.getValue();
}
DoubleParameter percentageP = new DoubleParameter(PERCENTAGE_ID, 1.0);
- percentageP.addConstraint(new GreaterConstraint(0));
- percentageP.addConstraint(new LessEqualConstraint(1));
+ percentageP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+ percentageP.addConstraint(CommonConstraints.LESS_EQUAL_ONE_DOUBLE);
if(config.grab(percentageP)) {
percentage = percentageP.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/KNNJoin.java b/src/de/lmu/ifi/dbs/elki/algorithm/KNNJoin.java
index dddd8fdb..0f5078fb 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/KNNJoin.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/KNNJoin.java
@@ -61,7 +61,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -121,11 +121,11 @@ public class KNNJoin<V extends NumberVector<?>, D extends Distance<D>, N extends
*/
@SuppressWarnings("unchecked")
public WritableDataStore<KNNList<D>> run(Database database, Relation<V> relation) {
- if (!(getDistanceFunction() instanceof SpatialPrimitiveDistanceFunction)) {
+ if(!(getDistanceFunction() instanceof SpatialPrimitiveDistanceFunction)) {
throw new IllegalStateException("Distance Function must be an instance of " + SpatialPrimitiveDistanceFunction.class.getName());
}
Collection<SpatialIndexTree<N, E>> indexes = ResultUtil.filterResults(database, SpatialIndexTree.class);
- if (indexes.size() != 1) {
+ if(indexes.size() != 1) {
throw new AbortException("KNNJoin found " + indexes.size() + " spatial indexes, expected exactly one.");
}
// FIXME: Ensure were looking at the right relation!
@@ -140,7 +140,7 @@ public class KNNJoin<V extends NumberVector<?>, D extends Distance<D>, N extends
ComparableMinHeap<Task> pq = new ComparableMinHeap<>(ps_candidates.size() * ps_candidates.size() / 10);
// Initialize with the page self-pairing
- for (int i = 0; i < ps_candidates.size(); i++) {
+ for(int i = 0; i < ps_candidates.size(); i++) {
E pr_entry = ps_candidates.get(i);
N pr = index.getNode(pr_entry);
heaps.add(initHeaps(distFunction, pr));
@@ -148,41 +148,42 @@ public class KNNJoin<V extends NumberVector<?>, D extends Distance<D>, N extends
// Build priority queue
final int sqsize = ps_candidates.size() * (ps_candidates.size() - 1) >> 1;
- if (LOG.isDebuggingFine()) {
+ if(LOG.isDebuggingFine()) {
LOG.debugFine("Number of leaves: " + ps_candidates.size() + " so " + sqsize + " MBR computations.");
}
FiniteProgress mprogress = LOG.isVerbose() ? new FiniteProgress("Comparing leaf MBRs", sqsize, LOG) : null;
- for (int i = 0; i < ps_candidates.size(); i++) {
+ for(int i = 0; i < ps_candidates.size(); i++) {
E pr_entry = ps_candidates.get(i);
List<KNNHeap<D>> pr_heaps = heaps.get(i);
D pr_knn_distance = computeStopDistance(pr_heaps);
- for (int j = i + 1; j < ps_candidates.size(); j++) {
+ for(int j = i + 1; j < ps_candidates.size(); j++) {
E ps_entry = ps_candidates.get(j);
List<KNNHeap<D>> ps_heaps = heaps.get(j);
D ps_knn_distance = computeStopDistance(ps_heaps);
D minDist = distFunction.minDist(pr_entry, ps_entry);
// Resolve immediately:
- if (minDist.isNullDistance()) {
+ if(minDist.isNullDistance()) {
N pr = index.getNode(ps_candidates.get(i));
N ps = index.getNode(ps_candidates.get(j));
processDataPagesOptimize(distFunction, pr_heaps, ps_heaps, pr, ps);
- } else if (minDist.compareTo(pr_knn_distance) <= 0 || minDist.compareTo(ps_knn_distance) <= 0) {
+ }
+ else if(minDist.compareTo(pr_knn_distance) <= 0 || minDist.compareTo(ps_knn_distance) <= 0) {
pq.add(new Task(minDist, i, j));
}
- if (mprogress != null) {
+ if(mprogress != null) {
mprogress.incrementProcessed(LOG);
}
}
}
- if (mprogress != null) {
+ if(mprogress != null) {
mprogress.ensureCompleted(LOG);
}
// Process the queue
FiniteProgress qprogress = LOG.isVerbose() ? new FiniteProgress("Processing queue", pq.size(), LOG) : null;
IndefiniteProgress fprogress = LOG.isVerbose() ? new IndefiniteProgress("Full comparisons", LOG) : null;
- while (!pq.isEmpty()) {
+ while(!pq.isEmpty()) {
Task task = pq.poll();
List<KNNHeap<D>> pr_heaps = heaps.get(task.i);
List<KNNHeap<D>> ps_heaps = heaps.get(task.j);
@@ -190,30 +191,32 @@ public class KNNJoin<V extends NumberVector<?>, D extends Distance<D>, N extends
D ps_knn_distance = computeStopDistance(ps_heaps);
boolean dor = task.mindist.compareTo(pr_knn_distance) <= 0;
boolean dos = task.mindist.compareTo(ps_knn_distance) <= 0;
- if (dor || dos) {
+ if(dor || dos) {
N pr = index.getNode(ps_candidates.get(task.i));
N ps = index.getNode(ps_candidates.get(task.j));
- if (dor && dos) {
+ if(dor && dos) {
processDataPagesOptimize(distFunction, pr_heaps, ps_heaps, pr, ps);
- } else {
- if (dor) {
+ }
+ else {
+ if(dor) {
processDataPagesOptimize(distFunction, pr_heaps, null, pr, ps);
- } else /* dos */{
+ }
+ else /* dos */{
processDataPagesOptimize(distFunction, ps_heaps, null, ps, pr);
}
}
- if (fprogress != null) {
+ if(fprogress != null) {
fprogress.incrementProcessed(LOG);
}
}
- if (qprogress != null) {
+ if(qprogress != null) {
qprogress.incrementProcessed(LOG);
}
}
- if (qprogress != null) {
+ if(qprogress != null) {
qprogress.ensureCompleted(LOG);
}
- if (fprogress != null) {
+ if(fprogress != null) {
fprogress.setCompleted(LOG);
}
@@ -223,12 +226,12 @@ public class KNNJoin<V extends NumberVector<?>, D extends Distance<D>, N extends
// null;
FiniteProgress pageprog = LOG.isVerbose() ? new FiniteProgress("Number of processed data pages", ps_candidates.size(), LOG) : null;
// int processed = 0;
- for (int i = 0; i < ps_candidates.size(); i++) {
+ for(int i = 0; i < ps_candidates.size(); i++) {
N pr = index.getNode(ps_candidates.get(i));
List<KNNHeap<D>> pr_heaps = heaps.get(i);
// Finalize lists
- for (int j = 0; j < pr.getNumEntries(); j++) {
+ for(int j = 0; j < pr.getNumEntries(); j++) {
knnLists.put(((LeafEntry) pr.getEntry(j)).getDBID(), pr_heaps.get(j).toKNNList());
}
// Forget heaps and pq
@@ -238,14 +241,14 @@ public class KNNJoin<V extends NumberVector<?>, D extends Distance<D>, N extends
// if(progress != null) {
// progress.setProcessed(processed, logger);
// }
- if (pageprog != null) {
+ if(pageprog != null) {
pageprog.incrementProcessed(LOG);
}
}
// if(progress != null) {
// progress.ensureCompleted(logger);
// }
- if (pageprog != null) {
+ if(pageprog != null) {
pageprog.ensureCompleted(LOG);
}
return knnLists;
@@ -261,7 +264,7 @@ public class KNNJoin<V extends NumberVector<?>, D extends Distance<D>, N extends
private List<KNNHeap<D>> initHeaps(SpatialPrimitiveDistanceFunction<V, D> distFunction, N pr) {
List<KNNHeap<D>> pr_heaps = new ArrayList<>(pr.getNumEntries());
// Create for each data object a knn heap
- for (int j = 0; j < pr.getNumEntries(); j++) {
+ for(int j = 0; j < pr.getNumEntries(); j++) {
pr_heaps.add(DBIDUtil.newHeap(distFunction.getDistanceFactory(), k));
}
// Self-join first, as this is expected to improve most and cannot be
@@ -282,20 +285,21 @@ public class KNNJoin<V extends NumberVector<?>, D extends Distance<D>, N extends
*/
@SuppressWarnings("unchecked")
private void processDataPagesOptimize(SpatialPrimitiveDistanceFunction<V, D> distFunction, List<? extends KNNHeap<D>> pr_heaps, List<? extends KNNHeap<D>> ps_heaps, N pr, N ps) {
- if (DistanceUtil.isDoubleDistanceFunction(distFunction)) {
+ if(DistanceUtil.isDoubleDistanceFunction(distFunction)) {
List<?> khp = (List<?>) pr_heaps;
List<?> khs = (List<?>) ps_heaps;
processDataPagesDouble((SpatialPrimitiveDoubleDistanceFunction<? super V>) distFunction, pr, ps, (List<DoubleDistanceKNNHeap>) khp, (List<DoubleDistanceKNNHeap>) khs);
- } else {
- for (int j = 0; j < ps.getNumEntries(); j++) {
+ }
+ else {
+ for(int j = 0; j < ps.getNumEntries(); j++) {
final SpatialPointLeafEntry s_e = (SpatialPointLeafEntry) ps.getEntry(j);
DBID s_id = s_e.getDBID();
- for (int i = 0; i < pr.getNumEntries(); i++) {
+ for(int i = 0; i < pr.getNumEntries(); i++) {
final SpatialPointLeafEntry r_e = (SpatialPointLeafEntry) pr.getEntry(i);
D distance = distFunction.minDist(s_e, r_e);
- pr_heaps.get(i).add(distance, s_id);
- if (pr != ps && ps_heaps != null) {
- ps_heaps.get(j).add(distance, r_e.getDBID());
+ pr_heaps.get(i).insert(distance, s_id);
+ if(pr != ps && ps_heaps != null) {
+ ps_heaps.get(j).insert(distance, r_e.getDBID());
}
}
}
@@ -314,15 +318,15 @@ public class KNNJoin<V extends NumberVector<?>, D extends Distance<D>, N extends
*/
private void processDataPagesDouble(SpatialPrimitiveDoubleDistanceFunction<? super V> df, N pr, N ps, List<DoubleDistanceKNNHeap> pr_heaps, List<DoubleDistanceKNNHeap> ps_heaps) {
// Compare pairwise
- for (int j = 0; j < ps.getNumEntries(); j++) {
+ for(int j = 0; j < ps.getNumEntries(); j++) {
final SpatialPointLeafEntry s_e = (SpatialPointLeafEntry) ps.getEntry(j);
DBID s_id = s_e.getDBID();
- for (int i = 0; i < pr.getNumEntries(); i++) {
+ for(int i = 0; i < pr.getNumEntries(); i++) {
final SpatialPointLeafEntry r_e = (SpatialPointLeafEntry) pr.getEntry(i);
double distance = df.doubleMinDist(s_e, r_e);
- pr_heaps.get(i).add(distance, s_id);
- if (pr != ps && ps_heaps != null) {
- ps_heaps.get(j).add(distance, r_e.getDBID());
+ pr_heaps.get(i).insert(distance, s_id);
+ if(pr != ps && ps_heaps != null) {
+ ps_heaps.get(j).insert(distance, r_e.getDBID());
}
}
}
@@ -337,15 +341,16 @@ public class KNNJoin<V extends NumberVector<?>, D extends Distance<D>, N extends
private D computeStopDistance(List<KNNHeap<D>> heaps) {
// Update pruning distance
D pr_knn_distance = null;
- for (KNNHeap<D> knnList : heaps) {
+ for(KNNHeap<D> knnList : heaps) {
// set kNN distance of r
- if (pr_knn_distance == null) {
+ if(pr_knn_distance == null) {
pr_knn_distance = knnList.getKNNDistance();
- } else {
+ }
+ else {
pr_knn_distance = DistanceUtil.max(knnList.getKNNDistance(), pr_knn_distance);
}
}
- if (pr_knn_distance == null) {
+ if(pr_knn_distance == null) {
return getDistanceFunction().getDistanceFactory().infiniteDistance();
}
return pr_knn_distance;
@@ -421,8 +426,8 @@ public class KNNJoin<V extends NumberVector<?>, D extends Distance<D>, N extends
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
IntParameter kP = new IntParameter(K_ID, 1);
- kP.addConstraint(new GreaterConstraint(0));
- if (config.grab(kP)) {
+ kP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(kP)) {
k = kP.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/benchmark/ValidateApproximativeKNNIndex.java b/src/de/lmu/ifi/dbs/elki/algorithm/benchmark/ValidateApproximativeKNNIndex.java
index 3d0ea52a..8b83b5d4 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/benchmark/ValidateApproximativeKNNIndex.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/benchmark/ValidateApproximativeKNNIndex.java
@@ -36,9 +36,9 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
+import de.lmu.ifi.dbs.elki.database.query.LinearScanQuery;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.LinearScanKNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.datasource.DatabaseConnection;
import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
@@ -141,32 +141,35 @@ public class ValidateApproximativeKNNIndex<O, D extends Distance<D>> extends Abs
DistanceQuery<O, D> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
// Approximate query:
KNNQuery<O, D> knnQuery = database.getKNNQuery(distQuery, k, DatabaseQuery.HINT_OPTIMIZED_ONLY);
- if (knnQuery == null || knnQuery instanceof LinearScanKNNQuery) {
+ if(knnQuery == null || knnQuery instanceof LinearScanQuery) {
throw new AbortException("Expected an accelerated query, but got a linear scan -- index is not used.");
}
// Exact query:
KNNQuery<O, D> truekNNQuery;
- if (forcelinear) {
+ if(forcelinear) {
truekNNQuery = QueryUtil.getLinearScanKNNQuery(distQuery);
- } else {
+ }
+ else {
truekNNQuery = database.getKNNQuery(distQuery, k, DatabaseQuery.HINT_EXACT);
}
- if (knnQuery.getClass().equals(truekNNQuery.getClass())) {
+ if(knnQuery.getClass().equals(truekNNQuery.getClass())) {
LOG.warning("Query classes are the same. This experiment may be invalid!");
}
// No query set - use original database.
- if (queries == null || pattern != null) {
+ if(queries == null || pattern != null) {
// Relation to filter on
Relation<String> lrel = (pattern != null) ? DatabaseUtil.guessLabelRepresentation(database) : null;
final DBIDs sample;
- if (sampling <= 0) {
+ if(sampling <= 0) {
sample = relation.getDBIDs();
- } else if (sampling < 1.1) {
+ }
+ else if(sampling < 1.1) {
int size = (int) Math.min(sampling * relation.size(), relation.size());
sample = DBIDUtil.randomSample(relation.getDBIDs(), size, random);
- } else {
+ }
+ else {
int size = (int) Math.min(sampling, relation.size());
sample = DBIDUtil.randomSample(relation.getDBIDs(), size, random);
}
@@ -174,8 +177,8 @@ public class ValidateApproximativeKNNIndex<O, D extends Distance<D>> extends Abs
MeanVariance mv = new MeanVariance(), mvrec = new MeanVariance();
MeanVariance mvdist = new MeanVariance(), mvdaerr = new MeanVariance(), mvdrerr = new MeanVariance();
int misses = 0;
- for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
- if (pattern == null || pattern.matcher(lrel.get(iditer)).find()) {
+ for(DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
+ if(pattern == null || pattern.matcher(lrel.get(iditer)).find()) {
// Query index:
KNNList<D> knns = knnQuery.getKNNForDBID(iditer, k);
// Query reference:
@@ -187,53 +190,55 @@ public class ValidateApproximativeKNNIndex<O, D extends Distance<D>> extends Abs
// Put recall:
mvrec.put(DBIDUtil.intersectionSize(knns, trueknns) / trueknns.size());
- if (knns.size() >= k) {
+ if(knns.size() >= k) {
D kdist = knns.getKNNDistance();
- if (kdist instanceof NumberDistance) {
+ if(kdist instanceof NumberDistance) {
final double dist = ((NumberDistance<?, ?>) kdist).doubleValue();
final double tdist = ((NumberDistance<?, ?>) trueknns.getKNNDistance()).doubleValue();
- if (tdist > 0.0) {
+ if(tdist > 0.0) {
mvdist.put(dist);
mvdaerr.put(dist - tdist);
mvdrerr.put(dist / tdist);
}
}
- } else {
+ }
+ else {
// Less than k objects.
misses++;
}
}
- if (prog != null) {
+ if(prog != null) {
prog.incrementProcessed(LOG);
}
}
- if (prog != null) {
+ if(prog != null) {
prog.ensureCompleted(LOG);
}
- if (LOG.isStatistics()) {
+ if(LOG.isStatistics()) {
LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
LOG.statistics("Recall of true results: " + mvrec.getMean() + " +- " + mvrec.getNaiveStddev());
- if (mvdist.getCount() > 0) {
+ if(mvdist.getCount() > 0) {
LOG.statistics("Mean k-distance: " + mvdist.getMean() + " +- " + mvdist.getNaiveStddev());
LOG.statistics("Mean absolute k-error: " + mvdaerr.getMean() + " +- " + mvdaerr.getNaiveStddev());
LOG.statistics("Mean relative k-error: " + mvdrerr.getMean() + " +- " + mvdrerr.getNaiveStddev());
}
- if (misses > 0) {
+ if(misses > 0) {
LOG.statistics(String.format("Number of queries that returned less than k=%d objects: %d (%.2f%%)", k, misses, misses * 100. / mv.getCount()));
}
}
- } else {
+ }
+ else {
// Separate query set.
TypeInformation res = getDistanceFunction().getInputTypeRestriction();
MultipleObjectsBundle bundle = queries.loadData();
int col = -1;
- for (int i = 0; i < bundle.metaLength(); i++) {
- if (res.isAssignableFromType(bundle.meta(i))) {
+ for(int i = 0; i < bundle.metaLength(); i++) {
+ if(res.isAssignableFromType(bundle.meta(i))) {
col = i;
break;
}
}
- if (col < 0) {
+ if(col < 0) {
throw new AbortException("No compatible data type in query input was found. Expected: " + res.toString());
}
// Random sampling is a bit of hack, sorry.
@@ -241,12 +246,14 @@ public class ValidateApproximativeKNNIndex<O, D extends Distance<D>> extends Abs
DBIDRange sids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
final DBIDs sample;
- if (sampling <= 0) {
+ if(sampling <= 0) {
sample = sids;
- } else if (sampling < 1.1) {
+ }
+ else if(sampling < 1.1) {
int size = (int) Math.min(sampling * relation.size(), relation.size());
sample = DBIDUtil.randomSample(sids, size, random);
- } else {
+ }
+ else {
int size = (int) Math.min(sampling, sids.size());
sample = DBIDUtil.randomSample(sids, size, random);
}
@@ -254,7 +261,7 @@ public class ValidateApproximativeKNNIndex<O, D extends Distance<D>> extends Abs
MeanVariance mv = new MeanVariance(), mvrec = new MeanVariance();
MeanVariance mvdist = new MeanVariance(), mvdaerr = new MeanVariance(), mvdrerr = new MeanVariance();
int misses = 0;
- for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
int off = sids.binarySearch(iditer);
assert (off >= 0);
@SuppressWarnings("unchecked")
@@ -271,36 +278,37 @@ public class ValidateApproximativeKNNIndex<O, D extends Distance<D>> extends Abs
// Put recall:
mvrec.put(DBIDUtil.intersectionSize(knns, trueknns) / trueknns.size());
- if (knns.size() >= k) {
+ if(knns.size() >= k) {
D kdist = knns.getKNNDistance();
- if (kdist instanceof NumberDistance) {
+ if(kdist instanceof NumberDistance) {
final double dist = ((NumberDistance<?, ?>) kdist).doubleValue();
final double tdist = ((NumberDistance<?, ?>) trueknns.getKNNDistance()).doubleValue();
- if (tdist > 0.0) {
+ if(tdist > 0.0) {
mvdist.put(dist);
mvdaerr.put(dist - tdist);
mvdrerr.put(dist / tdist);
}
}
- } else {
+ }
+ else {
// Less than k objects.
misses++;
}
- if (prog != null) {
+ if(prog != null) {
prog.incrementProcessed(LOG);
}
}
- if (prog != null) {
+ if(prog != null) {
prog.ensureCompleted(LOG);
}
- if (LOG.isStatistics()) {
+ if(LOG.isStatistics()) {
LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
LOG.statistics("Recall of true results: " + mvrec.getMean() + " +- " + mvrec.getNaiveStddev());
- if (mvdist.getCount() > 0) {
+ if(mvdist.getCount() > 0) {
LOG.statistics("Mean absolute k-error: " + mvdaerr.getMean() + " +- " + mvdaerr.getNaiveStddev());
LOG.statistics("Mean relative k-error: " + mvdrerr.getMean() + " +- " + mvdrerr.getNaiveStddev());
}
- if (misses > 0) {
+ if(misses > 0) {
LOG.statistics(String.format("Number of queries that returned less than k=%d objects: %d (%.2f%%)", k, misses, misses * 100. / mv.getCount()));
}
}
@@ -393,31 +401,32 @@ public class ValidateApproximativeKNNIndex<O, D extends Distance<D>> extends Abs
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
IntParameter kP = new IntParameter(K_ID);
- if (config.grab(kP)) {
+ if(config.grab(kP)) {
k = kP.intValue();
}
PatternParameter patternP = new PatternParameter(PATTERN_ID);
patternP.setOptional(true);
- if (config.grab(patternP)) {
+ if(config.grab(patternP)) {
pattern = patternP.getValue();
- } else {
+ }
+ else {
ObjectParameter<DatabaseConnection> queryP = new ObjectParameter<>(QUERY_ID, DatabaseConnection.class);
queryP.setOptional(true);
- if (config.grab(queryP)) {
+ if(config.grab(queryP)) {
queries = queryP.instantiateClass(config);
}
}
DoubleParameter samplingP = new DoubleParameter(SAMPLING_ID);
samplingP.setOptional(true);
- if (config.grab(samplingP)) {
+ if(config.grab(samplingP)) {
sampling = samplingP.doubleValue();
}
Flag forceP = new Flag(FORCE_ID);
- if (config.grab(forceP)) {
+ if(config.grab(forceP)) {
forcelinear = forceP.isTrue();
}
RandomParameter randomP = new RandomParameter(RANDOM_ID, RandomFactory.DEFAULT);
- if (config.grab(randomP)) {
+ if(config.grab(randomP)) {
random = randomP.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedClustering.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedClustering.java
index 0c4eb5fc..96c95a9f 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedClustering.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedClustering.java
@@ -35,7 +35,7 @@ import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistance
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -152,8 +152,8 @@ public abstract class AbstractProjectedClustering<R extends Clustering<?>, V ext
*/
protected void configK(Parameterization config) {
IntParameter kP = new IntParameter(K_ID);
- kP.addConstraint(new GreaterConstraint(0));
- if (config.grab(kP)) {
+ kP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(kP)) {
k = kP.getValue();
}
}
@@ -165,8 +165,8 @@ public abstract class AbstractProjectedClustering<R extends Clustering<?>, V ext
*/
protected void configKI(Parameterization config) {
IntParameter k_iP = new IntParameter(K_I_ID, 30);
- k_iP.addConstraint(new GreaterConstraint(0));
- if (config.grab(k_iP)) {
+ k_iP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(k_iP)) {
k_i = k_iP.getValue();
}
}
@@ -178,8 +178,8 @@ public abstract class AbstractProjectedClustering<R extends Clustering<?>, V ext
*/
protected void configL(Parameterization config) {
IntParameter lP = new IntParameter(L_ID);
- lP.addConstraint(new GreaterConstraint(0));
- if (config.grab(lP)) {
+ lP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(lP)) {
l = lP.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedDBSCAN.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedDBSCAN.java
index ee3b234c..52e37197 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedDBSCAN.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedDBSCAN.java
@@ -55,7 +55,7 @@ import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ChainedParameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
@@ -294,7 +294,7 @@ public abstract class AbstractProjectedDBSCAN<R extends Clustering<Model>, V ext
// try to expand the cluster
ModifiableDBIDs currentCluster = DBIDUtil.newArray();
ModifiableDBIDs seeds = DBIDUtil.newHashSet();
- for (DistanceDBIDListIter<DoubleDistance> seed = neighbors.iter(); seed.valid(); seed.advance()) {
+ for(DistanceDBIDListIter<DoubleDistance> seed = neighbors.iter(); seed.valid(); seed.advance()) {
int nextID_corrDim = distFunc.getIndex().getLocalProjection(seed).getCorrelationDimension();
// nextID is not reachable from start object
if(nextID_corrDim > lambda) {
@@ -322,9 +322,9 @@ public abstract class AbstractProjectedDBSCAN<R extends Clustering<Model>, V ext
DistanceDBIDList<DoubleDistance> reachables = rangeQuery.getRangeForDBID(iter, epsilon);
iter.remove();
-
+
if(reachables.size() > minpts) {
- for (DistanceDBIDListIter<DoubleDistance> r = reachables.iter(); r.valid(); r.advance()) {
+ for(DistanceDBIDListIter<DoubleDistance> r = reachables.iter(); r.valid(); r.advance()) {
int corrDim_r = distFunc.getIndex().getLocalProjection(r).getCorrelationDimension();
// r is not reachable from q
if(corrDim_r > lambda) {
@@ -351,9 +351,10 @@ public abstract class AbstractProjectedDBSCAN<R extends Clustering<Model>, V ext
}
}
- /* if(processedIDs.size() == relation.size() && noise.size() == 0) {
- break;
- } */
+ /*
+ * if(processedIDs.size() == relation.size() && noise.size() == 0) {
+ * break; }
+ */
}
if(currentCluster.size() >= minpts) {
@@ -375,7 +376,7 @@ public abstract class AbstractProjectedDBSCAN<R extends Clustering<Model>, V ext
public TypeInformation[] getInputTypeRestriction() {
return TypeUtil.array(distanceFunction.getInputTypeRestriction());
}
-
+
/**
* Parameterization class.
*
@@ -411,7 +412,7 @@ public abstract class AbstractProjectedDBSCAN<R extends Clustering<Model>, V ext
protected void configMinPts(Parameterization config) {
IntParameter minptsP = new IntParameter(MINPTS_ID);
- minptsP.addConstraint(new GreaterConstraint(0));
+ minptsP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
if(config.grab(minptsP)) {
minpts = minptsP.getValue();
}
@@ -435,7 +436,7 @@ public abstract class AbstractProjectedDBSCAN<R extends Clustering<Model>, V ext
protected void configLambda(Parameterization config) {
IntParameter lambdaP = new IntParameter(LAMBDA_ID);
- lambdaP.addConstraint(new GreaterConstraint(0));
+ lambdaP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
if(config.grab(lambdaP)) {
lambda = lambdaP.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DBSCAN.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DBSCAN.java
index 57dcb435..09c78fec 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DBSCAN.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DBSCAN.java
@@ -38,9 +38,8 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDMIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
-import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
@@ -52,7 +51,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DistanceParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -82,24 +81,12 @@ public class DBSCAN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
private static final Logging LOG = Logging.getLogger(DBSCAN.class);
/**
- * Parameter to specify the maximum radius of the neighborhood to be
- * considered, must be suitable to the distance function specified.
+ * Holds the epsilon radius threshold.
*/
- public static final OptionID EPSILON_ID = new OptionID("dbscan.epsilon", "The maximum radius of the neighborhood to be considered.");
+ protected D epsilon;
/**
- * Holds the value of {@link #EPSILON_ID}.
- */
- private D epsilon;
-
- /**
- * Parameter to specify the threshold for minimum number of points in the
- * epsilon-neighborhood of a point, must be an integer greater than 0.
- */
- public static final OptionID MINPTS_ID = new OptionID("dbscan.minpts", "Threshold for minimum number of points in the epsilon-neighborhood of a point.");
-
- /**
- * Holds the value of {@link #MINPTS_ID}.
+ * Holds the minimum cluster size.
*/
protected int minpts;
@@ -146,7 +133,9 @@ public class DBSCAN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
if(size < minpts) {
// The can't be any clusters
noise.addDBIDs(relation.getDBIDs());
- objprog.setProcessed(noise.size(), LOG);
+ if(objprog != null) {
+ objprog.setProcessed(noise.size(), LOG);
+ }
}
else {
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
@@ -193,7 +182,7 @@ public class DBSCAN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
* @param objprog the progress object for logging the current status
*/
protected void expandCluster(Relation<O> relation, RangeQuery<O, D> rangeQuery, DBIDRef startObjectID, FiniteProgress objprog, IndefiniteProgress clusprog) {
- DistanceDBIDList<D> neighbors = rangeQuery.getRangeForDBID(startObjectID, epsilon);
+ DBIDs neighbors = rangeQuery.getRangeForDBID(startObjectID, epsilon);
// startObject is no core-object
if(neighbors.size() < minpts) {
@@ -207,7 +196,7 @@ public class DBSCAN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
}
// try to expand the cluster
- HashSetModifiableDBIDs seeds = DBIDUtil.newHashSet();
+ ModifiableDBIDs seeds = DBIDUtil.newHashSet();
ModifiableDBIDs currentCluster = DBIDUtil.newArray();
for(DBIDIter seed = neighbors.iter(); seed.valid(); seed.advance()) {
if(!processedIDs.contains(seed)) {
@@ -222,9 +211,9 @@ public class DBSCAN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
}
seeds.remove(startObjectID);
- while(seeds.size() > 0) {
+ while(!seeds.isEmpty()) {
DBIDMIter o = seeds.iter();
- DistanceDBIDList<D> neighborhood = rangeQuery.getRangeForDBID(o, epsilon);
+ DBIDs neighborhood = rangeQuery.getRangeForDBID(o, epsilon);
o.remove();
if(neighborhood.size() >= minpts) {
@@ -282,6 +271,18 @@ public class DBSCAN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
* @apiviz.exclude
*/
public static class Parameterizer<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ /**
+ * Parameter to specify the maximum radius of the neighborhood to be
+ * considered, must be suitable to the distance function specified.
+ */
+ public static final OptionID EPSILON_ID = new OptionID("dbscan.epsilon", "The maximum radius of the neighborhood to be considered.");
+
+ /**
+ * Parameter to specify the threshold for minimum number of points in the
+ * epsilon-neighborhood of a point, must be an integer greater than 0.
+ */
+ public static final OptionID MINPTS_ID = new OptionID("dbscan.minpts", "Threshold for minimum number of points in the epsilon-neighborhood of a point.");
+
protected D epsilon = null;
protected int minpts = 0;
@@ -295,7 +296,7 @@ public class DBSCAN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
}
IntParameter minptsP = new IntParameter(MINPTS_ID);
- minptsP.addConstraint(new GreaterConstraint(0));
+ minptsP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
if(config.grab(minptsP)) {
minpts = minptsP.getValue();
}
@@ -306,4 +307,4 @@ public class DBSCAN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
return new DBSCAN<>(distanceFunction, epsilon, minpts);
}
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DeLiClu.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DeLiClu.java
index 3c2e0278..814b4cc4 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DeLiClu.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DeLiClu.java
@@ -62,7 +62,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -496,7 +496,7 @@ public class DeLiClu<NV extends NumberVector<?>, D extends Distance<D>> extends
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
IntParameter minptsP = new IntParameter(MINPTS_ID);
- minptsP.addConstraint(new GreaterConstraint(0));
+ minptsP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
if(config.grab(minptsP)) {
minpts = minptsP.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/EM.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/EM.java
index c66442a1..e82ec674 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/EM.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/EM.java
@@ -34,6 +34,7 @@ import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.model.EMModel;
+import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
@@ -41,14 +42,15 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.MathUtil;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
@@ -57,8 +59,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -72,8 +73,8 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
* zero-covariance and variance=1 in covariance matrices.
* </p>
* <p>
- * Reference: A. P. Dempster, N. M. Laird, D. B. Rubin: Maximum Likelihood from
- * Incomplete Data via the EM algorithm. <br>
+ * Reference: A. P. Dempster, N. M. Laird, D. B. Rubin:<br>
+ * Maximum Likelihood from Incomplete Data via the EM algorithm.<br>
* In Journal of the Royal Statistical Society, Series B, 39(1), 1977, pp. 1-31
* </p>
*
@@ -100,48 +101,36 @@ public class EM<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<
private static final double SINGULARITY_CHEAT = 1E-9;
/**
- * Parameter to specify the number of clusters to find, must be an integer
- * greater than 0.
- */
- public static final OptionID K_ID = new OptionID("em.k", "The number of clusters to find.");
-
- /**
- * Holds the value of {@link #K_ID}.
+ * Number of clusters
*/
private int k;
/**
- * Parameter to specify the termination criterion for maximization of E(M):
- * E(M) - E(M') < em.delta, must be a double equal to or greater than 0.
+ * Delta parameter
*/
- public static final OptionID DELTA_ID = new OptionID("em.delta", "The termination criterion for maximization of E(M): " + "E(M) - E(M') < em.delta");
+ private double delta;
/**
- * Parameter to specify the initialization method
+ * Class to choose the initial means
*/
- public static final OptionID INIT_ID = new OptionID("kmeans.initialization", "Method to choose the initial means.");
-
- private static final double MIN_LOGLIKELIHOOD = -100000;
+ private KMeansInitialization<V> initializer;
/**
- * Holds the value of {@link #DELTA_ID}.
+ * Maximum number of iterations to allow
*/
- private double delta;
+ private int maxiter;
/**
- * Store the individual probabilities, for use by EMOutlierDetection etc.
+ * Retain soft assignments.
*/
- private WritableDataStore<double[]> probClusterIGivenX;
+ private boolean soft;
- /**
- * Class to choose the initial means
- */
- private KMeansInitialization<V> initializer;
+ private static final double MIN_LOGLIKELIHOOD = -100000;
/**
- * Maximum number of iterations to allow
+ * Soft assignment result type.
*/
- private int maxiter;
+ public static final SimpleTypeInformation<double[]> SOFT_TYPE = new SimpleTypeInformation<>(double[].class);
/**
* Constructor.
@@ -150,13 +139,15 @@ public class EM<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<
* @param delta delta parameter
* @param initializer Class to choose the initial means
* @param maxiter Maximum number of iterations
+ * @param soft Include soft assignments
*/
- public EM(int k, double delta, KMeansInitialization<V> initializer, int maxiter) {
+ public EM(int k, double delta, KMeansInitialization<V> initializer, int maxiter, boolean soft) {
super();
this.k = k;
this.delta = delta;
this.initializer = initializer;
this.maxiter = maxiter;
+ this.setSoft(soft);
}
/**
@@ -172,137 +163,80 @@ public class EM<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<
* @return Result
*/
public Clustering<EMModel<V>> run(Database database, Relation<V> relation) {
- if (relation.size() == 0) {
+ if(relation.size() == 0) {
throw new IllegalArgumentException("database empty: must contain elements");
}
// initial models
- if (LOG.isVerbose()) {
+ if(LOG.isVerbose()) {
LOG.verbose("initializing " + k + " models");
}
- List<Vector> means = new ArrayList<>();
- for (NumberVector<?> nv : initializer.chooseInitialMeans(database, relation, k, EuclideanDistanceFunction.STATIC)) {
- means.add(nv.getColumnVector());
+ final List<V> initialMeans = initializer.chooseInitialMeans(database, relation, k, EuclideanDistanceFunction.STATIC);
+ assert (initialMeans.size() == k);
+ Vector[] means = new Vector[k];
+ {
+ int i = 0;
+ for(NumberVector<?> nv : initialMeans) {
+ means[i] = nv.getColumnVector();
+ i++;
+ }
}
- List<Matrix> covarianceMatrices = new ArrayList<>(k);
+ Matrix[] covarianceMatrices = new Matrix[k];
double[] normDistrFactor = new double[k];
- List<Matrix> invCovMatr = new ArrayList<>(k);
+ Matrix[] invCovMatr = new Matrix[k];
double[] clusterWeights = new double[k];
- probClusterIGivenX = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_SORTED, double[].class);
+ WritableDataStore<double[]> probClusterIGivenX = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_SORTED, double[].class);
- final int dimensionality = means.get(0).getDimensionality();
- for (int i = 0; i < k; i++) {
+ final int dimensionality = means[0].getDimensionality();
+ final double norm = MathUtil.powi(MathUtil.TWOPI, dimensionality);
+ for(int i = 0; i < k; i++) {
Matrix m = Matrix.identity(dimensionality, dimensionality);
- covarianceMatrices.add(m);
- final double det = m.det();
- if (det > 0.) {
- normDistrFactor[i] = 1.0 / Math.sqrt(Math.pow(MathUtil.TWOPI, dimensionality) * det);
- } else {
- LOG.warning("Encountered matrix with 0 determinant - degenerated.");
- normDistrFactor[i] = 1.0; // Not really well defined
- }
- invCovMatr.add(m.inverse());
+ covarianceMatrices[i] = m;
+ normDistrFactor[i] = 1.0 / Math.sqrt(norm);
+ invCovMatr[i] = Matrix.identity(dimensionality, dimensionality);
clusterWeights[i] = 1.0 / k;
- if (LOG.isDebuggingFinest()) {
- StringBuilder msg = new StringBuilder();
- msg.append(" model ").append(i).append(":\n");
- msg.append(" mean: ").append(means.get(i)).append('\n');
- msg.append(" m:\n").append(FormatUtil.format(m, " ")).append('\n');
- msg.append(" m.det(): ").append(det).append('\n');
- msg.append(" cluster weight: ").append(clusterWeights[i]).append('\n');
- msg.append(" normDistFact: ").append(normDistrFactor[i]).append('\n');
- LOG.debugFine(msg.toString());
- }
}
double emNew = assignProbabilitiesToInstances(relation, normDistrFactor, means, invCovMatr, clusterWeights, probClusterIGivenX);
// iteration unless no change
- if (LOG.isVerbose()) {
+ if(LOG.isVerbose()) {
LOG.verbose("iterating EM");
}
- if (LOG.isVerbose()) {
+ if(LOG.isVerbose()) {
LOG.verbose("iteration " + 0 + " - expectation value: " + emNew);
}
- double em;
- for (int it = 1; it <= maxiter || maxiter < 0; it++) {
- em = emNew;
-
- // recompute models
- List<Vector> meanSums = new ArrayList<>(k);
- double[] sumOfClusterProbabilities = new double[k];
-
- for (int i = 0; i < k; i++) {
- clusterWeights[i] = 0.0;
- meanSums.add(new Vector(dimensionality));
- covarianceMatrices.set(i, Matrix.zeroMatrix(dimensionality));
- }
-
- // weights and means
- for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- double[] clusterProbabilities = probClusterIGivenX.get(iditer);
-
- for (int i = 0; i < k; i++) {
- sumOfClusterProbabilities[i] += clusterProbabilities[i];
- Vector summand = relation.get(iditer).getColumnVector().timesEquals(clusterProbabilities[i]);
- meanSums.get(i).plusEquals(summand);
- }
- }
- final int n = relation.size();
- for (int i = 0; i < k; i++) {
- clusterWeights[i] = sumOfClusterProbabilities[i] / n;
- Vector newMean = meanSums.get(i).timesEquals(1 / sumOfClusterProbabilities[i]);
- means.set(i, newMean);
- }
- // covariance matrices
- for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- double[] clusterProbabilities = probClusterIGivenX.get(iditer);
- Vector instance = relation.get(iditer).getColumnVector();
- for (int i = 0; i < k; i++) {
- Vector difference = instance.minus(means.get(i));
- covarianceMatrices.get(i).plusEquals(difference.timesTranspose(difference).timesEquals(clusterProbabilities[i]));
- }
- }
- for (int i = 0; i < k; i++) {
- covarianceMatrices.set(i, covarianceMatrices.get(i).times(1 / sumOfClusterProbabilities[i]).cheatToAvoidSingularity(SINGULARITY_CHEAT));
- }
- for (int i = 0; i < k; i++) {
- final double det = covarianceMatrices.get(i).det();
- if (det > 0.) {
- normDistrFactor[i] = 1.0 / Math.sqrt(Math.pow(MathUtil.TWOPI, dimensionality) * det);
- } else {
- LOG.warning("Encountered matrix with 0 determinant - degenerated.");
- normDistrFactor[i] = 1.0; // Not really well defined
- }
- invCovMatr.set(i, covarianceMatrices.get(i).inverse());
- }
+ for(int it = 1; it <= maxiter || maxiter < 0; it++) {
+ final double emOld = emNew;
+ recomputeCovarianceMatrices(relation, probClusterIGivenX, means, covarianceMatrices, dimensionality);
+ computeInverseMatrixes(covarianceMatrices, invCovMatr, normDistrFactor, norm);
// reassign probabilities
emNew = assignProbabilitiesToInstances(relation, normDistrFactor, means, invCovMatr, clusterWeights, probClusterIGivenX);
- if (LOG.isVerbose()) {
+ if(LOG.isVerbose()) {
LOG.verbose("iteration " + it + " - expectation value: " + emNew);
}
- if (Math.abs(em - emNew) <= delta) {
+ if(Math.abs(emOld - emNew) <= delta) {
break;
}
}
- if (LOG.isVerbose()) {
+ if(LOG.isVerbose()) {
LOG.verbose("assigning clusters");
}
// fill result with clusters and models
List<ModifiableDBIDs> hardClusters = new ArrayList<>(k);
- for (int i = 0; i < k; i++) {
+ for(int i = 0; i < k; i++) {
hardClusters.add(DBIDUtil.newHashSet());
}
// provide a hard clustering
- for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double[] clusterProbabilities = probClusterIGivenX.get(iditer);
int maxIndex = 0;
double currentMax = 0.0;
- for (int i = 0; i < k; i++) {
- if (clusterProbabilities[i] > currentMax) {
+ for(int i = 0; i < k; i++) {
+ if(clusterProbabilities[i] > currentMax) {
maxIndex = i;
currentMax = clusterProbabilities[i];
}
@@ -312,24 +246,89 @@ public class EM<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<
final NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(relation);
Clustering<EMModel<V>> result = new Clustering<>("EM Clustering", "em-clustering");
// provide models within the result
- for (int i = 0; i < k; i++) {
+ for(int i = 0; i < k; i++) {
// TODO: re-do labeling.
// SimpleClassLabel label = new SimpleClassLabel();
// label.init(result.canonicalClusterLabel(i));
- Cluster<EMModel<V>> model = new Cluster<>(hardClusters.get(i), new EMModel<>(factory.newNumberVector(means.get(i).getArrayRef()), covarianceMatrices.get(i)));
+ Cluster<EMModel<V>> model = new Cluster<>(hardClusters.get(i), new EMModel<>(factory.newNumberVector(means[i].getArrayRef()), covarianceMatrices[i]));
result.addToplevelCluster(model);
}
+ if(isSoft()) {
+ result.addChildResult(new MaterializedRelation<>("cluster assignments", "em-soft-score", SOFT_TYPE, probClusterIGivenX, relation.getDBIDs()));
+ }
+ else {
+ probClusterIGivenX.destroy();
+ }
return result;
}
/**
+ * Compute the inverse cluster matrices.
+ *
+ * @param covarianceMatrices Input covariance matrices
+ * @param invCovMatr Output array for inverse matrices
+ * @param normDistrFactor Output array for norm distribution factors.
+ * @param norm Normalization factor, usually (2pi)^d
+ */
+ public static void computeInverseMatrixes(Matrix[] covarianceMatrices, Matrix[] invCovMatr, double[] normDistrFactor, final double norm) {
+ int k = covarianceMatrices.length;
+ for(int i = 0; i < k; i++) {
+ final double det = covarianceMatrices[i].det();
+ if(det > 0.) {
+ normDistrFactor[i] = 1. / Math.sqrt(norm * det);
+ }
+ else {
+ LOG.warning("Encountered matrix with 0 determinant - degenerated.");
+ normDistrFactor[i] = 1.; // Not really well defined
+ }
+ invCovMatr[i] = covarianceMatrices[i].inverse();
+ }
+ }
+
+ /**
+   * Recompute the covariance matrices.
+ *
+ * @param relation Vector data
+ * @param probClusterIGivenX Object probabilities
+ * @param means Cluster means output
+   * @param covarianceMatrices Output covariance matrices
+ * @param dimensionality Data set dimensionality
+ */
+ public static void recomputeCovarianceMatrices(Relation<? extends NumberVector<?>> relation, WritableDataStore<double[]> probClusterIGivenX, Vector[] means, Matrix[] covarianceMatrices, final int dimensionality) {
+ final int k = means.length;
+ CovarianceMatrix[] cms = new CovarianceMatrix[k];
+ for(int i = 0; i < k; i++) {
+ cms[i] = new CovarianceMatrix(dimensionality);
+ }
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ double[] clusterProbabilities = probClusterIGivenX.get(iditer);
+ Vector instance = relation.get(iditer).getColumnVector();
+ for(int i = 0; i < k; i++) {
+ if(clusterProbabilities[i] > 0.) {
+ cms[i].put(instance, clusterProbabilities[i]);
+ }
+ }
+ }
+ for(int i = 0; i < k; i++) {
+ if(cms[i].getWeight() <= 0.) {
+ means[i] = new Vector(dimensionality);
+ covarianceMatrices[i] = Matrix.identity(dimensionality, dimensionality);
+ }
+ else {
+ means[i] = cms[i].getMeanVector();
+ covarianceMatrices[i] = cms[i].destroyToNaiveMatrix().cheatToAvoidSingularity(SINGULARITY_CHEAT);
+ }
+ }
+ }
+
+ /**
* Assigns the current probability values to the instances in the database and
* compute the expectation value of the current mixture of distributions.
*
* Computed as the sum of the logarithms of the prior probability of each
* instance.
*
- * @param database the database used for assignment to instances
+   * @param relation the relation holding the vectors to assign probabilities to
* @param normDistrFactor normalization factor for density function, based on
* current covariance matrix
* @param means the current means
@@ -337,58 +336,55 @@ public class EM<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<
* @param clusterWeights the weights of the current clusters
* @return the expectation value of the current mixture of distributions
*/
- protected double assignProbabilitiesToInstances(Relation<V> database, double[] normDistrFactor, List<Vector> means, List<Matrix> invCovMatr, double[] clusterWeights, WritableDataStore<double[]> probClusterIGivenX) {
- double emSum = 0.0;
+ public static double assignProbabilitiesToInstances(Relation<? extends NumberVector<?>> relation, double[] normDistrFactor, Vector[] means, Matrix[] invCovMatr, double[] clusterWeights, WritableDataStore<double[]> probClusterIGivenX) {
+ final int k = clusterWeights.length;
+ double emSum = 0.;
- for (DBIDIter iditer = database.iterDBIDs(); iditer.valid(); iditer.advance()) {
- Vector x = database.get(iditer).getColumnVector();
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ Vector x = relation.get(iditer).getColumnVector();
double[] probabilities = new double[k];
- for (int i = 0; i < k; i++) {
- Vector difference = x.minus(means.get(i));
- double rowTimesCovTimesCol = difference.transposeTimesTimes(invCovMatr.get(i), difference);
- double power = rowTimesCovTimesCol / 2.0;
+ for(int i = 0; i < k; i++) {
+ Vector difference = x.minus(means[i]);
+ double rowTimesCovTimesCol = difference.transposeTimesTimes(invCovMatr[i], difference);
+ double power = rowTimesCovTimesCol / 2.;
double prob = normDistrFactor[i] * Math.exp(-power);
- if (LOG.isDebuggingFinest()) {
- LOG.debugFinest(" difference vector= ( " + difference.toString() + " )\n" + " difference:\n" + FormatUtil.format(difference, " ") + "\n" + " rowTimesCovTimesCol:\n" + rowTimesCovTimesCol + "\n" + " power= " + power + "\n" + " prob=" + prob + "\n" + " inv cov matrix: \n" + FormatUtil.format(invCovMatr.get(i), " "));
+ if(LOG.isDebuggingFinest()) {
+ LOG.debugFinest(" difference vector= ( " + difference.toString() + " )\n" + //
+ " difference:\n" + FormatUtil.format(difference, " ") + "\n" + //
+ " rowTimesCovTimesCol:\n" + rowTimesCovTimesCol + "\n" + //
+ " power= " + power + "\n" + " prob=" + prob + "\n" + //
+ " inv cov matrix: \n" + FormatUtil.format(invCovMatr[i], " "));
}
- if (!(prob >= 0.)) {
+ if(!(prob >= 0.)) {
LOG.warning("Invalid probability: " + prob + " power: " + power + " factor: " + normDistrFactor[i]);
+ prob = 0.;
}
probabilities[i] = prob;
}
- double priorProbability = 0.0;
- for (int i = 0; i < k; i++) {
+ double priorProbability = 0.;
+ for(int i = 0; i < k; i++) {
priorProbability += probabilities[i] * clusterWeights[i];
}
double logP = Math.max(Math.log(priorProbability), MIN_LOGLIKELIHOOD);
- if (!Double.isNaN(logP)) {
+ if(!Double.isNaN(logP)) {
emSum += logP;
}
double[] clusterProbabilities = new double[k];
- for (int i = 0; i < k; i++) {
- assert (clusterWeights[i] >= 0.0);
+ for(int i = 0; i < k; i++) {
+ assert (clusterWeights[i] >= 0.);
// do not divide by zero!
- if (priorProbability > 0.0) {
+ if(priorProbability > 0.) {
clusterProbabilities[i] = probabilities[i] / priorProbability * clusterWeights[i];
- } else {
- clusterProbabilities[i] = 0.0;
+ }
+ else {
+ clusterProbabilities[i] = 0.;
}
}
probClusterIGivenX.put(iditer, clusterProbabilities);
}
- return emSum;
- }
-
- /**
- * Get the probabilities for a given point.
- *
- * @param index Point ID
- * @return Probabilities of given point
- */
- public double[] getProbClusterIGivenX(DBIDRef index) {
- return probClusterIGivenX.get(index);
+ return emSum / relation.size();
}
@Override
@@ -402,6 +398,20 @@ public class EM<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<
}
/**
+   * @return whether soft (probabilistic) cluster assignments are retained
+ */
+ public boolean isSoft() {
+ return soft;
+ }
+
+ /**
+   * @param soft whether to retain soft cluster assignments
+ */
+ public void setSoft(boolean soft) {
+ this.soft = soft;
+ }
+
+ /**
* Parameterization class.
*
* @author Erich Schubert
@@ -409,45 +419,77 @@ public class EM<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<
* @apiviz.exclude
*/
public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
+ /**
+ * Parameter to specify the number of clusters to find, must be an integer
+ * greater than 0.
+ */
+ public static final OptionID K_ID = new OptionID("em.k", "The number of clusters to find.");
+
+ /**
+ * Parameter to specify the termination criterion for maximization of E(M):
+ * E(M) - E(M') < em.delta, must be a double equal to or greater than 0.
+ */
+ public static final OptionID DELTA_ID = new OptionID("em.delta", //
+ "The termination criterion for maximization of E(M): " + //
+ "E(M) - E(M') < em.delta");
+
+ /**
+ * Parameter to specify the initialization method
+ */
+ public static final OptionID INIT_ID = new OptionID("kmeans.initialization", //
+ "Method to choose the initial means.");
+
+ /**
+ * Number of clusters.
+ */
protected int k;
+ /**
+ * Stopping threshold
+ */
protected double delta;
+ /**
+ * Initialization method
+ */
protected KMeansInitialization<V> initializer;
+ /**
+ * Maximum number of iterations.
+ */
protected int maxiter = -1;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
IntParameter kP = new IntParameter(K_ID);
- kP.addConstraint(new GreaterConstraint(0));
- if (config.grab(kP)) {
+ kP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(kP)) {
k = kP.getValue();
}
ObjectParameter<KMeansInitialization<V>> initialP = new ObjectParameter<>(INIT_ID, KMeansInitialization.class, RandomlyGeneratedInitialMeans.class);
- if (config.grab(initialP)) {
+ if(config.grab(initialP)) {
initializer = initialP.instantiateClass(config);
}
DoubleParameter deltaP = new DoubleParameter(DELTA_ID, 0.0);
- deltaP.addConstraint(new GreaterEqualConstraint(0.0));
- if (config.grab(deltaP)) {
+ deltaP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
+ if(config.grab(deltaP)) {
delta = deltaP.getValue();
}
IntParameter maxiterP = new IntParameter(KMeans.MAXITER_ID);
- maxiterP.addConstraint(new GreaterEqualConstraint(0));
+ maxiterP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_INT);
maxiterP.setOptional(true);
- if (config.grab(maxiterP)) {
+ if(config.grab(maxiterP)) {
maxiter = maxiterP.getValue();
}
}
@Override
protected EM<V> makeInstance() {
- return new EM<>(k, delta, initializer, maxiter);
+ return new EM<>(k, delta, initializer, maxiter, false);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICS.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICS.java
index e928d041..a4a922df 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICS.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICS.java
@@ -33,10 +33,10 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDPair;
import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDPair;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDPair;
import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDPair;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.DistanceUtil;
@@ -55,7 +55,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DistanceParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -146,7 +146,8 @@ public class OPTICS<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
// boxing/unboxing.
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
if(!processedIDs.contains(iditer)) {
- // We need to do some ugly casts to be able to run the optimized version, unfortunately.
+ // We need to do some ugly casts to be able to run the optimized
+ // version, unfortunately.
@SuppressWarnings("unchecked")
final ClusterOrderResult<DoubleDistance> doubleClusterOrder = ClusterOrderResult.class.cast(clusterOrder);
@SuppressWarnings("unchecked")
@@ -304,7 +305,7 @@ public class OPTICS<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
}
IntParameter minptsP = new IntParameter(MINPTS_ID);
- minptsP.addConstraint(new GreaterConstraint(0));
+ minptsP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
if(config.grab(minptsP)) {
minpts = minptsP.intValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSXi.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSXi.java
index 583d402b..db343f3a 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSXi.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSXi.java
@@ -48,8 +48,7 @@ import de.lmu.ifi.dbs.elki.result.optics.ClusterOrderEntry;
import de.lmu.ifi.dbs.elki.result.optics.ClusterOrderResult;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ClassParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
@@ -240,6 +239,10 @@ public class OPTICSXi<N extends NumberDistance<N, ?>> extends AbstractAlgorithm<
// By default, clusters cover both the steep up and steep down area
int cstart = sda.getStartIndex();
int cend = sua.getEndIndex();
+ // Hotfix: never include infinity-reachable points at the end
+ while(cend > cstart && Double.isInfinite(clusterOrder.get(cend).getReachability().doubleValue())) {
+ --cend;
+ }
// However, we sometimes have to adjust this (Condition 4):
{
// Case b)
@@ -654,8 +657,8 @@ public class OPTICSXi<N extends NumberDistance<N, ?>> extends AbstractAlgorithm<
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
DoubleParameter xiP = new DoubleParameter(XI_ID);
- xiP.addConstraint(new GreaterEqualConstraint(0.0));
- xiP.addConstraint(new LessConstraint(1.0));
+ xiP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
+ xiP.addConstraint(CommonConstraints.LESS_THAN_ONE_DOUBLE);
if(config.grab(xiP)) {
xi = xiP.doubleValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SNNClustering.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SNNClustering.java
index 95d9f23c..86bb9a09 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SNNClustering.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SNNClustering.java
@@ -53,7 +53,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DistanceParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -328,7 +328,7 @@ public class SNNClustering<O> extends AbstractAlgorithm<Clustering<Model>> imple
}
IntParameter minptsP = new IntParameter(MINPTS_ID);
- minptsP.addConstraint(new GreaterConstraint(0));
+ minptsP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
if(config.grab(minptsP)) {
minpts = minptsP.intValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/affinitypropagation/AffinityPropagationClusteringAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/affinitypropagation/AffinityPropagationClusteringAlgorithm.java
new file mode 100644
index 00000000..68dacf34
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/affinitypropagation/AffinityPropagationClusteringAlgorithm.java
@@ -0,0 +1,350 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.affinitypropagation;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import gnu.trove.iterator.TIntObjectIterator;
+import gnu.trove.map.hash.TIntObjectHashMap;
+import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.model.MedoidModel;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
+import de.lmu.ifi.dbs.elki.logging.progress.MutableProgress;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * Cluster analysis by affinity propagation.
+ *
+ * Reference:
+ * <p>
+ * Clustering by Passing Messages Between Data Points<br />
+ * B. J. Frey and D. Dueck<br />
+ * Science Vol 315
+ * </p>
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.composedOf AffinityPropagationInitialization
+ *
+ * @param <O> object type
+ */
+@Title("Affinity Propagation: Clustering by Passing Messages Between Data Points")
+@Reference(title = "Clustering by Passing Messages Between Data Points", authors = "B. J. Frey and D. Dueck", booktitle = "Science Vol 315", url = "http://dx.doi.org/10.1126/science.1136800")
+public class AffinityPropagationClusteringAlgorithm<O> extends AbstractAlgorithm<Clustering<MedoidModel>> implements ClusteringAlgorithm<Clustering<MedoidModel>> {
+ /**
+ * Class logger
+ */
+ private static final Logging LOG = Logging.getLogger(AffinityPropagationClusteringAlgorithm.class);
+
+ /**
+ * Similarity initialization
+ */
+ AffinityPropagationInitialization<O> initialization;
+
+ /**
+ * Damping factor lambda.
+ */
+ double lambda = 0.5;
+
+ /**
+ * Terminate after 10 iterations with no changes.
+ */
+ int convergence = 10;
+
+ /**
+ * Maximum number of iterations.
+ */
+ int maxiter = 1000;
+
+ /**
+ * Constructor.
+ *
+ * @param initialization Similarity initialization
+ * @param lambda Damping factor
+ * @param convergence Termination threshold (Number of stable iterations)
+ * @param maxiter Maximum number of iterations
+ */
+ public AffinityPropagationClusteringAlgorithm(AffinityPropagationInitialization<O> initialization, double lambda, int convergence, int maxiter) {
+ super();
+ this.initialization = initialization;
+ this.lambda = lambda;
+ this.convergence = convergence;
+ this.maxiter = maxiter;
+ }
+
+ /**
+ * Perform affinity propagation clustering.
+ *
+ * @param db Database
+ * @param relation Relation
+ * @return Clustering result
+ */
+ public Clustering<MedoidModel> run(Database db, Relation<O> relation) {
+ ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
+ final int size = ids.size();
+
+ int[] assignment = new int[size];
+ double[][] s = initialization.getSimilarityMatrix(db, relation, ids);
+ double[][] r = new double[size][size];
+ double[][] a = new double[size][size];
+
+ IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("Affinity Propagation Iteration", LOG) : null;
+ MutableProgress aprog = LOG.isVerbose() ? new MutableProgress("Stable assignments", size + 1, LOG) : null;
+
+ int inactive = 0;
+ for(int iteration = 0; iteration < maxiter && inactive < convergence; iteration++) {
+ // Update responsibility matrix:
+ for(int i = 0; i < size; i++) {
+ double[] ai = a[i], ri = r[i], si = s[i];
+ // Find the two largest values (as initially maxk == i)
+ double max1 = Double.NEGATIVE_INFINITY, max2 = Double.NEGATIVE_INFINITY;
+ int maxk = -1;
+ for(int k = 0; k < size; k++) {
+ double val = ai[k] + si[k];
+ if(val > max1) {
+ max2 = max1;
+ max1 = val;
+ maxk = k;
+ }
+ else if(val > max2) {
+ max2 = val;
+ }
+ }
+ // With the maximum value known, update r:
+ for(int k = 0; k < size; k++) {
+ double val = si[k] - ((k != maxk) ? max1 : max2);
+ ri[k] = ri[k] * lambda + val * (1. - lambda);
+ }
+ }
+ // Update availability matrix
+ for(int k = 0; k < size; k++) {
+ // Compute sum of max(0, r_ik) for all i.
+ // For r_kk, don't apply the max.
+ double colposum = 0.;
+ for(int i = 0; i < size; i++) {
+ if(i == k || r[i][k] > 0.) {
+ colposum += r[i][k];
+ }
+ }
+ for(int i = 0; i < size; i++) {
+ double val = colposum;
+ // Adjust column sum by the one extra term.
+ if(i == k || r[i][k] > 0.) {
+ val -= r[i][k];
+ }
+ if(i != k && val > 0.) { // min
+ val = 0.;
+ }
+ a[i][k] = a[i][k] * lambda + val * (1 - lambda);
+ }
+ }
+ int changed = 0;
+ for(int i = 0; i < size; i++) {
+ double[] ai = a[i], ri = r[i];
+ double max = Double.NEGATIVE_INFINITY;
+ int maxj = -1;
+ for(int j = 0; j < size; j++) {
+ double v = ai[j] + ri[j];
+ if(v > max || (i == j && v >= max)) {
+ max = v;
+ maxj = j;
+ }
+ }
+ if(assignment[i] != maxj) {
+ changed += 1;
+ assignment[i] = maxj;
+ }
+ }
+ inactive = (changed > 0) ? 0 : (inactive + 1);
+ if(prog != null) {
+ prog.incrementProcessed(LOG);
+ }
+ if(aprog != null) {
+ aprog.setProcessed(size - changed, LOG);
+ }
+ }
+ if(aprog != null) {
+ aprog.setProcessed(aprog.getTotal(), LOG);
+ }
+ if(prog != null) {
+ prog.setCompleted(LOG);
+ }
+ // Cluster map, by lead object
+ TIntObjectHashMap<ModifiableDBIDs> map = new TIntObjectHashMap<>();
+ DBIDArrayIter i1 = ids.iter();
+ for(int i = 0; i1.valid(); i1.advance(), i++) {
+ int c = assignment[i];
+ // Add to cluster members:
+ ModifiableDBIDs cids = map.get(c);
+ if(cids == null) {
+ cids = DBIDUtil.newArray();
+ map.put(c, cids);
+ }
+ cids.add(i1);
+ }
+ // If we stopped early, the cluster lead might be in a different cluster.
+ for(TIntObjectIterator<ModifiableDBIDs> iter = map.iterator(); iter.hasNext();) {
+ iter.advance(); // Trove iterator; advance first!
+ final int key = iter.key();
+ int targetkey = key;
+ ModifiableDBIDs tids = null;
+ // Chase arrows until we find a self-assigned exemplar with a cluster.
+ while(tids == null && assignment[targetkey] != targetkey) {
+ targetkey = assignment[targetkey];
+ tids = map.get(targetkey);
+ }
+ if(tids != null && targetkey != key) {
+ tids.addDBIDs(iter.value());
+ iter.remove();
+ }
+ }
+
+ Clustering<MedoidModel> clustering = new Clustering<>("Affinity Propagation Clustering", "ap-clustering");
+ ModifiableDBIDs noise = DBIDUtil.newArray();
+ for(TIntObjectIterator<ModifiableDBIDs> iter = map.iterator(); iter.hasNext();) {
+ iter.advance(); // Trove iterator; advance first!
+ i1.seek(iter.key());
+ if(iter.value().size() > 1) {
+ MedoidModel mod = new MedoidModel(DBIDUtil.deref(i1));
+ clustering.addToplevelCluster(new Cluster<>(iter.value(), mod));
+ }
+ else {
+ noise.add(i1);
+ }
+ }
+ if(noise.size() > 0) {
+ MedoidModel mod = new MedoidModel(DBIDUtil.deref(noise.iter()));
+ clustering.addToplevelCluster(new Cluster<>(noise, true, mod));
+ }
+ return clustering;
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(initialization.getInputTypeRestriction());
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ *
+ * @param <O> object type
+ */
+ public static class Parameterizer<O> extends AbstractParameterizer {
+ /**
+ * Parameter for the similarity matrix initialization
+ */
+ public static final OptionID INITIALIZATION_ID = new OptionID("ap.initialization", "Similarity matrix initialization..");
+
+ /**
+ * Parameter for the dampening factor.
+ */
+ public static final OptionID LAMBDA_ID = new OptionID("ap.lambda", "Dampening factor lambda. Usually 0.5 to 1.");
+
+ /**
+ * Parameter for the convergence factor.
+ */
+ public static final OptionID CONVERGENCE_ID = new OptionID("ap.convergence", "Number of stable iterations for convergence.");
+
+ /**
+ * Parameter for the convergence factor.
+ */
+ public static final OptionID MAXITER_ID = new OptionID("ap.maxiter", "Maximum number of iterations.");
+
+ /**
+ * Initialization function for the similarity matrix.
+ */
+ AffinityPropagationInitialization<O> initialization;
+
+ /**
+ * Dampening parameter.
+ */
+ double lambda = .5;
+
+ /**
+ * Number of stable iterations for convergence.
+ */
+ int convergence;
+
+ /**
+ * Maximum number of iterations.
+ */
+ int maxiter;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ final ObjectParameter<AffinityPropagationInitialization<O>> param = new ObjectParameter<>(INITIALIZATION_ID, AffinityPropagationInitialization.class, DistanceBasedInitializationWithMedian.class);
+ if(config.grab(param)) {
+ initialization = param.instantiateClass(config);
+ }
+ final DoubleParameter lambdaP = new DoubleParameter(LAMBDA_ID, .5);
+ lambdaP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+ lambdaP.addConstraint(CommonConstraints.LESS_THAN_ONE_DOUBLE);
+ if(config.grab(lambdaP)) {
+ lambda = lambdaP.doubleValue();
+ }
+ final IntParameter convergenceP = new IntParameter(CONVERGENCE_ID, 15);
+ convergenceP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(convergenceP)) {
+ convergence = convergenceP.intValue();
+ }
+ final IntParameter maxiterP = new IntParameter(MAXITER_ID, 1000);
+ if(config.grab(maxiterP)) {
+ maxiter = maxiterP.intValue();
+ }
+ }
+
+ @Override
+ protected AffinityPropagationClusteringAlgorithm<O> makeInstance() {
+ return new AffinityPropagationClusteringAlgorithm<>(initialization, lambda, convergence, maxiter);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/affinitypropagation/AffinityPropagationInitialization.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/affinitypropagation/AffinityPropagationInitialization.java
new file mode 100644
index 00000000..5dbc54de
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/affinitypropagation/AffinityPropagationInitialization.java
@@ -0,0 +1,59 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.affinitypropagation;
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable;
+
+/**
+ * Initialization methods for affinity propagation.
+ *
+ * @author Erich Schubert
+ */
+public interface AffinityPropagationInitialization<O> extends Parameterizable {
+ /**
+ * Quantile to use for the diagonal entries.
+ */
+ public static final OptionID QUANTILE_ID = new OptionID("ap.quantile", "Quantile to use for diagonal entries.");
+
+ /**
+ * Compute the initial similarity matrix.
+ *
+ * @param db Database
+ * @param relation Data relation
+ * @param ids indexed DBIDs
+ * @return Similarity matrix
+ */
+ double[][] getSimilarityMatrix(Database db, Relation<O> relation, ArrayDBIDs ids);
+
+ /**
+ * Get the data type information for the similarity computations.
+ *
+ * @return Data type
+ */
+ TypeInformation getInputTypeRestriction();
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/affinitypropagation/DistanceBasedInitializationWithMedian.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/affinitypropagation/DistanceBasedInitializationWithMedian.java
new file mode 100644
index 00000000..2c8cabf9
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/affinitypropagation/DistanceBasedInitializationWithMedian.java
@@ -0,0 +1,148 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.affinitypropagation;
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * Distance based initialization.
+ *
+ * @author Erich Schubert
+ *
+ * @param <O> Object type
+ * @param <D> Distance type
+ */
+public class DistanceBasedInitializationWithMedian<O, D extends NumberDistance<D, ?>> implements AffinityPropagationInitialization<O> {
+ /**
+ * Distance function.
+ */
+ DistanceFunction<? super O, D> distance;
+
+ /**
+ * Quantile to use.
+ */
+ double quantile;
+
+ /**
+ * Constructor.
+ *
+ * @param distance Similarity function
+ * @param quantile Quantile
+ */
+ public DistanceBasedInitializationWithMedian(DistanceFunction<? super O, D> distance, double quantile) {
+ super();
+ this.distance = distance;
+ this.quantile = quantile;
+ }
+
+ @Override
+ public double[][] getSimilarityMatrix(Database db, Relation<O> relation, ArrayDBIDs ids) {
+ final int size = ids.size();
+ DistanceQuery<O, D> dq = db.getDistanceQuery(relation, distance);
+ double[][] mat = new double[size][size];
+ double[] flat = new double[(size * (size - 1)) >> 1];
+ // TODO: optimize for double valued primitive distances.
+ DBIDArrayIter i1 = ids.iter(), i2 = ids.iter();
+ for (int i = 0, j = 0; i < size; i++, i1.advance()) {
+ double[] mati = mat[i];
+ i2.seek(i + 1);
+ for (int k = i + 1; k < size; k++, i2.advance()) {
+ mati[k] = -dq.distance(i1, i2).doubleValue();
+ mat[k][i] = mati[k]; // symmetry.
+ flat[j] = mati[k];
+ j++;
+ }
+ }
+ double median = QuickSelect.quantile(flat, quantile);
+ // On the diagonal, we place the median
+ for (int i = 0; i < size; i++) {
+ mat[i][i] = median;
+ }
+ return mat;
+ }
+
+ @Override
+ public TypeInformation getInputTypeRestriction() {
+ return distance.getInputTypeRestriction();
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ *
+ * @param <O> Object type
+ * @param <D> Distance type
+ */
+ public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractParameterizer {
+ /**
+ * Parameter for the distance function.
+ */
+ public static final OptionID DISTANCE_ID = new OptionID("ap.distance", "Distance function to use.");
+
+ /**
+ * Distance function.
+ */
+ DistanceFunction<? super O, D> distance;
+
+ /**
+ * Quantile to use.
+ */
+ double quantile;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ ObjectParameter<DistanceFunction<? super O, D>> param = new ObjectParameter<>(DISTANCE_ID, DistanceFunction.class, SquaredEuclideanDistanceFunction.class);
+ if (config.grab(param)) {
+ distance = param.instantiateClass(config);
+ }
+
+ DoubleParameter quantileP = new DoubleParameter(QUANTILE_ID, .5);
+ if (config.grab(quantileP)) {
+ quantile = quantileP.doubleValue();
+ }
+ }
+
+ @Override
+ protected DistanceBasedInitializationWithMedian<O, D> makeInstance() {
+ return new DistanceBasedInitializationWithMedian<>(distance, quantile);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/affinitypropagation/SimilarityBasedInitializationWithMedian.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/affinitypropagation/SimilarityBasedInitializationWithMedian.java
new file mode 100644
index 00000000..a138da96
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/affinitypropagation/SimilarityBasedInitializationWithMedian.java
@@ -0,0 +1,153 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.affinitypropagation;
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
+import de.lmu.ifi.dbs.elki.database.query.similarity.SimilarityQuery;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+import de.lmu.ifi.dbs.elki.distance.similarityfunction.SimilarityFunction;
+import de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.LinearKernelFunction;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * Similarity based initialization.
+ *
+ * @author Erich Schubert
+ *
+ * @param <O> Object type
+ * @param <D> Distance type
+ */
+public class SimilarityBasedInitializationWithMedian<O, D extends NumberDistance<D, ?>> implements AffinityPropagationInitialization<O> {
+ /**
+ * Similarity function.
+ */
+ SimilarityFunction<? super O, D> similarity;
+
+ /**
+ * Quantile to use.
+ */
+ double quantile;
+
+ /**
+ * Constructor.
+ *
+ * @param similarity Similarity function
+ * @param quantile Quantile
+ */
+ public SimilarityBasedInitializationWithMedian(SimilarityFunction<? super O, D> similarity, double quantile) {
+ super();
+ this.similarity = similarity;
+ this.quantile = quantile;
+ }
+
+ @Override
+ public double[][] getSimilarityMatrix(Database db, Relation<O> relation, ArrayDBIDs ids) {
+ final int size = ids.size();
+ SimilarityQuery<O, D> sq = db.getSimilarityQuery(relation, similarity);
+ double[][] mat = new double[size][size];
+ double[] flat = new double[(size * (size - 1)) >> 1];
+ // TODO: optimize for double valued primitive distances.
+ DBIDArrayIter i1 = ids.iter(), i2 = ids.iter();
+ // Compute self-similarities first, for centering:
+ for (int i = 0; i < size; i++, i1.advance()) {
+ mat[i][i] = sq.similarity(i1, i1).doubleValue() * .5;
+ }
+ i1.seek(0);
+ for (int i = 0, j = 0; i < size; i++, i1.advance()) {
+ final double[] mati = mat[i]; // Probably faster access.
+ i2.seek(i + 1);
+ for (int k = i + 1; k < size; k++, i2.advance()) {
+ mati[k] = sq.similarity(i1, i2).doubleValue() - mati[i] - mat[k][k];
+ mat[k][i] = mati[k]; // symmetry.
+ flat[j] = mati[k];
+ j++;
+ }
+ }
+ double median = QuickSelect.quantile(flat, quantile);
+ // On the diagonal, we place the median
+ for (int i = 0; i < size; i++) {
+ mat[i][i] = median;
+ }
+ return mat;
+ }
+
+ @Override
+ public TypeInformation getInputTypeRestriction() {
+ return similarity.getInputTypeRestriction();
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ *
+ * @param <O> Object type
+ * @param <D> Distance type
+ */
+ public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractParameterizer {
+ /**
+ * Parameter for the similarity function.
+ */
+ public static final OptionID SIMILARITY_ID = new OptionID("ap.similarity", "Similarity function to use.");
+
+ /**
+ * Similarity function.
+ */
+ SimilarityFunction<? super O, D> similarity;
+
+ /**
+ * Quantile to use.
+ */
+ double quantile;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ ObjectParameter<SimilarityFunction<? super O, D>> param = new ObjectParameter<>(SIMILARITY_ID, SimilarityFunction.class, LinearKernelFunction.class);
+ if (config.grab(param)) {
+ similarity = param.instantiateClass(config);
+ }
+
+ DoubleParameter quantileP = new DoubleParameter(QUANTILE_ID, .5);
+ if (config.grab(quantileP)) {
+ quantile = quantileP.doubleValue();
+ }
+ }
+
+ @Override
+ protected SimilarityBasedInitializationWithMedian<O, D> makeInstance() {
+ return new SimilarityBasedInitializationWithMedian<>(similarity, quantile);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/affinitypropagation/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/affinitypropagation/package-info.java
new file mode 100644
index 00000000..bc6059ac
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/affinitypropagation/package-info.java
@@ -0,0 +1,27 @@
+/**
+ * Affinity Propagation (AP) clustering.
+ */
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package de.lmu.ifi.dbs.elki.algorithm.clustering.affinitypropagation; \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/biclustering/AbstractBiclustering.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/biclustering/AbstractBiclustering.java
new file mode 100644
index 00000000..8b875340
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/biclustering/AbstractBiclustering.java
@@ -0,0 +1,302 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.biclustering;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.BitSet;
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.BiclusterModel;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
+import de.lmu.ifi.dbs.elki.utilities.BitsUtil;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages;
+
+/**
+ * Abstract class as a convenience for different biclustering approaches.
+ * <p/>
+ * The typically required values describing submatrices are computed using the
+ * corresponding values within a database of NumberVectors.
+ * <p/>
+ * The database is supposed to present a data matrix with a row representing an
+ * entry ({@link NumberVector}), a column representing a dimension (attribute)
+ * of the {@link NumberVector}s.
+ *
+ * @author Arthur Zimek
+ * @param <V> a certain subtype of NumberVector - the data matrix is supposed to
+ * consist of rows where each row relates to an object of type V and the
+ * columns relate to the attribute values of these objects
+ * @param <M> Cluster model type
+ */
+public abstract class AbstractBiclustering<V extends NumberVector<?>, M extends BiclusterModel> extends AbstractAlgorithm<Clustering<M>> implements ClusteringAlgorithm<Clustering<M>> {
+  /**
+   * Keeps the currently set database.
+   *
+   * NOTE(review): this field is never assigned anywhere in this class (not
+   * even in {@link #run}), so {@link #getDatabase()} always returns
+   * {@code null} — confirm whether it should be populated from the relation.
+   */
+  private Database database;
+
+  /**
+   * Relation we use.
+   */
+  protected Relation<V> relation;
+
+  /**
+   * Iterator to use for more efficient random access; reused by
+   * {@link #valueAt} to avoid allocating an iterator per lookup.
+   */
+  private DBIDArrayIter iter;
+
+  /**
+   * The row ids corresponding to the currently set {@link #relation}.
+   */
+  protected ArrayDBIDs rowIDs;
+
+  /**
+   * Column dimensionality (number of attributes of the data matrix).
+   */
+  private int colDim;
+
+  /**
+   * Constructor.
+   */
+  protected AbstractBiclustering() {
+    super();
+  }
+
+  /**
+   * Prepares the algorithm for running on a specific database.
+   * <p/>
+   * Assigns the database, the row ids, and the col ids, then calls
+   * {@link #biclustering()}.
+   * <p/>
+   * Any concrete algorithm should be implemented within method
+   * {@link #biclustering()} by an inheriting biclustering approach.
+   *
+   * @param relation Relation to process
+   * @return Clustering result
+   */
+  public final Clustering<M> run(Relation<V> relation) {
+    this.relation = relation;
+    if (this.relation == null || this.relation.size() == 0) {
+      throw new IllegalArgumentException(ExceptionMessages.DATABASE_EMPTY);
+    }
+    colDim = RelationUtil.dimensionality(relation);
+    // Ensure array-backed ids, so that iter.seek(row) below is O(1).
+    rowIDs = DBIDUtil.ensureArray(this.relation.getDBIDs());
+    iter = rowIDs.iter();
+    // NOTE(review): despite the javadoc above, #database is NOT assigned here.
+    return biclustering();
+  }
+
+  /**
+   * Run the actual biclustering algorithm.
+   * <p/>
+   * This method is supposed to be called only from the method
+   * {@link #run}.
+   * <p/>
+   */
+  protected abstract Clustering<M> biclustering();
+
+  /**
+   * Convert a bitset into integer column ids.
+   *
+   * @param cols Bitmask of selected columns
+   * @return integer column ids
+   */
+  protected int[] colsBitsetToIDs(BitSet cols) {
+    int[] colIDs = new int[cols.cardinality()];
+    int colsIndex = 0;
+    for (int i = cols.nextSetBit(0); i >= 0; i = cols.nextSetBit(i + 1)) {
+      colIDs[colsIndex] = i;
+      colsIndex++;
+    }
+    return colIDs;
+  }
+
+  /**
+   * Convert a bitset into integer row ids.
+   *
+   * @param rows Bitmask of selected rows
+   * @return integer row ids
+   */
+  protected ArrayDBIDs rowsBitsetToIDs(BitSet rows) {
+    ArrayModifiableDBIDs rowIDs = DBIDUtil.newArray(rows.cardinality());
+    DBIDArrayIter iter = this.rowIDs.iter();
+    for (int i = rows.nextSetBit(0); i >= 0; i = rows.nextSetBit(i + 1)) {
+      // Map the bit position back to the DBID at the same array position.
+      iter.seek(i);
+      rowIDs.add(iter);
+    }
+    return rowIDs;
+  }
+
+  /**
+   * Defines a Bicluster as given by the included rows and columns.
+   *
+   * @param rows the rows included in the Bicluster
+   * @param cols the columns included in the Bicluster
+   * @return a Bicluster as given by the included rows and columns
+   */
+  protected Cluster<BiclusterModel> defineBicluster(BitSet rows, BitSet cols) {
+    ArrayDBIDs rowIDs = rowsBitsetToIDs(rows);
+    int[] colIDs = colsBitsetToIDs(cols);
+    return new Cluster<>(rowIDs, new BiclusterModel(colIDs));
+  }
+
+  /**
+   * Defines a Bicluster as given by the included rows and columns.
+   *
+   * @param rows the rows included in the Bicluster (long[] bitmask)
+   * @param cols the columns included in the Bicluster (long[] bitmask)
+   * @return A Bicluster as given by the included rows and columns
+   */
+  protected Cluster<BiclusterModel> defineBicluster(long[] rows, long[] cols) {
+    ArrayDBIDs rowIDs = rowsBitsetToIDs(rows);
+    int[] colIDs = colsBitsetToIDs(cols);
+    return new Cluster<>(rowIDs, new BiclusterModel(colIDs));
+  }
+
+  /**
+   * Returns the value of the data matrix at row <code>row</code> and column
+   * <code>col</code>.
+   *
+   * @param row the row in the data matrix according to the current order of
+   *        rows (refers to database entry
+   *        <code>database.get(rowIDs[row])</code>)
+   * @param col the column in the data matrix according to the current order of
+   *        rows (refers to the attribute value of an database entry
+   *        <code>getValue(colIDs[col])</code>)
+   * @return the attribute value of the database entry as retrieved by
+   *         <code>database.get(rowIDs[row]).getValue(colIDs[col])</code>
+   */
+  protected double valueAt(int row, int col) {
+    iter.seek(row);
+    return relation.get(iter).doubleValue(col);
+  }
+
+  /**
+   * Get the DBID of a certain row
+   *
+   * @param row Row number
+   * @return DBID of this row
+   * @deprecated Expensive!
+   */
+  @Deprecated
+  protected DBID getRowDBID(int row) {
+    return rowIDs.get(row);
+  }
+
+  /**
+   * Convert a bitset into integer column ids.
+   *
+   * @param cols Bitmask of selected columns (long[] encoding)
+   * @return integer column ids
+   */
+  protected int[] colsBitsetToIDs(long[] cols) {
+    int[] colIDs = new int[(int) BitsUtil.cardinality(cols)];
+    int colsIndex = 0;
+    // cpos is the global bit position, clpos the index of the current word.
+    for (int cpos = 0, clpos = 0; clpos < cols.length; ++clpos) {
+      long clong = cols[clpos];
+      if (clong == 0L) {
+        // Fast skip empty 64-bit words.
+        cpos += Long.SIZE;
+        continue;
+      }
+      for (int j = 0; j < Long.SIZE; ++j, ++cpos, clong >>>= 1) {
+        if ((clong & 1L) == 1L) {
+          colIDs[colsIndex] = cpos;
+          ++colsIndex;
+        }
+      }
+    }
+    return colIDs;
+  }
+
+  /**
+   * Convert a bitset into integer row ids.
+   *
+   * @param rows Bitmask of selected rows (long[] encoding)
+   * @return integer row ids
+   */
+  protected ArrayDBIDs rowsBitsetToIDs(long[] rows) {
+    ArrayModifiableDBIDs rowIDs = DBIDUtil.newArray((int) BitsUtil.cardinality(rows));
+    DBIDArrayIter iter = this.rowIDs.iter();
+    outer: for (int rlpos = 0; rlpos < rows.length; ++rlpos) {
+      long rlong = rows[rlpos];
+      // Fast skip blocks of 64 masked values.
+      if (rlong == 0L) {
+        iter.advance(Long.SIZE);
+        continue;
+      }
+      for (int i = 0; i < Long.SIZE; ++i, rlong >>>= 1, iter.advance()) {
+        if (!iter.valid()) {
+          // Bitmask may be longer than the id array; stop at the end.
+          break outer;
+        }
+        if ((rlong & 1L) == 1L) {
+          rowIDs.add(iter);
+        }
+      }
+    }
+    return rowIDs;
+  }
+
+  /**
+   * Provides the number of rows of the data matrix.
+   *
+   * @return the number of rows of the data matrix
+   */
+  protected int getRowDim() {
+    return this.rowIDs.size();
+  }
+
+  /**
+   * Provides the number of columns of the data matrix.
+   *
+   * @return the number of columns of the data matrix
+   */
+  protected int getColDim() {
+    return colDim;
+  }
+
+  /**
+   * Getter for database.
+   *
+   * NOTE(review): {@link #database} is never assigned, so this currently
+   * always returns {@code null}.
+   *
+   * @return database
+   */
+  public Database getDatabase() {
+    return database;
+  }
+
+  /**
+   * Getter for the relation.
+   *
+   * @return relation
+   */
+  public Relation<V> getRelation() {
+    return relation;
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/biclustering/ChengAndChurch.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/biclustering/ChengAndChurch.java
new file mode 100644
index 00000000..e110faff
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/biclustering/ChengAndChurch.java
@@ -0,0 +1,900 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.biclustering;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Arrays;
+
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.BiclusterWithInversionsModel;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.math.Mean;
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution;
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.UniformDistribution;
+import de.lmu.ifi.dbs.elki.utilities.BitsUtil;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * Perform Cheng and Church biclustering.
+ *
+ * <p>
+ * Reference: <br>
+ * Y. Cheng and G. M. Church. Biclustering of expression data. In Proceedings of
+ * the 8th International Conference on Intelligent Systems for Molecular Biology
+ * (ISMB), San Diego, CA, 2000.
+ * </p>
+ *
+ * @author Erich Schubert
+ * @param <V> Vector type.
+ */
+@Reference(authors = "Y. Cheng, G. M. Church", title = "Biclustering of expression data", booktitle = "Proc. 8th International Conference on Intelligent Systems for Molecular Biology (ISMB)")
+public class ChengAndChurch<V extends NumberVector<?>> extends AbstractBiclustering<V, BiclusterWithInversionsModel> {
+  /**
+   * The logger for this class.
+   */
+  private static final Logging LOG = Logging.getLogger(ChengAndChurch.class);
+
+  /**
+   * The minimum number of columns that the database must have so that a removal
+   * of columns is performed in {@link #multipleNodeDeletion}.</p>
+   * <p>
+   * Just start deleting multiple columns when more than 100 columns are in the
+   * data matrix.
+   * </p>
+   */
+  private static final int MIN_COLUMN_REMOVE_THRESHOLD = 100;
+
+  /**
+   * The minimum number of rows that the database must have so that a removal of
+   * rows is performed in {@link #multipleNodeDeletion}.
+   * <p>
+   * Just start deleting multiple rows when more than 100 rows are in the data
+   * matrix.
+   * </p>
+   * <!--
+   * <p>
+   * The value is set to 100 as this is not really described in the paper.
+   * </p>
+   * -->
+   */
+  private static final int MIN_ROW_REMOVE_THRESHOLD = 100;
+
+  /**
+   * Threshold for the score (maximal acceptable mean squared residue).
+   */
+  private double delta;
+
+  /**
+   * The parameter for multiple node deletion.</p>
+   * <p>
+   * It is used to magnify the {@link #delta} value in the
+   * {@link #multipleNodeDeletion} method.
+   * </p>
+   */
+  private double alpha;
+
+  /**
+   * Number of biclusters to be found.
+   */
+  private int n;
+
+  /**
+   * Allow inversion of rows in the last phase.
+   *
+   * NOTE(review): hard-coded to {@code true}; no option is exposed in the
+   * Parameterizer to disable inverted rows.
+   */
+  private boolean useinverted = true;
+
+  /**
+   * Distribution to sample random replacement values from (used to mask found
+   * biclusters before searching for the next one).
+   */
+  private Distribution dist;
+
+  /**
+   * Constructor.
+   *
+   * @param delta Delta parameter: desired quality
+   * @param alpha Alpha parameter: controls switching to single node deletion
+   *        approach
+   * @param n Number of clusters to detect
+   * @param dist Distribution of random values to insert
+   */
+  public ChengAndChurch(double delta, double alpha, int n, Distribution dist) {
+    super();
+    this.delta = delta;
+    this.alpha = alpha;
+    this.n = n;
+    this.dist = dist;
+  }
+
+  /**
+   * Visitor pattern for processing cells.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   */
+  public static interface CellVisitor {
+    /**
+     * Different modes of operation: visit every cell, only cells whose
+     * row/column is currently selected, or only cells whose row/column is
+     * currently deselected.
+     */
+    int ALL = 0, SELECTED = 1, NOT_SELECTED = 2;
+
+    /**
+     * Visit a cell.
+     *
+     * @param val Value
+     * @param row Row Number
+     * @param col Column number
+     * @param selrow Boolean, whether row is selected
+     * @param selcol Boolean, whether column is selected
+     * @return Stop flag, return {@code true} to stop visiting
+     */
+    public boolean visit(double val, int row, int col, boolean selrow, boolean selcol);
+  }
+
+  /**
+   * Bicluster candidate.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   */
+  protected static class BiclusterCandidate {
+    /**
+     * Cardinalities: number of currently selected rows and columns.
+     */
+    int rowcard, colcard;
+
+    /**
+     * Means: per-row and per-column means over the selected submatrix
+     * (filled by {@code updateRowAndColumnMeans}).
+     */
+    double[] rowM, colM;
+
+    /**
+     * Row and column bitmasks: selected rows, inverted rows, selected columns.
+     */
+    long[] rows, irow, cols;
+
+    /**
+     * Mean of the current bicluster.
+     */
+    double allM;
+
+    /**
+     * The current bicluster score (mean squared residue).
+     */
+    double residue;
+
+    /**
+     * Constructor. Starts with all rows and columns selected and no
+     * inversions.
+     *
+     * @param rows Row dimensionality.
+     * @param cols Column dimensionality.
+     */
+    protected BiclusterCandidate(int rows, int cols) {
+      super();
+      this.rows = BitsUtil.ones(rows);
+      this.irow = BitsUtil.zero(rows);
+      this.rowcard = rows;
+      this.rowM = new double[rows];
+      this.cols = BitsUtil.ones(cols);
+      this.colcard = cols;
+      this.colM = new double[cols];
+    }
+
+    /**
+     * Resets the values for the next cluster search: reselect everything and
+     * clear the inversion mask. Means and residue are not reset here; they
+     * are recomputed by {@code updateRowAndColumnMeans}.
+     */
+    protected void reset() {
+      rows = BitsUtil.ones(rowM.length);
+      rowcard = rowM.length;
+      cols = BitsUtil.ones(colM.length);
+      colcard = colM.length;
+      BitsUtil.zeroI(irow);
+    }
+
+    /**
+     * Visit all selected cells in the data matrix.
+     *
+     * The set of visited cells is the cross product of the rows and columns
+     * matching {@code mode} (ALL, SELECTED, or NOT_SELECTED).
+     *
+     * @param mat Data matrix
+     * @param mode Operation mode
+     * @param visitor Visitor function
+     */
+    protected void visitAll(double[][] mat, int mode, CellVisitor visitor) {
+      // For efficiency, we manually iterate over the rows and column bitmasks.
+      // This saves repeated shifting needed by the manual bit access.
+      for(int rpos = 0, rlpos = 0; rlpos < rows.length; ++rlpos) {
+        long rlong = rows[rlpos];
+        // Fast skip blocks of 64 masked values.
+        if((mode == CellVisitor.SELECTED && rlong == 0L) || (mode == CellVisitor.NOT_SELECTED && rlong == -1L)) {
+          rpos += Long.SIZE;
+          continue;
+        }
+        for(int i = 0; i < Long.SIZE && rpos < rowM.length; ++i, ++rpos, rlong >>>= 1) {
+          boolean rselected = ((rlong & 1L) == 1L);
+          if((mode == CellVisitor.SELECTED && !rselected) || (mode == CellVisitor.NOT_SELECTED && rselected)) {
+            continue;
+          }
+          // Inner loop: same filtering logic, applied to the column bitmask.
+          for(int cpos = 0, clpos = 0; clpos < cols.length; ++clpos) {
+            long clong = cols[clpos];
+            if((mode == CellVisitor.SELECTED && clong == 0L) || (mode == CellVisitor.NOT_SELECTED && clong == -1L)) {
+              cpos += Long.SIZE;
+              continue;
+            }
+            for(int j = 0; j < Long.SIZE && cpos < colM.length; ++j, ++cpos, clong >>>= 1) {
+              boolean cselected = ((clong & 1L) == 1L);
+              if((mode == CellVisitor.SELECTED && !cselected) || (mode == CellVisitor.NOT_SELECTED && cselected)) {
+                continue;
+              }
+              boolean stop = visitor.visit(mat[rpos][cpos], rpos, cpos, rselected, cselected);
+              if(stop) {
+                return;
+              }
+            }
+          }
+        }
+      }
+    }
+
+    /**
+     * Visit a column of the matrix, iterating over the rows that match
+     * {@code mode}.
+     *
+     * @param mat Data matrix
+     * @param col Column to visit
+     * @param mode Operation mode
+     * @param visitor Visitor function
+     */
+    protected void visitColumn(double[][] mat, int col, int mode, CellVisitor visitor) {
+      boolean cselected = BitsUtil.get(cols, col);
+      // For efficiency, we manually iterate over the rows and column bitmasks.
+      // This saves repeated shifting needed by the manual bit access.
+      for(int rpos = 0, rlpos = 0; rlpos < rows.length; ++rlpos) {
+        long rlong = rows[rlpos];
+        // Fast skip blocks of 64 masked values.
+        if(mode == CellVisitor.SELECTED && rlong == 0L) {
+          rpos += Long.SIZE;
+          continue;
+        }
+        if(mode == CellVisitor.NOT_SELECTED && rlong == -1L) {
+          rpos += Long.SIZE;
+          continue;
+        }
+        for(int i = 0; i < Long.SIZE && rpos < rowM.length; ++i, ++rpos, rlong >>>= 1) {
+          boolean rselected = ((rlong & 1L) == 1L);
+          if(mode == CellVisitor.SELECTED && !rselected) {
+            continue;
+          }
+          if(mode == CellVisitor.NOT_SELECTED && rselected) {
+            continue;
+          }
+          boolean stop = visitor.visit(mat[rpos][col], rpos, col, rselected, cselected);
+          if(stop) {
+            return;
+          }
+        }
+      }
+    }
+
+    /**
+     * Visit a row of the data matrix, iterating over the columns that match
+     * {@code mode}.
+     *
+     * @param mat Data matrix
+     * @param row Row to visit
+     * @param mode Operation mode
+     * @param visitor Visitor function
+     */
+    protected void visitRow(double[][] mat, int row, int mode, CellVisitor visitor) {
+      boolean rselected = BitsUtil.get(rows, row);
+      final double[] rowdata = mat[row];
+      for(int cpos = 0, clpos = 0; clpos < cols.length; ++clpos) {
+        long clong = cols[clpos];
+        // Fast skip blocks of 64 masked values.
+        if(mode == CellVisitor.SELECTED && clong == 0L) {
+          cpos += Long.SIZE;
+          continue;
+        }
+        if(mode == CellVisitor.NOT_SELECTED && clong == -1L) {
+          cpos += Long.SIZE;
+          continue;
+        }
+        for(int j = 0; j < Long.SIZE && cpos < colM.length; ++j, ++cpos, clong >>>= 1) {
+          boolean cselected = ((clong & 1L) == 1L);
+          if(mode == CellVisitor.SELECTED && !cselected) {
+            continue;
+          }
+          if(mode == CellVisitor.NOT_SELECTED && cselected) {
+            continue;
+          }
+          boolean stop = visitor.visit(rowdata[cpos], row, cpos, rselected, cselected);
+          if(stop) {
+            return;
+          }
+        }
+      }
+    }
+
+    /** Visitor for updating the means: accumulates sums into rowM/colM/allM. */
+    private final CellVisitor MEANVISITOR = new CellVisitor() {
+      @Override
+      public boolean visit(double val, int row, int col, boolean selrow, boolean selcol) {
+        if(selcol) {
+          rowM[row] += val;
+        }
+        if(selrow) {
+          colM[col] += val;
+        }
+        if(selcol && selrow) {
+          allM += val;
+        }
+        return false;
+      }
+    };
+
+    /**
+     * Update the row means and column means.
+     *
+     * @param mat Data matrix
+     * @param all Flag, to update all (not only the selected) rows and columns
+     * @return overall mean
+     */
+    protected double updateRowAndColumnMeans(final double[][] mat, boolean all) {
+      final int mode = all ? CellVisitor.ALL : CellVisitor.SELECTED;
+      Arrays.fill(rowM, 0.);
+      Arrays.fill(colM, 0.);
+      allM = 0.;
+      visitAll(mat, mode, MEANVISITOR);
+      // Normalize: visitColumn/visitRow are only used here to enumerate the
+      // relevant row/column indices once each (the cell values are ignored).
+      visitColumn(mat, 0, mode, new CellVisitor() {
+        @Override
+        public boolean visit(double val, int row, int col, boolean selrow, boolean selcol) {
+          rowM[row] /= colcard;
+          return false;
+        }
+      });
+      visitRow(mat, 0, mode, new CellVisitor() {
+        @Override
+        public boolean visit(double val, int row, int col, boolean selrow, boolean selcol) {
+          colM[col] /= rowcard;
+          return false;
+        }
+      });
+      allM /= colcard * rowcard;
+      return allM;
+    }
+
+    /**
+     * Compute the mean square residue over the selected submatrix, and store
+     * it in {@link #residue}.
+     *
+     * @param mat Data matrix
+     * @return mean squared residue
+     */
+    protected double computeMeanSquaredDeviation(final double[][] mat) {
+      final Mean msr = new Mean();
+      visitAll(mat, CellVisitor.SELECTED, new CellVisitor() {
+        @Override
+        public boolean visit(double val, int row, int col, boolean selrow, boolean selcol) {
+          assert (selrow && selcol);
+          // Residue: deviation from the additive row+column model.
+          double v = val - rowM[row] - colM[col] + allM;
+          msr.put(v * v);
+          return false;
+        }
+      });
+      residue = msr.getMean();
+      return residue;
+    }
+
+    /**
+     * Computes the <b>mean row residue</b> of the given <code>row</code>.
+     *
+     * @param mat Data matrix
+     * @param row The row who's residue should be computed.
+     * @param rowinverted Indicates if the row should be considered inverted.
+     * @return The row residue of the given <code>row</code>.
+     */
+    protected double computeRowResidue(final double[][] mat, int row, final boolean rowinverted) {
+      final Mean rowResidue = new Mean();
+      visitRow(mat, row, CellVisitor.SELECTED, new CellVisitor() {
+        @Override
+        public boolean visit(double val, int row, int col, boolean selrow, boolean selcol) {
+          assert (selcol);
+          final double rowMean = rowM[row];
+          final double colMean = colM[col];
+          // For inverted rows the sign of the row deviation is flipped.
+          double v = ((!rowinverted) ? (val - rowMean) : (rowMean - val)) - colMean + allM;
+          rowResidue.put(v * v);
+          return false;
+        }
+      });
+      return rowResidue.getMean();
+    }
+
+    /**
+     *
+     * Computes the <b>mean column residue</b> of the given <code>col</code>.
+     *
+     * @param mat Data matrix
+     * @param col The column who's residue should be computed.
+     * @return The row residue of the given <code>col</code>um.
+     */
+    protected double computeColResidue(final double[][] mat, final int col) {
+      // Column deviation is constant for all rows; hoist it out of the loop.
+      final double bias = colM[col] - allM;
+      final Mean colResidue = new Mean();
+      visitColumn(mat, col, CellVisitor.SELECTED, new CellVisitor() {
+        @Override
+        public boolean visit(double val, int row, int col, boolean selrow, boolean selcol) {
+          assert (selrow);
+          final double rowMean = rowM[row];
+          double v = val - rowMean - bias;
+          colResidue.put(v * v);
+          return false;
+        }
+      });
+      return colResidue.getMean();
+    }
+
+    /**
+     * Updates the mask with replacement values for all data in the given rows
+     * and columns.
+     *
+     * @param mat Mask to update.
+     * @param replacement Distribution to sample replacement values from.
+     */
+    protected void maskMatrix(final double[][] mat, final Distribution replacement) {
+      visitAll(mat, CellVisitor.SELECTED, new CellVisitor() {
+        @Override
+        public boolean visit(double val, int row, int col, boolean selrow, boolean selcol) {
+          assert (selrow && selcol);
+          // Overwrite the found bicluster with random noise, so that the next
+          // search does not rediscover it.
+          mat[row][col] = replacement.nextRandom();
+          return false;
+        }
+      });
+    }
+
+    /**
+     * Select or deselect a column, keeping {@link #colcard} in sync.
+     *
+     * @param cnum Column to select
+     * @param set Value to set
+     */
+    protected void selectColumn(int cnum, boolean set) {
+      if(set) {
+        BitsUtil.setI(cols, cnum);
+        colcard++;
+      }
+      else {
+        BitsUtil.clearI(cols, cnum);
+        colcard--;
+      }
+    }
+
+    /**
+     * Select or deselect a row, keeping {@link #rowcard} in sync.
+     *
+     * @param rnum Row to select
+     * @param set Value to set
+     */
+    protected void selectRow(int rnum, boolean set) {
+      if(set) {
+        BitsUtil.setI(rows, rnum);
+        rowcard++;
+      }
+      else {
+        BitsUtil.clearI(rows, rnum);
+        rowcard--;
+      }
+    }
+
+    /**
+     * Mark or unmark a row as inverted in the inversion bitmask.
+     *
+     * @param rnum Row number
+     * @param b {@code true} to mark the row as inverted, {@code false} to
+     *        clear the inversion flag
+     */
+    protected void invertRow(int rnum, boolean b) {
+      // Fix: honor the flag. Previously the parameter was ignored and the bit
+      // was always set; identical behavior for the existing caller, which
+      // always passes true.
+      if(b) {
+        BitsUtil.setI(irow, rnum);
+      }
+      else {
+        BitsUtil.clearI(irow, rnum);
+      }
+    }
+  }
+
+  @Override
+  public Clustering<BiclusterWithInversionsModel> biclustering() {
+    // Work on a copy of the data, since found biclusters get masked with
+    // random values below.
+    double[][] mat = RelationUtil.relationAsMatrix(relation, rowIDs);
+
+    BiclusterCandidate cand = new BiclusterCandidate(getRowDim(), getColDim());
+
+    Clustering<BiclusterWithInversionsModel> result = new Clustering<>("Cheng-and-Church", "Cheng and Church Biclustering");
+    ModifiableDBIDs noise = DBIDUtil.newHashSet(relation.getDBIDs());
+
+    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Extracting Cluster", n, LOG) : null;
+    for(int i = 0; i < n; i++) {
+      cand.reset();
+      // Algorithms 2, 1, 3 of the paper, in the order given by Algorithm 4.
+      multipleNodeDeletion(mat, cand);
+      if(LOG.isVeryVerbose()) {
+        LOG.veryverbose("Residue after Alg 2: " + cand.residue + " " + cand.rowcard + "x" + cand.colcard);
+      }
+      singleNodeDeletion(mat, cand);
+      if(LOG.isVeryVerbose()) {
+        LOG.veryverbose("Residue after Alg 1: " + cand.residue + " " + cand.rowcard + "x" + cand.colcard);
+      }
+      nodeAddition(mat, cand);
+      if(LOG.isVeryVerbose()) {
+        LOG.veryverbose("Residue after Alg 3: " + cand.residue + " " + cand.rowcard + "x" + cand.colcard);
+      }
+      // Mask the found bicluster, so the next iteration finds a new one.
+      cand.maskMatrix(mat, dist);
+      BiclusterWithInversionsModel model = new BiclusterWithInversionsModel(colsBitsetToIDs(cand.cols), rowsBitsetToIDs(cand.irow));
+      final ArrayDBIDs cids = rowsBitsetToIDs(cand.rows);
+      noise.removeDBIDs(cids);
+      result.addToplevelCluster(new Cluster<>(cids, model));
+
+      if(LOG.isVerbose()) {
+        LOG.verbose("Score of bicluster " + (i + 1) + ": " + cand.residue + "\n");
+        LOG.verbose("Number of rows: " + cand.rowcard + "\n");
+        LOG.verbose("Number of columns: " + cand.colcard + "\n");
+        // LOG.verbose("Total number of masked values: " + maskedVals.size() +
+        // "\n");
+      }
+      if(prog != null) {
+        prog.incrementProcessed(LOG);
+      }
+    }
+    // Add a noise cluster, full-dimensional.
+    if(!noise.isEmpty()) {
+      long[] allcols = BitsUtil.ones(getColDim());
+      BiclusterWithInversionsModel model = new BiclusterWithInversionsModel(colsBitsetToIDs(allcols), DBIDUtil.EMPTYDBIDS);
+      result.addToplevelCluster(new Cluster<>(noise, true, model));
+    }
+    if(prog != null) {
+      prog.ensureCompleted(LOG);
+    }
+    return result;
+  }
+
+  /**
+   * Algorithm 1 of Cheng and Church:
+   *
+   * Remove single rows or columns.
+   *
+   * Inverted rows are not supported in this method.
+   *
+   * @param mat Data matrix
+   * @param cand Bicluster candidate
+   */
+  private void singleNodeDeletion(final double[][] mat, final BiclusterCandidate cand) {
+    // Assume that cand.residue is up to date!
+    while(cand.residue > delta && (cand.colcard > 2 || cand.rowcard > 2)) {
+      // Store current maximum. Need final mutable, so use arrays.
+      // max[0] holds the largest residue seen over BOTH rows and columns;
+      // the entity with the overall worst residue is removed.
+      final double[] max = { Double.NEGATIVE_INFINITY };
+      final int[] best = { -1, -1 };
+
+      // Test rows
+      if(cand.rowcard > 2) {
+        cand.visitColumn(mat, 0, CellVisitor.SELECTED, new CellVisitor() {
+          @Override
+          public boolean visit(double val, int row, int col, boolean selrow, boolean selcol) {
+            assert (selrow);
+            double rowResidue = cand.computeRowResidue(mat, row, false);
+            if(max[0] < rowResidue) {
+              max[0] = rowResidue;
+              best[0] = row;
+            }
+            return false;
+          }
+        });
+      }
+
+      // Test columns:
+      if(cand.colcard > 2) {
+        cand.visitRow(mat, 0, CellVisitor.SELECTED, new CellVisitor() {
+          @Override
+          public boolean visit(double val, int row, int col, boolean selrow, boolean selcol) {
+            assert (selcol);
+            double colResidue = cand.computeColResidue(mat, col);
+            if(max[0] < colResidue) {
+              max[0] = colResidue;
+              best[1] = col;
+            }
+            return false;
+          }
+        });
+      }
+
+      if(best[1] >= 0) { // then override bestrow!
+        cand.selectColumn(best[1], false);
+      }
+      else {
+        assert (best[0] >= 0);
+        cand.selectRow(best[0], false);
+      }
+      // TODO: incremental update could be much faster?
+      cand.updateRowAndColumnMeans(mat, false);
+      cand.computeMeanSquaredDeviation(mat);
+      if(LOG.isDebuggingFine()) {
+        LOG.debugFine("Residue in Alg 1: " + cand.residue + " " + cand.rowcard + "x" + cand.colcard);
+      }
+    }
+  }
+
+  /**
+   * Algorithm 2 of Cheng and Church.
+   *
+   * Remove all rows and columns that reduce the residue by alpha.
+   *
+   * Inverted rows are not supported in this method.
+   *
+   * @param mat Data matrix
+   * @param cand Bicluster candidate
+   */
+  private void multipleNodeDeletion(final double[][] mat, final BiclusterCandidate cand) {
+    cand.updateRowAndColumnMeans(mat, false);
+    cand.computeMeanSquaredDeviation(mat);
+
+    // Note: assumes that cand.residue = H(I,J)
+    while(cand.residue > delta) {
+      final boolean[] modified = { false, false };
+
+      // Step 2: remove rows above threshold
+      if(cand.rowcard > MIN_ROW_REMOVE_THRESHOLD) {
+        final double alphaResidue = alpha * cand.residue;
+        cand.visitColumn(mat, 0, CellVisitor.SELECTED, new CellVisitor() {
+          @Override
+          public boolean visit(double val, int row, int col, boolean selrow, boolean selcol) {
+            assert (selrow);
+            if(cand.computeRowResidue(mat, row, false) > alphaResidue) {
+              cand.selectRow(row, false);
+              modified[0] = true;
+            }
+            // Stop early once the matrix has shrunk below the threshold.
+            return (cand.rowcard > MIN_ROW_REMOVE_THRESHOLD);
+          }
+        });
+
+        // Step 3: update residue
+        if(modified[0]) {
+          cand.updateRowAndColumnMeans(mat, false);
+          cand.computeMeanSquaredDeviation(mat);
+        }
+      }
+
+      // Step 4: remove columns above threshold
+      if(cand.colcard > MIN_COLUMN_REMOVE_THRESHOLD) {
+        final double alphaResidue = alpha * cand.residue;
+        cand.visitRow(mat, 0, CellVisitor.SELECTED, new CellVisitor() {
+          @Override
+          public boolean visit(double val, int row, int col, boolean selrow, boolean selcol) {
+            assert (selcol);
+            if(cand.computeColResidue(mat, col) > alphaResidue) {
+              cand.selectColumn(col, false);
+              modified[1] = true;
+            }
+            // Stop early once the matrix has shrunk below the threshold.
+            return (cand.colcard > MIN_COLUMN_REMOVE_THRESHOLD);
+          }
+        });
+        if(modified[1]) {
+          cand.updateRowAndColumnMeans(mat, false);
+          cand.computeMeanSquaredDeviation(mat);
+        }
+      }
+
+      if(LOG.isDebuggingFine()) {
+        LOG.debugFine("Residue in Alg 2: " + cand.residue + " " + cand.rowcard + "x" + cand.colcard);
+      }
+      // Step 5: if nothing has been removed, try removing single nodes.
+      if(!modified[0] && !modified[1]) {
+        break;
+        // Will be executed next in main loop, as per algorithm 4.
+        // singleNodeDeletion();
+      }
+    }
+  }
+
+  /**
+   * Algorithm 3 of Cheng and Church.
+   *
+   * Try to re-add rows or columns that decrease the overall score.
+   *
+   * Also try adding inverted rows.
+   *
+   * @param mat Data matrix
+   * @param cand Bicluster candidate
+   */
+  private void nodeAddition(final double[][] mat, final BiclusterCandidate cand) {
+    // Means are computed over ALL cells here, since deselected rows/columns
+    // are candidates for (re-)addition.
+    cand.updateRowAndColumnMeans(mat, true);
+    cand.computeMeanSquaredDeviation(mat);
+    while(true) {
+      // We need this to be final + mutable
+      final boolean[] added = new boolean[] { false, false };
+
+      // Step 2: add columns
+      cand.visitRow(mat, 0, CellVisitor.NOT_SELECTED, new CellVisitor() {
+        @Override
+        public boolean visit(double val, int row, int col, boolean selrow, boolean selcol) {
+          assert (!selcol);
+          if(cand.computeColResidue(mat, col) <= cand.residue) {
+            cand.selectColumn(col, true);
+            added[0] = true;
+          }
+          return false;
+        }
+      });
+
+      // Step 3: recompute values
+      if(added[0]) {
+        cand.updateRowAndColumnMeans(mat, true);
+        cand.computeMeanSquaredDeviation(mat);
+      }
+
+      // Step 4: try adding rows.
+      cand.visitColumn(mat, 0, CellVisitor.NOT_SELECTED, new CellVisitor() {
+        @Override
+        public boolean visit(double val, int row, int col, boolean selrow, boolean selcol) {
+          assert (!selrow);
+          if(cand.computeRowResidue(mat, row, false) <= cand.residue) {
+            cand.selectRow(row, true);
+            added[1] = true;
+          }
+          return false;
+        }
+      });
+
+      // Step 5: try adding inverted rows.
+      if(useinverted) {
+        cand.visitColumn(mat, 0, CellVisitor.NOT_SELECTED, new CellVisitor() {
+          @Override
+          public boolean visit(double val, int row, int col, boolean selrow, boolean selcol) {
+            assert (!selrow);
+            if(cand.computeRowResidue(mat, row, true) <= cand.residue) {
+              cand.selectRow(row, true);
+              cand.invertRow(row, true);
+              added[1] = true;
+            }
+            return false;
+          }
+        });
+      }
+      if(added[1]) {
+        cand.updateRowAndColumnMeans(mat, true);
+        cand.computeMeanSquaredDeviation(mat);
+        if(LOG.isDebuggingFine()) {
+          LOG.debugFine("Residue in Alg 3: " + cand.residue + " " + cand.rowcard + "x" + cand.colcard);
+        }
+      }
+      // Terminate when a full pass added nothing.
+      if(!added[0] && !added[1]) {
+        break;
+      }
+    }
+  }
+
+  // Requires a fixed-dimensionality numeric vector field as input.
+  @Override
+  public TypeInformation[] getInputTypeRestriction() {
+    return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
+  }
+
+  @Override
+  protected Logging getLogger() {
+    return LOG;
+  }
+
+  /**
+   * Parameterization class.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   *
+   * @param <V> Vector type
+   */
+  public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
+    /**
+     * Parameter to specify the distribution of replacement values when masking
+     * a cluster.
+     */
+    public static final OptionID DIST_ID = new OptionID("chengandchurch.replacement", "Distribution of replacement values when masking found clusters.");
+
+    /**
+     * Threshold value to determine the maximal acceptable score (mean squared
+     * residue) of a bicluster.
+     * <p/>
+     * Key: {@code -chengandchurch.delta}
+     * </p>
+     */
+    public static final OptionID DELTA_ID = new OptionID("chengandchurch.delta", "Threshold value to determine the maximal acceptable score (mean squared residue) of a bicluster.");
+
+    /**
+     * Parameter for multiple node deletion to accelerate the algorithm. (&gt;=
+     * 1)
+     * <p/>
+     * Key: {@code -chengandchurch.alpha}
+     * </p>
+     */
+    public static final OptionID ALPHA_ID = new OptionID("chengandchurch.alpha", "Parameter for multiple node deletion to accelerate the algorithm.");
+
+    /**
+     * Number of biclusters to be found.
+     * <p/>
+     * Default value: 1
+     * </p>
+     * <p/>
+     * Key: {@code -chengandchurch.n}
+     * </p>
+     */
+    public static final OptionID N_ID = new OptionID("chengandchurch.n", "The number of biclusters to be found.");
+
+    /**
+     * Threshold for the score ({@link #DELTA_ID}).
+     */
+    private double delta;
+
+    /**
+     * The parameter for multiple node deletion.</p>
+     * <p>
+     * It is used to magnify the {@link #delta} value in the
+     * {@link ChengAndChurch#multipleNodeDeletion} method.
+     * </p>
+     */
+    private double alpha;
+
+    /**
+     * Number of biclusters to be found.
+     */
+    private int n;
+
+    /**
+     * Distribution of replacement values.
+     */
+    private Distribution dist;
+
+    @Override
+    protected void makeOptions(Parameterization config) {
+      super.makeOptions(config);
+      DoubleParameter deltaP = new DoubleParameter(DELTA_ID);
+      // Fix: register the constraint BEFORE grabbing the parameter, so the
+      // >= 0 check is actually enforced during parameterization (previously
+      // it was added after config.grab and thus never validated). This also
+      // matches the style used for nP and alphaP below.
+      deltaP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
+      if(config.grab(deltaP)) {
+        delta = deltaP.doubleValue();
+      }
+
+      IntParameter nP = new IntParameter(N_ID, 1);
+      nP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+      if(config.grab(nP)) {
+        n = nP.intValue();
+      }
+
+      DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, 1.);
+      alphaP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_DOUBLE);
+      if(config.grab(alphaP)) {
+        alpha = alphaP.doubleValue();
+      }
+
+      ObjectParameter<Distribution> distP = new ObjectParameter<>(DIST_ID, Distribution.class, UniformDistribution.class);
+      if(config.grab(distP)) {
+        dist = distP.instantiateClass(config);
+      }
+    }
+
+    @Override
+    protected ChengAndChurch<V> makeInstance() {
+      return new ChengAndChurch<>(delta, alpha, n, dist);
+    }
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/biclustering/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/biclustering/package-info.java
new file mode 100644
index 00000000..21363bfc
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/biclustering/package-info.java
@@ -0,0 +1,28 @@
+/**
+ * <p>Biclustering algorithms.</p>
+ *
+ *
+ */
+/*
+This file is part of ELKI:
+Environment for Developing KDD-Applications Supported by Index-Structures
+
+Copyright (C) 2013
+Ludwig-Maximilians-Universität München
+Lehr- und Forschungseinheit für Datenbanksysteme
+ELKI Development Team
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+package de.lmu.ifi.dbs.elki.algorithm.clustering.biclustering; \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/CASH.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/CASH.java
index 0d82add9..8e5fa627 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/CASH.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/CASH.java
@@ -74,7 +74,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
@@ -838,22 +838,22 @@ public class CASH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
IntParameter minptsP = new IntParameter(MINPTS_ID);
- minptsP.addConstraint(new GreaterConstraint(0));
+ minptsP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
if (config.grab(minptsP)) {
minpts = minptsP.getValue();
}
IntParameter maxlevelP = new IntParameter(MAXLEVEL_ID);
- maxlevelP.addConstraint(new GreaterConstraint(0));
+ maxlevelP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
if (config.grab(maxlevelP)) {
maxlevel = maxlevelP.getValue();
}
IntParameter mindimP = new IntParameter(MINDIM_ID, 1);
- mindimP.addConstraint(new GreaterConstraint(0));
+ mindimP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
if (config.grab(mindimP)) {
mindim = mindimP.getValue();
}
DoubleParameter jitterP = new DoubleParameter(JITTER_ID);
- jitterP.addConstraint(new GreaterConstraint(0));
+ jitterP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
if (config.grab(jitterP)) {
jitter = jitterP.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/COPAC.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/COPAC.java
index 9a4b8512..68878aef 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/COPAC.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/COPAC.java
@@ -29,7 +29,7 @@ import java.util.Map;
import java.util.Map.Entry;
import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
-import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.DistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
@@ -270,7 +270,7 @@ public class COPAC<V extends NumberVector<?>, D extends Distance<D>> extends Abs
public ClusteringAlgorithm<Clustering<Model>> getPartitionAlgorithm(DistanceQuery<V, D> query) {
ListParameterization reconfig = new ListParameterization(partitionAlgorithmParameters);
ProxyDistanceFunction<V, D> dist = ProxyDistanceFunction.proxy(query);
- reconfig.addParameter(AbstractDistanceBasedAlgorithm.DISTANCE_FUNCTION_ID, dist);
+ reconfig.addParameter(DistanceBasedAlgorithm.DISTANCE_FUNCTION_ID, dist);
ClusteringAlgorithm<Clustering<Model>> instance = reconfig.tryInstantiate(partitionAlgorithm);
reconfig.failOnErrors();
return instance;
@@ -335,7 +335,7 @@ public class COPAC<V extends NumberVector<?>, D extends Distance<D>> extends Abs
ClassParameter<ClusteringAlgorithm<Clustering<Model>>> algP = new ClassParameter<>(PARTITION_ALGORITHM_ID, ClusteringAlgorithm.class);
if(config.grab(algP)) {
ListParameterization predefined = new ListParameterization();
- predefined.addParameter(AbstractDistanceBasedAlgorithm.DISTANCE_FUNCTION_ID, pdistI);
+ predefined.addParameter(DistanceBasedAlgorithm.DISTANCE_FUNCTION_ID, pdistI);
TrackParameters trackpar = new TrackParameters(config);
ChainedParameterization chain = new ChainedParameterization(predefined, trackpar);
chain.errorsTo(config);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/HiCO.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/HiCO.java
index d1b714bf..79ddc16e 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/HiCO.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/HiCO.java
@@ -36,9 +36,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ChainedParameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
@@ -162,33 +160,34 @@ public class HiCO<V extends NumberVector<?>> extends OPTICS<V, PCACorrelationDis
super.makeOptions(config);
IntParameter muP = new IntParameter(MU_ID);
- muP.addConstraint(new GreaterConstraint(0));
- if (config.grab(muP)) {
+ muP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(muP)) {
mu = muP.getValue();
}
IntParameter kP = new IntParameter(K_ID);
- kP.addConstraint(new GreaterConstraint(0));
+ kP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
kP.setOptional(true);
final int k;
- if (config.grab(kP)) {
+ if(config.grab(kP)) {
k = kP.getValue();
- } else {
+ }
+ else {
k = mu;
}
DoubleParameter deltaP = new DoubleParameter(DELTA_ID, DEFAULT_DELTA);
- deltaP.addConstraint(new GreaterEqualConstraint(0));
+ deltaP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
double delta = DEFAULT_DELTA;
- if (config.grab(deltaP)) {
+ if(config.grab(deltaP)) {
delta = deltaP.doubleValue();
}
DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, DEFAULT_ALPHA);
- alphaP.addConstraint(new GreaterConstraint(0.0));
- alphaP.addConstraint(new LessConstraint(1.0));
+ alphaP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+ alphaP.addConstraint(CommonConstraints.LESS_THAN_ONE_DOUBLE);
double alpha = DEFAULT_ALPHA;
- if (config.grab(alphaP)) {
+ if(config.grab(alphaP)) {
alpha = alphaP.doubleValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/LMCLUS.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/LMCLUS.java
index f9531be0..99144b42 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/LMCLUS.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/LMCLUS.java
@@ -55,7 +55,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -116,7 +116,7 @@ public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
* Number of sampling rounds to find a good split
*/
private final int samplingLevel;
-
+
/**
* Random factory
*/
@@ -163,34 +163,34 @@ public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Clustered objects", relation.size(), LOG) : null;
IndefiniteProgress cprogress = LOG.isVerbose() ? new IndefiniteProgress("Clusters found", LOG) : null;
ModifiableDBIDs unclustered = DBIDUtil.newHashSet(relation.getDBIDs());
- Random r = rnd.getRandom();
+ Random r = rnd.getSingleThreadedRandom();
final int maxdim = Math.min(maxLMDim, RelationUtil.dimensionality(relation));
int cnum = 0;
- while (unclustered.size() > minsize) {
+ while(unclustered.size() > minsize) {
DBIDs current = unclustered;
int lmDim = 1;
- for (int k = 1; k <= maxdim; k++) {
+ for(int k = 1; k <= maxdim; k++) {
// Implementation note: this while loop is from the original publication
// and the published LMCLUS source code. It doesn't make sense to me -
// it is lacking a stop criterion other than "cluster is too small" and
// "cluster is inseparable"! Additionally, there is good criterion for
// stopping at the appropriate dimensionality either.
- while (true) {
+ while(true) {
Separation separation = findSeparation(relation, current, k, r);
// logger.verbose("k: " + k + " goodness: " + separation.goodness +
// " threshold: " + separation.threshold);
- if (separation.goodness <= sensitivityThreshold) {
+ if(separation.goodness <= sensitivityThreshold) {
break;
}
ModifiableDBIDs subset = DBIDUtil.newArray(current.size());
- for (DBIDIter iter = current.iter(); iter.valid(); iter.advance()) {
- if (deviation(relation.get(iter).getColumnVector().minusEquals(separation.originV), separation.basis) < separation.threshold) {
+ for(DBIDIter iter = current.iter(); iter.valid(); iter.advance()) {
+ if(deviation(relation.get(iter).getColumnVector().minusEquals(separation.originV), separation.basis) < separation.threshold) {
subset.add(iter);
}
}
// logger.verbose("size:"+subset.size());
- if (subset.size() < minsize) {
+ if(subset.size() < minsize) {
break;
}
current = subset;
@@ -199,7 +199,7 @@ public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
}
}
// No more clusters found
- if (current.size() < minsize || current == unclustered) {
+ if(current.size() < minsize || current == unclustered) {
break;
}
// New cluster found
@@ -210,22 +210,22 @@ public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
ret.addToplevelCluster(cluster);
// Remove from main working set.
unclustered.removeDBIDs(current);
- if (progress != null) {
+ if(progress != null) {
progress.setProcessed(relation.size() - unclustered.size(), LOG);
}
- if (cprogress != null) {
+ if(cprogress != null) {
cprogress.setProcessed(cnum, LOG);
}
}
// Remaining objects are noise
- if (unclustered.size() > 0) {
+ if(unclustered.size() > 0) {
ret.addToplevelCluster(new Cluster<>(unclustered, true));
}
- if (progress != null) {
+ if(progress != null) {
progress.setProcessed(relation.size(), LOG);
progress.ensureCompleted(LOG);
}
- if (cprogress != null) {
+ if(cprogress != null) {
cprogress.setCompleted(LOG);
}
return ret;
@@ -272,7 +272,7 @@ public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
int samples = (int) Math.min(Math.log(NOT_FROM_ONE_CLUSTER_PROBABILITY) / (Math.log(1 - Math.pow((1.0d / samplingLevel), dimension))), (double) currentids.size());
// System.out.println("Number of samples: " + samples);
int remaining_retries = 100;
- for (int i = 1; i <= samples; i++) {
+ for(int i = 1; i <= samples; i++) {
DBIDs sample = DBIDUtil.randomSample(currentids, dimension + 1, r.nextLong());
final DBIDIter iter = sample.iter();
// Use first as origin
@@ -282,17 +282,17 @@ public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
Matrix basis;
{
List<Vector> vectors = new ArrayList<>(sample.size() - 1);
- for (; iter.valid(); iter.advance()) {
+ for(; iter.valid(); iter.advance()) {
Vector vec = relation.get(iter).getColumnVector();
vectors.add(vec.minusEquals(originV));
}
// generate orthogonal basis
basis = generateOrthonormalBasis(vectors);
- if (basis == null) {
+ if(basis == null) {
// new sample has to be taken.
i--;
remaining_retries--;
- if (remaining_retries < 0) {
+ if(remaining_retries < 0) {
throw new AbortException("Too many retries in sampling, and always a linear dependant data set.");
}
continue;
@@ -301,9 +301,9 @@ public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
// Generate and fill a histogram.
DoubleDynamicHistogram histogram = new DoubleDynamicHistogram(BINS);
double w = 1.0 / currentids.size();
- for (DBIDIter iter2 = currentids.iter(); iter2.valid(); iter2.advance()) {
+ for(DBIDIter iter2 = currentids.iter(); iter2.valid(); iter2.advance()) {
// Skip sampled points
- if (sample.contains(iter2)) {
+ if(sample.contains(iter2)) {
continue;
}
Vector vec = relation.get(iter2).getColumnVector().minusEquals(originV);
@@ -311,7 +311,7 @@ public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
histogram.increment(distance, w);
}
double[] th = findAndEvaluateThreshold(histogram); // evaluate threshold
- if (th[1] > separation.goodness) {
+ if(th[1] > separation.goodness) {
separation.goodness = th[1];
separation.threshold = th[0];
separation.originV = originV;
@@ -341,16 +341,16 @@ public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
first = first.times(1.0 / first.euclideanLength());
Matrix ret = new Matrix(first.getDimensionality(), vectors.size());
ret.setCol(0, first);
- for (int i = 1; i < vectors.size(); i++) {
+ for(int i = 1; i < vectors.size(); i++) {
// System.out.println("Matrix:" + ret);
Vector v_i = vectors.get(i);
Vector u_i = v_i.copy();
// System.out.println("Vector " + i + ":" + partialSol);
- for (int j = 0; j < i; j++) {
+ for(int j = 0; j < i; j++) {
Vector v_j = ret.getCol(j);
double f = v_i.transposeTimes(v_j) / v_j.transposeTimes(v_j);
- if (Double.isNaN(f)) {
- if (LOG.isDebuggingFine()) {
+ if(Double.isNaN(f)) {
+ if(LOG.isDebuggingFine()) {
LOG.debugFine("Zero vector encountered? " + v_j);
}
return null;
@@ -359,8 +359,8 @@ public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
}
// check if the vectors weren't independent
final double len_u_i = u_i.euclideanLength();
- if (len_u_i == 0.0) {
- if (LOG.isDebuggingFine()) {
+ if(len_u_i == 0.0) {
+ if(LOG.isDebuggingFine()) {
LOG.debugFine("Points not independent - no orthonormalization.");
}
return null;
@@ -391,7 +391,7 @@ public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
{
MeanVariance mv = new MeanVariance();
DoubleHistogram.Iter forward = histogram.iter();
- for (int i = 0; forward.valid(); i++, forward.advance()) {
+ for(int i = 0; forward.valid(); i++, forward.advance()) {
p1[i] = forward.getValue() + ((i > 0) ? p1[i - 1] : 0);
mv.put(i, forward.getValue());
mu1[i] = mv.getMean();
@@ -404,7 +404,7 @@ public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
DoubleHistogram.Iter backwards = histogram.iter();
backwards.seek(histogram.getNumBins() - 1); // Seek to last
- for (int j = n - 1; backwards.valid(); j--, backwards.retract()) {
+ for(int j = n - 1; backwards.valid(); j--, backwards.retract()) {
p2[j] = backwards.getValue() + ((j + 1 < n) ? p2[j + 1] : 0);
mv.put(j, backwards.getValue());
mu2[j] = mv.getMean();
@@ -412,7 +412,7 @@ public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
}
}
- for (int i = 0; i < n; i++) {
+ for(int i = 0; i < n; i++) {
jt[i] = 1.0 + 2 * (p1[i] * (Math.log(sigma1[i]) - Math.log(p1[i])) + p2[i] * (Math.log(sigma2[i]) - Math.log(p2[i])));
}
@@ -420,23 +420,23 @@ public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
double bestgoodness = Double.NEGATIVE_INFINITY;
double devPrev = jt[1] - jt[0];
- for (int i = 1; i < jt.length - 1; i++) {
+ for(int i = 1; i < jt.length - 1; i++) {
double devCur = jt[i + 1] - jt[i];
// System.out.println(p1[i]);
// System.out.println(jt[i + 1]);
// System.out.println(jt[i]);
// System.out.println(devCur);
// Local minimum found - calculate depth
- if (devCur >= 0 && devPrev <= 0) {
+ if(devCur >= 0 && devPrev <= 0) {
double lowestMaxima = Double.POSITIVE_INFINITY;
- for (int j = i - 1; j > 0; j--) {
- if (jt[j - 1] < jt[j]) {
+ for(int j = i - 1; j > 0; j--) {
+ if(jt[j - 1] < jt[j]) {
lowestMaxima = Math.min(lowestMaxima, jt[j]);
break;
}
}
- for (int j = i + 1; j < n - 2; j++) {
- if (jt[j + 1] < jt[j]) {
+ for(int j = i + 1; j < n - 2; j++) {
+ if(jt[j + 1] < jt[j]) {
lowestMaxima = Math.min(lowestMaxima, jt[j]);
break;
}
@@ -445,11 +445,11 @@ public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
final double mud = mu1[i] - mu2[i];
double discriminability = mud * mud / (sigma1[i] * sigma1[i] + sigma2[i] * sigma2[i]);
- if (Double.isNaN(discriminability)) {
+ if(Double.isNaN(discriminability)) {
discriminability = -1;
}
double goodness = localDepth * discriminability;
- if (goodness > bestgoodness) {
+ if(goodness > bestgoodness) {
bestgoodness = goodness;
bestpos = i;
}
@@ -552,7 +552,7 @@ public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
* Threshold
*/
private double threshold;
-
+
/**
* Random generator
*/
@@ -562,26 +562,26 @@ public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
IntParameter maxLMDimP = new IntParameter(MAXDIM_ID);
- maxLMDimP.addConstraint(new GreaterEqualConstraint(1));
+ maxLMDimP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
maxLMDimP.setOptional(true);
- if (config.grab(maxLMDimP)) {
+ if(config.grab(maxLMDimP)) {
maxdim = maxLMDimP.getValue();
}
IntParameter minsizeP = new IntParameter(MINSIZE_ID);
- minsizeP.addConstraint(new GreaterEqualConstraint(1));
- if (config.grab(minsizeP)) {
+ minsizeP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(minsizeP)) {
minsize = minsizeP.getValue();
}
IntParameter samplingLevelP = new IntParameter(SAMPLINGL_ID, 100);
- if (config.grab(samplingLevelP)) {
+ if(config.grab(samplingLevelP)) {
samplingLevel = samplingLevelP.getValue();
}
DoubleParameter sensivityThresholdP = new DoubleParameter(THRESHOLD_ID);
- if (config.grab(sensivityThresholdP)) {
+ if(config.grab(sensivityThresholdP)) {
threshold = sensivityThresholdP.getValue();
}
RandomParameter rndP = new RandomParameter(RANDOM_ID);
- if (config.grab(rndP)) {
+ if(config.grab(rndP)) {
rnd = rndP.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ORCLUS.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ORCLUS.java
index a9c67a58..7733ddaa 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ORCLUS.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ORCLUS.java
@@ -61,8 +61,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
@@ -135,7 +134,7 @@ public class ORCLUS<V extends NumberVector<?>> extends AbstractProjectedClusteri
// current dimensionality associated with each seed
int dim_c = RelationUtil.dimensionality(relation);
- if (dim_c < l) {
+ if(dim_c < l) {
throw new IllegalStateException("Dimensionality of data < parameter l! " + "(" + dim_c + " < " + l + ")");
}
@@ -149,8 +148,8 @@ public class ORCLUS<V extends NumberVector<?>> extends AbstractProjectedClusteri
IndefiniteProgress cprogress = LOG.isVerbose() ? new IndefiniteProgress("Current number of clusters:", LOG) : null;
- while (k_c > k) {
- if (cprogress != null) {
+ while(k_c > k) {
+ if(cprogress != null) {
cprogress.setProcessed(clusters.size(), LOG);
}
@@ -158,8 +157,8 @@ public class ORCLUS<V extends NumberVector<?>> extends AbstractProjectedClusteri
assign(relation, distFunc, clusters);
// determine current subspace associated with each cluster
- for (ORCLUSCluster cluster : clusters) {
- if (cluster.objectIDs.size() > 0) {
+ for(ORCLUSCluster cluster : clusters) {
+ if(cluster.objectIDs.size() > 0) {
cluster.basis = findBasis(relation, distFunc, cluster, dim_c);
}
}
@@ -172,18 +171,19 @@ public class ORCLUS<V extends NumberVector<?>> extends AbstractProjectedClusteri
}
assign(relation, distFunc, clusters);
- if (cprogress != null) {
+ if(cprogress != null) {
cprogress.setProcessed(clusters.size());
cprogress.setCompleted(LOG);
}
// get the result
Clustering<Model> r = new Clustering<>("ORCLUS clustering", "orclus-clustering");
- for (ORCLUSCluster c : clusters) {
+ for(ORCLUSCluster c : clusters) {
r.addToplevelCluster(new Cluster<Model>(c.objectIDs, ClusterModel.CLUSTER));
}
return r;
- } catch (Exception e) {
+ }
+ catch(Exception e) {
throw new IllegalStateException(e);
}
}
@@ -199,7 +199,7 @@ public class ORCLUS<V extends NumberVector<?>> extends AbstractProjectedClusteri
DBIDs randomSample = DBIDUtil.randomSample(database.getDBIDs(), k, rnd);
NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(database);
List<ORCLUSCluster> seeds = new ArrayList<>();
- for (DBIDIter iter = randomSample.iter(); iter.valid(); iter.advance()) {
+ for(DBIDIter iter = randomSample.iter(); iter.valid(); iter.advance()) {
seeds.add(new ORCLUSCluster(database.get(iter), iter, factory));
}
return seeds;
@@ -217,29 +217,29 @@ public class ORCLUS<V extends NumberVector<?>> extends AbstractProjectedClusteri
private void assign(Relation<V> database, DistanceQuery<V, DoubleDistance> distFunc, List<ORCLUSCluster> clusters) {
NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(database);
// clear the current clusters
- for (ORCLUSCluster cluster : clusters) {
+ for(ORCLUSCluster cluster : clusters) {
cluster.objectIDs.clear();
}
// projected centroids of the clusters
List<V> projectedCentroids = new ArrayList<>(clusters.size());
- for (ORCLUSCluster c : clusters) {
+ for(ORCLUSCluster c : clusters) {
projectedCentroids.add(projection(c, c.centroid, factory));
}
// for each data point o do
- for (DBIDIter it = database.iterDBIDs(); it.valid(); it.advance()) {
+ for(DBIDIter it = database.iterDBIDs(); it.valid(); it.advance()) {
V o = database.get(it);
DoubleDistance minDist = null;
ORCLUSCluster minCluster = null;
// determine projected distance between o and cluster
- for (int i = 0; i < clusters.size(); i++) {
+ for(int i = 0; i < clusters.size(); i++) {
ORCLUSCluster c = clusters.get(i);
V o_proj = projection(c, o, factory);
DoubleDistance dist = distFunc.distance(o_proj, projectedCentroids.get(i));
- if (minDist == null || minDist.compareTo(dist) > 0) {
+ if(minDist == null || minDist.compareTo(dist) > 0) {
minDist = dist;
minCluster = c;
}
@@ -250,8 +250,8 @@ public class ORCLUS<V extends NumberVector<?>> extends AbstractProjectedClusteri
}
// recompute the seed in each clusters
- for (ORCLUSCluster cluster : clusters) {
- if (cluster.objectIDs.size() > 0) {
+ for(ORCLUSCluster cluster : clusters) {
+ if(cluster.objectIDs.size() > 0) {
cluster.centroid = Centroid.make(database, cluster.objectIDs).toVector(database);
}
}
@@ -271,7 +271,7 @@ public class ORCLUS<V extends NumberVector<?>> extends AbstractProjectedClusteri
// covariance matrix of cluster
// Matrix covariance = Util.covarianceMatrix(database, cluster.objectIDs);
GenericDistanceDBIDList<DoubleDistance> results = new GenericDistanceDBIDList<>(cluster.objectIDs.size());
- for (DBIDIter it = cluster.objectIDs.iter(); it.valid(); it.advance()) {
+ for(DBIDIter it = cluster.objectIDs.iter(); it.valid(); it.advance()) {
DoubleDistance distance = distFunc.distance(cluster.centroid, database.get(it));
results.add(distance, it);
}
@@ -304,9 +304,9 @@ public class ORCLUS<V extends NumberVector<?>> extends AbstractProjectedClusteri
*/
private void merge(Relation<V> database, DistanceQuery<V, DoubleDistance> distFunc, List<ORCLUSCluster> clusters, int k_new, int d_new, IndefiniteProgress cprogress) {
ArrayList<ProjectedEnergy> projectedEnergies = new ArrayList<>();
- for (int i = 0; i < clusters.size(); i++) {
- for (int j = 0; j < clusters.size(); j++) {
- if (i >= j) {
+ for(int i = 0; i < clusters.size(); i++) {
+ for(int j = 0; j < clusters.size(); j++) {
+ if(i >= j) {
continue;
}
// projected energy of c_ij in subspace e_ij
@@ -318,8 +318,8 @@ public class ORCLUS<V extends NumberVector<?>> extends AbstractProjectedClusteri
}
}
- while (clusters.size() > k_new) {
- if (cprogress != null) {
+ while(clusters.size() > k_new) {
+ if(cprogress != null) {
cprogress.setProcessed(clusters.size(), LOG);
}
// find the smallest value of r_ij
@@ -327,12 +327,12 @@ public class ORCLUS<V extends NumberVector<?>> extends AbstractProjectedClusteri
// renumber the clusters by replacing cluster c_i with cluster c_ij
// and discarding cluster c_j
- for (int c = 0; c < clusters.size(); c++) {
- if (c == minPE.i) {
+ for(int c = 0; c < clusters.size(); c++) {
+ if(c == minPE.i) {
clusters.remove(c);
clusters.add(c, minPE.cluster);
}
- if (c == minPE.j) {
+ if(c == minPE.j) {
clusters.remove(c);
}
}
@@ -341,15 +341,16 @@ public class ORCLUS<V extends NumberVector<?>> extends AbstractProjectedClusteri
int i = minPE.i;
int j = minPE.j;
Iterator<ProjectedEnergy> it = projectedEnergies.iterator();
- while (it.hasNext()) {
+ while(it.hasNext()) {
ProjectedEnergy pe = it.next();
- if (pe.i == i || pe.i == j || pe.j == i || pe.j == j) {
+ if(pe.i == i || pe.i == j || pe.j == i || pe.j == j) {
it.remove();
- } else {
- if (pe.i > j) {
+ }
+ else {
+ if(pe.i > j) {
pe.i -= 1;
}
- if (pe.j > j) {
+ if(pe.j > j) {
pe.j -= 1;
}
}
@@ -357,10 +358,11 @@ public class ORCLUS<V extends NumberVector<?>> extends AbstractProjectedClusteri
// ... and recompute them
ORCLUSCluster c_ij = minPE.cluster;
- for (int c = 0; c < clusters.size(); c++) {
- if (c < i) {
+ for(int c = 0; c < clusters.size(); c++) {
+ if(c < i) {
projectedEnergies.add(projectedEnergy(database, distFunc, clusters.get(c), c_ij, c, i, d_new));
- } else if (c > i) {
+ }
+ else if(c > i) {
projectedEnergies.add(projectedEnergy(database, distFunc, clusters.get(c), c_ij, i, c, d_new));
}
}
@@ -389,7 +391,7 @@ public class ORCLUS<V extends NumberVector<?>> extends AbstractProjectedClusteri
double sum = 0.;
V c_proj = projection(c_ij, c_ij.centroid, factory);
- for (DBIDIter iter = c_ij.objectIDs.iter(); iter.valid(); iter.advance()) {
+ for(DBIDIter iter = c_ij.objectIDs.iter(); iter.valid(); iter.advance()) {
V o_proj = projection(c_ij, database.get(iter), factory);
double dist = distFunc.distance(o_proj, c_proj).doubleValue();
sum += dist * dist;
@@ -417,15 +419,16 @@ public class ORCLUS<V extends NumberVector<?>> extends AbstractProjectedClusteri
// convert into array.
c.objectIDs = DBIDUtil.newArray(c.objectIDs);
- if (c.objectIDs.size() > 0) {
+ if(c.objectIDs.size() > 0) {
c.centroid = Centroid.make(relation, c.objectIDs).toVector(relation);
c.basis = findBasis(relation, distFunc, c, dim);
- } else {
+ }
+ else {
NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(relation);
Vector cent = c1.centroid.getColumnVector().plusEquals(c2.centroid.getColumnVector()).timesEquals(0.5);
c.centroid = factory.newNumberVector(cent.getArrayRef());
double[][] doubles = new double[c1.basis.getRowDimensionality()][dim];
- for (int i = 0; i < dim; i++) {
+ for(int i = 0; i < dim; i++) {
doubles[i][i] = 1;
}
c.basis = new Matrix(doubles);
@@ -590,16 +593,16 @@ public class ORCLUS<V extends NumberVector<?>> extends AbstractProjectedClusteri
protected void configAlpha(Parameterization config) {
DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, 0.5);
- alphaP.addConstraint(new GreaterConstraint(0));
- alphaP.addConstraint(new LessEqualConstraint(1));
- if (config.grab(alphaP)) {
+ alphaP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+ alphaP.addConstraint(CommonConstraints.LESS_EQUAL_ONE_DOUBLE);
+ if(config.grab(alphaP)) {
alpha = alphaP.doubleValue();
}
}
protected void configSeed(Parameterization config) {
RandomParameter rndP = new RandomParameter(SEED_ID);
- if (config.grab(rndP)) {
+ if(config.grab(rndP)) {
rnd = rndP.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/EpsilonNeighborPredicate.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/EpsilonNeighborPredicate.java
index 545a8171..1b316c7c 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/EpsilonNeighborPredicate.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/EpsilonNeighborPredicate.java
@@ -23,7 +23,8 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.DistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.DBSCAN;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
@@ -67,12 +68,12 @@ public class EpsilonNeighborPredicate<O, D extends Distance<D>> implements Neigh
/**
* Range to query with
*/
- D epsilon;
+ protected D epsilon;
/**
* Distance function to use
*/
- DistanceFunction<O, D> distFunc;
+ protected DistanceFunction<O, D> distFunc;
/**
* Full constructor.
@@ -177,14 +178,14 @@ public class EpsilonNeighborPredicate<O, D extends Distance<D>> implements Neigh
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
// Get a distance function.
- ObjectParameter<DistanceFunction<O, D>> distanceP = new ObjectParameter<>(AbstractDistanceBasedAlgorithm.DISTANCE_FUNCTION_ID, DistanceFunction.class, EuclideanDistanceFunction.class);
+ ObjectParameter<DistanceFunction<O, D>> distanceP = new ObjectParameter<>(DistanceBasedAlgorithm.DISTANCE_FUNCTION_ID, DistanceFunction.class, EuclideanDistanceFunction.class);
D distanceFactory = null;
if(config.grab(distanceP)) {
distfun = distanceP.instantiateClass(config);
distanceFactory = distfun.getDistanceFactory();
}
// Get the epsilon parameter
- DistanceParameter<D> epsilonP = new DistanceParameter<>(de.lmu.ifi.dbs.elki.algorithm.clustering.DBSCAN.EPSILON_ID, distanceFactory);
+ DistanceParameter<D> epsilonP = new DistanceParameter<>(DBSCAN.Parameterizer.EPSILON_ID, distanceFactory);
if(config.grab(epsilonP)) {
epsilon = epsilonP.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/MinPtsCorePredicate.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/MinPtsCorePredicate.java
index a6e62e2e..ac7ba81d 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/MinPtsCorePredicate.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/MinPtsCorePredicate.java
@@ -23,6 +23,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import de.lmu.ifi.dbs.elki.algorithm.clustering.DBSCAN;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
@@ -54,7 +55,7 @@ public class MinPtsCorePredicate implements CorePredicate {
/**
* The minpts parameter.
*/
- int minpts;
+ protected int minpts;
/**
* Default constructor.
@@ -127,7 +128,7 @@ public class MinPtsCorePredicate implements CorePredicate {
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
// Get the minpts parameter
- IntParameter minptsP = new IntParameter(de.lmu.ifi.dbs.elki.algorithm.clustering.DBSCAN.MINPTS_ID);
+ IntParameter minptsP = new IntParameter(DBSCAN.Parameterizer.MINPTS_ID);
if(config.grab(minptsP)) {
minpts = minptsP.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/ExtractFlatClusteringFromHierarchy.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/ExtractFlatClusteringFromHierarchy.java
index ac5cb77c..f6dbc88f 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/ExtractFlatClusteringFromHierarchy.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/ExtractFlatClusteringFromHierarchy.java
@@ -55,7 +55,7 @@ import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DistanceParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.EnumParameter;
@@ -178,9 +178,10 @@ public class ExtractFlatClusteringFromHierarchy<D extends Distance<D>> implement
DataStore<D> lambda = pointerresult.getParentDistanceStore();
Clustering<DendrogramModel<D>> result;
- if (lambda instanceof DoubleDistanceDataStore) {
+ if(lambda instanceof DoubleDistanceDataStore) {
result = extractClustersDouble(ids, pi, (DoubleDistanceDataStore) lambda);
- } else {
+ }
+ else {
result = extractClusters(ids, pi, lambda);
}
result.addChildResult(pointerresult);
@@ -208,28 +209,31 @@ public class ExtractFlatClusteringFromHierarchy<D extends Distance<D>> implement
DBIDArrayIter it = order.iter(); // Used multiple times!
int split;
- if (minclusters > 0) {
+ if(minclusters > 0) {
split = Math.max(ids.size() - minclusters, 0);
// Stop distance:
final D stopdist = lambda.get(order.get(split));
// Tie handling: decrement split.
- while (split > 0) {
+ while(split > 0) {
it.seek(split - 1);
- if (stopdist.compareTo(lambda.get(it)) <= 0) {
+ if(stopdist.compareTo(lambda.get(it)) <= 0) {
split--;
- } else {
+ }
+ else {
break;
}
}
- } else if (threshold != null) {
+ }
+ else if(threshold != null) {
split = ids.size();
it.seek(split - 1);
- while (threshold.compareTo(lambda.get(it)) <= 0 && it.valid()) {
+ while(threshold.compareTo(lambda.get(it)) <= 0 && it.valid()) {
split--;
it.retract();
}
- } else { // full hierarchy
+ }
+ else { // full hierarchy
split = 0;
}
@@ -242,19 +246,20 @@ public class ExtractFlatClusteringFromHierarchy<D extends Distance<D>> implement
DBIDVar succ = DBIDUtil.newVar(); // Variable for successor.
// Go backwards on the lower part.
- for (it.seek(split - 1); it.valid(); it.retract()) {
+ for(it.seek(split - 1); it.valid(); it.retract()) {
D dist = lambda.get(it); // Distance to successor
pi.assignVar(it, succ); // succ = pi(it)
int clusterid = cluster_map.intValue(succ);
// Successor cluster has already been created:
- if (clusterid >= 0) {
+ if(clusterid >= 0) {
cluster_dbids.get(clusterid).add(it);
cluster_map.putInt(it, clusterid);
// Update distance to maximum encountered:
- if (cluster_dist.get(clusterid).compareTo(dist) < 0) {
+ if(cluster_dist.get(clusterid).compareTo(dist) < 0) {
cluster_dist.set(clusterid, dist);
}
- } else {
+ }
+ else {
// Need to start a new cluster:
clusterid = cluster_dbids.size(); // next cluster number.
ModifiableDBIDs cids = DBIDUtil.newArray();
@@ -270,12 +275,12 @@ public class ExtractFlatClusteringFromHierarchy<D extends Distance<D>> implement
}
// Decrement counter
- if (progress != null) {
+ if(progress != null) {
progress.incrementProcessed(LOG);
}
}
final Clustering<DendrogramModel<D>> dendrogram;
- switch(outputmode) {
+ switch(outputmode){
case PARTIAL_HIERARCHY: {
// Build a hierarchy out of these clusters.
dendrogram = new Clustering<>("Hierarchical Clustering", "hierarchical-clustering");
@@ -284,74 +289,81 @@ public class ExtractFlatClusteringFromHierarchy<D extends Distance<D>> implement
// Convert initial clusters to cluster objects
{
int i = 0;
- for (DBIDIter it2 = cluster_leads.iter(); it2.valid(); it2.advance(), i++) {
+ for(DBIDIter it2 = cluster_leads.iter(); it2.valid(); it2.advance(), i++) {
clusters.add(makeCluster(it2, cluster_dist.get(i), cluster_dbids.get(i)));
}
cluster_dist = null; // Invalidate
cluster_dbids = null; // Invalidate
}
// Process the upper part, bottom-up.
- for (it.seek(split); it.valid(); it.advance()) {
+ for(it.seek(split); it.valid(); it.advance()) {
int clusterid = cluster_map.intValue(it);
// The current cluster led by the current element:
final Cluster<DendrogramModel<D>> clus;
- if (clusterid >= 0) {
+ if(clusterid >= 0) {
clus = clusters.get(clusterid);
- } else if (!singletons && ids.size() != 1) {
+ }
+ else if(!singletons && ids.size() != 1) {
clus = null;
- } else {
+ }
+ else {
clus = makeCluster(it, null, DBIDUtil.deref(it));
}
// The successor to join:
pi.assignVar(it, succ); // succ = pi(it)
- if (DBIDUtil.equal(it, succ)) {
+ if(DBIDUtil.equal(it, succ)) {
assert (root == null);
root = clus;
- } else {
+ }
+ else {
// Parent cluster:
int parentid = cluster_map.intValue(succ);
D depth = lambda.get(it);
// Parent cluster exists - merge as a new cluster:
- if (parentid >= 0) {
+ if(parentid >= 0) {
final Cluster<DendrogramModel<D>> pclus = clusters.get(parentid);
- if (pclus.getModel().getDistance().equals(depth)) {
- if (clus == null) {
+ if(pclus.getModel().getDistance().equals(depth)) {
+ if(clus == null) {
((ModifiableDBIDs) pclus.getIDs()).add(it);
- } else {
+ }
+ else {
dendrogram.addChildCluster(pclus, clus);
}
- } else {
+ }
+ else {
// Merge at new depth:
ModifiableDBIDs cids = DBIDUtil.newArray(clus == null ? 1 : 0);
- if (clus == null) {
+ if(clus == null) {
cids.add(it);
}
Cluster<DendrogramModel<D>> npclus = makeCluster(succ, depth, cids);
- if (clus != null) {
+ if(clus != null) {
dendrogram.addChildCluster(npclus, clus);
}
dendrogram.addChildCluster(npclus, pclus);
// Replace existing parent cluster: new depth
clusters.set(parentid, npclus);
}
- } else {
+ }
+ else {
// Merge with parent at this depth:
final Cluster<DendrogramModel<D>> pclus;
- if (!singletons) {
+ if(!singletons) {
ModifiableDBIDs cids = DBIDUtil.newArray(clus == null ? 2 : 1);
cids.add(succ);
- if (clus == null) {
+ if(clus == null) {
cids.add(it);
}
// New cluster for parent and/or new point
pclus = makeCluster(succ, depth, cids);
- } else {
+ }
+ else {
// Create a new, one-element cluster for parent, and a merged
// cluster on top.
pclus = makeCluster(succ, depth, DBIDUtil.EMPTYDBIDS);
dendrogram.addChildCluster(pclus, makeCluster(succ, null, DBIDUtil.deref(succ)));
}
- if (clus != null) {
+ if(clus != null) {
dendrogram.addChildCluster(pclus, clus);
}
// Store cluster:
@@ -362,7 +374,7 @@ public class ExtractFlatClusteringFromHierarchy<D extends Distance<D>> implement
}
// Decrement counter
- if (progress != null) {
+ if(progress != null) {
progress.incrementProcessed(LOG);
}
}
@@ -377,21 +389,21 @@ public class ExtractFlatClusteringFromHierarchy<D extends Distance<D>> implement
// Convert initial clusters to cluster objects
{
int i = 0;
- for (DBIDIter it2 = cluster_leads.iter(); it2.valid(); it2.advance(), i++) {
+ for(DBIDIter it2 = cluster_leads.iter(); it2.valid(); it2.advance(), i++) {
dendrogram.addToplevelCluster(makeCluster(it2, cluster_dist.get(i), cluster_dbids.get(i)));
}
cluster_dist = null; // Invalidate
cluster_dbids = null; // Invalidate
}
// Process the upper part, bottom-up.
- for (it.seek(split); it.valid(); it.advance()) {
+ for(it.seek(split); it.valid(); it.advance()) {
int clusterid = cluster_map.intValue(it);
- if (clusterid < 0) {
+ if(clusterid < 0) {
dendrogram.addToplevelCluster(makeCluster(it, null, DBIDUtil.deref(it)));
}
// Decrement counter
- if (progress != null) {
+ if(progress != null) {
progress.incrementProcessed(LOG);
}
}
@@ -401,7 +413,7 @@ public class ExtractFlatClusteringFromHierarchy<D extends Distance<D>> implement
throw new AbortException("Unsupported output mode.");
}
- if (progress != null) {
+ if(progress != null) {
progress.ensureCompleted(LOG);
}
@@ -428,29 +440,32 @@ public class ExtractFlatClusteringFromHierarchy<D extends Distance<D>> implement
DBIDArrayIter it = order.iter(); // Used multiple times!
int split;
- if (minclusters > 0) {
+ if(minclusters > 0) {
split = Math.max(ids.size() - minclusters, 0);
// Stop distance:
final double stopdist = lambda.doubleValue(order.get(split));
// Tie handling: decrement split.
- while (split > 0) {
+ while(split > 0) {
it.seek(split - 1);
- if (stopdist <= lambda.doubleValue(it)) {
+ if(stopdist <= lambda.doubleValue(it)) {
split--;
- } else {
+ }
+ else {
break;
}
}
- } else if (threshold != null) {
+ }
+ else if(threshold != null) {
split = ids.size();
it.seek(split - 1);
double stopdist = ((DoubleDistance) threshold).doubleValue();
- while (stopdist <= lambda.doubleValue(it) && it.valid()) {
+ while(stopdist <= lambda.doubleValue(it) && it.valid()) {
split--;
it.retract();
}
- } else { // full hierarchy
+ }
+ else { // full hierarchy
split = 0;
}
@@ -463,19 +478,20 @@ public class ExtractFlatClusteringFromHierarchy<D extends Distance<D>> implement
DBIDVar succ = DBIDUtil.newVar(); // Variable for successor.
// Go backwards on the lower part.
- for (it.seek(split - 1); it.valid(); it.retract()) {
+ for(it.seek(split - 1); it.valid(); it.retract()) {
double dist = lambda.doubleValue(it); // Distance to successor
pi.assignVar(it, succ); // succ = pi(it)
int clusterid = cluster_map.intValue(succ);
// Successor cluster has already been created:
- if (clusterid >= 0) {
+ if(clusterid >= 0) {
cluster_dbids.get(clusterid).add(it);
cluster_map.putInt(it, clusterid);
// Update distance to maximum encountered:
- if (cluster_dist.get(clusterid) < dist) {
+ if(cluster_dist.get(clusterid) < dist) {
cluster_dist.set(clusterid, dist);
}
- } else {
+ }
+ else {
// Need to start a new cluster:
clusterid = cluster_dbids.size(); // next cluster number.
ModifiableDBIDs cids = DBIDUtil.newArray();
@@ -491,12 +507,12 @@ public class ExtractFlatClusteringFromHierarchy<D extends Distance<D>> implement
}
// Decrement counter
- if (progress != null) {
+ if(progress != null) {
progress.incrementProcessed(LOG);
}
}
final Clustering<DendrogramModel<D>> dendrogram;
- switch(outputmode) {
+ switch(outputmode){
case PARTIAL_HIERARCHY: {
// Build a hierarchy out of these clusters.
dendrogram = new Clustering<>("Hierarchical Clustering", "hierarchical-clustering");
@@ -505,7 +521,7 @@ public class ExtractFlatClusteringFromHierarchy<D extends Distance<D>> implement
// Convert initial clusters to cluster objects
{
int i = 0;
- for (DBIDIter it2 = cluster_leads.iter(); it2.valid(); it2.advance(), i++) {
+ for(DBIDIter it2 = cluster_leads.iter(); it2.valid(); it2.advance(), i++) {
@SuppressWarnings("unchecked")
D depth = (D) new DoubleDistance(cluster_dist.get(i));
clusters.add(makeCluster(it2, depth, cluster_dbids.get(i)));
@@ -514,68 +530,75 @@ public class ExtractFlatClusteringFromHierarchy<D extends Distance<D>> implement
cluster_dbids = null; // Invalidate
}
// Process the upper part, bottom-up.
- for (it.seek(split); it.valid(); it.advance()) {
+ for(it.seek(split); it.valid(); it.advance()) {
int clusterid = cluster_map.intValue(it);
// The current cluster led by the current element:
final Cluster<DendrogramModel<D>> clus;
- if (clusterid >= 0) {
+ if(clusterid >= 0) {
clus = clusters.get(clusterid);
- } else if (!singletons && ids.size() != 1) {
+ }
+ else if(!singletons && ids.size() != 1) {
clus = null;
- } else {
+ }
+ else {
clus = makeCluster(it, null, DBIDUtil.deref(it));
}
// The successor to join:
pi.assignVar(it, succ); // succ = pi(it)
- if (DBIDUtil.equal(it, succ)) {
+ if(DBIDUtil.equal(it, succ)) {
assert (root == null);
root = clus;
- } else {
+ }
+ else {
// Parent cluster:
int parentid = cluster_map.intValue(succ);
@SuppressWarnings("unchecked")
D depth = (D) new DoubleDistance(lambda.doubleValue(it));
// Parent cluster exists - merge as a new cluster:
- if (parentid >= 0) {
+ if(parentid >= 0) {
final Cluster<DendrogramModel<D>> pclus = clusters.get(parentid);
- if (pclus.getModel().getDistance().equals(depth)) {
- if (clus == null) {
+ if(pclus.getModel().getDistance().equals(depth)) {
+ if(clus == null) {
((ModifiableDBIDs) pclus.getIDs()).add(it);
- } else {
+ }
+ else {
dendrogram.addChildCluster(pclus, clus);
}
- } else {
+ }
+ else {
// Merge at new depth:
ModifiableDBIDs cids = DBIDUtil.newArray(clus == null ? 1 : 0);
- if (clus == null) {
+ if(clus == null) {
cids.add(it);
}
Cluster<DendrogramModel<D>> npclus = makeCluster(succ, depth, cids);
- if (clus != null) {
+ if(clus != null) {
dendrogram.addChildCluster(npclus, clus);
}
dendrogram.addChildCluster(npclus, pclus);
// Replace existing parent cluster: new depth
clusters.set(parentid, npclus);
}
- } else {
+ }
+ else {
// Merge with parent at this depth:
final Cluster<DendrogramModel<D>> pclus;
- if (!singletons) {
+ if(!singletons) {
ModifiableDBIDs cids = DBIDUtil.newArray(clus == null ? 2 : 1);
cids.add(succ);
- if (clus == null) {
+ if(clus == null) {
cids.add(it);
}
// New cluster for parent and/or new point
pclus = makeCluster(succ, depth, cids);
- } else {
+ }
+ else {
// Create a new, one-element cluster for parent, and a merged
// cluster on top.
pclus = makeCluster(succ, depth, DBIDUtil.EMPTYDBIDS);
dendrogram.addChildCluster(pclus, makeCluster(succ, null, DBIDUtil.deref(succ)));
}
- if (clus != null) {
+ if(clus != null) {
dendrogram.addChildCluster(pclus, clus);
}
// Store cluster:
@@ -586,7 +609,7 @@ public class ExtractFlatClusteringFromHierarchy<D extends Distance<D>> implement
}
// Decrement counter
- if (progress != null) {
+ if(progress != null) {
progress.incrementProcessed(LOG);
}
}
@@ -601,7 +624,7 @@ public class ExtractFlatClusteringFromHierarchy<D extends Distance<D>> implement
// Convert initial clusters to cluster objects
{
int i = 0;
- for (DBIDIter it2 = cluster_leads.iter(); it2.valid(); it2.advance(), i++) {
+ for(DBIDIter it2 = cluster_leads.iter(); it2.valid(); it2.advance(), i++) {
@SuppressWarnings("unchecked")
D depth = (D) new DoubleDistance(cluster_dist.get(i));
dendrogram.addToplevelCluster(makeCluster(it2, depth, cluster_dbids.get(i)));
@@ -610,14 +633,14 @@ public class ExtractFlatClusteringFromHierarchy<D extends Distance<D>> implement
cluster_dbids = null; // Invalidate
}
// Process the upper part, bottom-up.
- for (it.seek(split); it.valid(); it.advance()) {
+ for(it.seek(split); it.valid(); it.advance()) {
int clusterid = cluster_map.intValue(it);
- if (clusterid < 0) {
+ if(clusterid < 0) {
dendrogram.addToplevelCluster(makeCluster(it, null, DBIDUtil.deref(it)));
}
// Decrement counter
- if (progress != null) {
+ if(progress != null) {
progress.incrementProcessed(LOG);
}
}
@@ -627,7 +650,7 @@ public class ExtractFlatClusteringFromHierarchy<D extends Distance<D>> implement
throw new AbortException("Unsupported output mode.");
}
- if (progress != null) {
+ if(progress != null) {
progress.ensureCompleted(LOG);
}
@@ -644,13 +667,16 @@ public class ExtractFlatClusteringFromHierarchy<D extends Distance<D>> implement
*/
private Cluster<DendrogramModel<D>> makeCluster(DBIDRef lead, D depth, DBIDs members) {
final String name;
- if (members.size() == 0) {
+ if(members.size() == 0) {
name = "mrg_" + DBIDUtil.toString(lead) + "_" + depth;
- } else if (depth != null && depth.isInfiniteDistance() || (members.size() == 1 && members.contains(lead))) {
+ }
+ else if(depth != null && depth.isInfiniteDistance() || (members.size() == 1 && members.contains(lead))) {
name = "obj_" + DBIDUtil.toString(lead);
- } else if (depth != null) {
+ }
+ else if(depth != null) {
name = "clu_" + DBIDUtil.toString(lead) + "_" + depth;
- } else {
+ }
+ else {
// Complete data set only?
name = "clu_" + DBIDUtil.toString(lead);
}
@@ -794,53 +820,54 @@ public class ExtractFlatClusteringFromHierarchy<D extends Distance<D>> implement
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
ObjectParameter<HierarchicalClusteringAlgorithm<D>> algorithmP = new ObjectParameter<>(AlgorithmStep.Parameterizer.ALGORITHM_ID, HierarchicalClusteringAlgorithm.class);
- if (config.grab(algorithmP)) {
+ if(config.grab(algorithmP)) {
algorithm = algorithmP.instantiateClass(config);
}
EnumParameter<ThresholdMode> modeP = new EnumParameter<>(MODE_ID, ThresholdMode.class, ThresholdMode.BY_MINCLUSTERS);
- if (config.grab(modeP)) {
+ if(config.grab(modeP)) {
thresholdmode = modeP.getValue();
}
- if (thresholdmode == null || ThresholdMode.BY_MINCLUSTERS.equals(thresholdmode)) {
+ if(thresholdmode == null || ThresholdMode.BY_MINCLUSTERS.equals(thresholdmode)) {
IntParameter minclustersP = new IntParameter(MINCLUSTERS_ID);
- minclustersP.addConstraint(new GreaterEqualConstraint(1));
- if (config.grab(minclustersP)) {
+ minclustersP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(minclustersP)) {
minclusters = minclustersP.intValue();
}
}
- if (thresholdmode == null || ThresholdMode.BY_THRESHOLD.equals(thresholdmode)) {
+ if(thresholdmode == null || ThresholdMode.BY_THRESHOLD.equals(thresholdmode)) {
// Fallback to double when no algorithm chosen yet:
@SuppressWarnings("unchecked")
final D factory = algorithm != null ? algorithm.getDistanceFactory() : (D) DoubleDistance.FACTORY;
DistanceParameter<D> distP = new DistanceParameter<>(THRESHOLD_ID, factory);
- if (config.grab(distP)) {
+ if(config.grab(distP)) {
threshold = distP.getValue();
}
}
- if (thresholdmode == null || !ThresholdMode.NO_THRESHOLD.equals(thresholdmode)) {
+ if(thresholdmode == null || !ThresholdMode.NO_THRESHOLD.equals(thresholdmode)) {
EnumParameter<OutputMode> outputP = new EnumParameter<>(OUTPUTMODE_ID, OutputMode.class);
- if (config.grab(outputP)) {
+ if(config.grab(outputP)) {
outputmode = outputP.getValue();
}
- } else {
+ }
+ else {
// This becomes full hierarchy:
minclusters = -1;
outputmode = OutputMode.PARTIAL_HIERARCHY;
}
Flag singletonsF = new Flag(SINGLETONS_ID);
- if (config.grab(singletonsF)) {
+ if(config.grab(singletonsF)) {
singletons = singletonsF.isTrue();
}
}
@Override
protected ExtractFlatClusteringFromHierarchy<D> makeInstance() {
- switch(thresholdmode) {
+ switch(thresholdmode){
case NO_THRESHOLD:
case BY_MINCLUSTERS:
return new ExtractFlatClusteringFromHierarchy<>(algorithm, minclusters, outputmode, singletons);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeans.java
index dc1fa47c..5754e961 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeans.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeans.java
@@ -35,6 +35,7 @@ import de.lmu.ifi.dbs.elki.data.model.MeanModel;
import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
@@ -49,8 +50,7 @@ import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
@@ -105,68 +105,61 @@ public abstract class AbstractKMeans<V extends NumberVector<?>, D extends Distan
* @param relation the database to cluster
* @param means a list of k means
* @param clusters cluster assignment
+ * @param assignment Current cluster assignment
* @return true when the object was reassigned
*/
- protected boolean assignToNearestCluster(Relation<V> relation, List<? extends NumberVector<?>> means, List<? extends ModifiableDBIDs> clusters) {
+ protected boolean assignToNearestCluster(Relation<V> relation, List<? extends NumberVector<?>> means, List<? extends ModifiableDBIDs> clusters, WritableIntegerDataStore assignment) {
boolean changed = false;
- if (getDistanceFunction() instanceof PrimitiveDoubleDistanceFunction) {
+ if(getDistanceFunction() instanceof PrimitiveDoubleDistanceFunction) {
@SuppressWarnings("unchecked")
final PrimitiveDoubleDistanceFunction<? super NumberVector<?>> df = (PrimitiveDoubleDistanceFunction<? super NumberVector<?>>) getDistanceFunction();
- for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double mindist = Double.POSITIVE_INFINITY;
V fv = relation.get(iditer);
int minIndex = 0;
- for (int i = 0; i < k; i++) {
+ for(int i = 0; i < k; i++) {
double dist = df.doubleDistance(fv, means.get(i));
- if (dist < mindist) {
+ if(dist < mindist) {
minIndex = i;
mindist = dist;
}
}
- if (clusters.get(minIndex).add(iditer)) {
- changed = true;
- // Remove from previous cluster
- // TODO: keep a list of cluster assignments to save this search?
- for (int i = 0; i < k; i++) {
- if (i != minIndex) {
- if (clusters.get(i).remove(iditer)) {
- break;
- }
- }
- }
- }
+ changed |= updateAssignment(iditer, clusters, assignment, minIndex);
}
- } else {
+ }
+ else {
final PrimitiveDistanceFunction<? super NumberVector<?>, D> df = getDistanceFunction();
- for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
D mindist = df.getDistanceFactory().infiniteDistance();
V fv = relation.get(iditer);
int minIndex = 0;
- for (int i = 0; i < k; i++) {
+ for(int i = 0; i < k; i++) {
D dist = df.distance(fv, means.get(i));
- if (dist.compareTo(mindist) < 0) {
+ if(dist.compareTo(mindist) < 0) {
minIndex = i;
mindist = dist;
}
}
- if (clusters.get(minIndex).add(iditer)) {
- changed = true;
- // Remove from previous cluster
- // TODO: keep a list of cluster assignments to save this search?
- for (int i = 0; i < k; i++) {
- if (i != minIndex) {
- if (clusters.get(i).remove(iditer)) {
- break;
- }
- }
- }
- }
+ changed |= updateAssignment(iditer, clusters, assignment, minIndex);
}
}
return changed;
}
+ protected boolean updateAssignment(DBIDIter iditer, List<? extends ModifiableDBIDs> clusters, WritableIntegerDataStore assignment, int newA) {
+ final int oldA = assignment.intValue(iditer);
+ if(oldA == newA) {
+ return false;
+ }
+ clusters.get(newA).add(iditer);
+ assignment.putInt(iditer, newA);
+ if(oldA >= 0) {
+ clusters.get(oldA).remove(iditer);
+ }
+ return true;
+ }
+
@Override
public TypeInformation[] getInputTypeRestriction() {
return TypeUtil.array(new CombinedTypeInformation(TypeUtil.NUMBER_VECTOR_FIELD, getDistanceFunction().getInputTypeRestriction()));
@@ -181,24 +174,28 @@ public abstract class AbstractKMeans<V extends NumberVector<?>, D extends Distan
* @return the mean vectors of the given clusters in the given database
*/
protected List<Vector> means(List<? extends ModifiableDBIDs> clusters, List<? extends NumberVector<?>> means, Relation<V> database) {
+ // TODO: use Kahan summation for better numerical precision?
List<Vector> newMeans = new ArrayList<>(k);
- for (int i = 0; i < k; i++) {
+ for(int i = 0; i < k; i++) {
ModifiableDBIDs list = clusters.get(i);
Vector mean = null;
- if (list.size() > 0) {
- double s = 1.0 / list.size();
+ if(list.size() > 0) {
DBIDIter iter = list.iter();
- assert (iter.valid());
- mean = database.get(iter).getColumnVector().timesEquals(s);
+ // Initialize with first.
+ mean = database.get(iter).getColumnVector();
double[] raw = mean.getArrayRef();
iter.advance();
- for (; iter.valid(); iter.advance()) {
+ // Update with remaining instances
+ for(; iter.valid(); iter.advance()) {
NumberVector<?> vec = database.get(iter);
- for (int j = 0; j < mean.getDimensionality(); j++) {
- raw[j] += s * vec.doubleValue(j);
+ for(int j = 0; j < mean.getDimensionality(); j++) {
+ raw[j] += vec.doubleValue(j);
}
}
- } else {
+ mean.timesEquals(1.0 / list.size());
+ }
+ else {
+ // Keep degenerated means as-is for now.
mean = means.get(i).getColumnVector();
}
newMeans.add(mean);
@@ -218,17 +215,18 @@ public abstract class AbstractKMeans<V extends NumberVector<?>, D extends Distan
final int dim = medians.get(0).getDimensionality();
final SortDBIDsBySingleDimension sorter = new SortDBIDsBySingleDimension(database);
List<NumberVector<?>> newMedians = new ArrayList<>(k);
- for (int i = 0; i < k; i++) {
+ for(int i = 0; i < k; i++) {
ArrayModifiableDBIDs list = DBIDUtil.newArray(clusters.get(i));
- if (list.size() > 0) {
+ if(list.size() > 0) {
Vector mean = new Vector(dim);
- for (int d = 0; d < dim; d++) {
+ for(int d = 0; d < dim; d++) {
sorter.setDimension(d);
DBID id = QuickSelect.median(list, sorter);
mean.set(d, database.get(id).doubleValue(d));
}
newMedians.add(mean);
- } else {
+ }
+ else {
newMedians.add((NumberVector<?>) medians.get(i));
}
}
@@ -244,14 +242,11 @@ public abstract class AbstractKMeans<V extends NumberVector<?>, D extends Distan
* @param op Cluster size change / Weight change
*/
protected void incrementalUpdateMean(Vector mean, V vec, int newsize, double op) {
- if (newsize == 0) {
+ if(newsize == 0) {
return; // Keep old mean
}
- Vector delta = vec.getColumnVector();
- // Compute difference from mean
- delta.minusEquals(mean);
- delta.timesEquals(op / newsize);
- mean.plusEquals(delta);
+ Vector delta = vec.getColumnVector().minusEquals(mean);
+ mean.plusTimesEquals(delta, op / newsize);
}
/**
@@ -260,76 +255,84 @@ public abstract class AbstractKMeans<V extends NumberVector<?>, D extends Distan
* @param relation Relation
* @param means Means
* @param clusters Clusters
+ * @param assignment Current cluster assignment
* @return true when the means have changed
*/
- protected boolean macQueenIterate(Relation<V> relation, List<Vector> means, List<ModifiableDBIDs> clusters) {
+ protected boolean macQueenIterate(Relation<V> relation, List<Vector> means, List<ModifiableDBIDs> clusters, WritableIntegerDataStore assignment) {
boolean changed = false;
- if (getDistanceFunction() instanceof PrimitiveDoubleDistanceFunction) {
+ if(getDistanceFunction() instanceof PrimitiveDoubleDistanceFunction) {
// Raw distance function
@SuppressWarnings("unchecked")
final PrimitiveDoubleDistanceFunction<? super NumberVector<?>> df = (PrimitiveDoubleDistanceFunction<? super NumberVector<?>>) getDistanceFunction();
// Incremental update
- for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double mindist = Double.POSITIVE_INFINITY;
V fv = relation.get(iditer);
int minIndex = 0;
- for (int i = 0; i < k; i++) {
+ for(int i = 0; i < k; i++) {
double dist = df.doubleDistance(fv, means.get(i));
- if (dist < mindist) {
+ if(dist < mindist) {
minIndex = i;
mindist = dist;
}
}
- // Update the cluster mean incrementally:
- for (int i = 0; i < k; i++) {
- ModifiableDBIDs ci = clusters.get(i);
- if (i == minIndex) {
- if (ci.add(iditer)) {
- incrementalUpdateMean(means.get(i), fv, ci.size(), +1);
- changed = true;
- }
- } else if (ci.remove(iditer)) {
- incrementalUpdateMean(means.get(i), fv, ci.size() + 1, -1);
- changed = true;
- }
- }
+ changed |= updateMeanAndAssignment(clusters, means, minIndex, fv, iditer, assignment);
}
- } else {
+ }
+ else {
// Raw distance function
final PrimitiveDistanceFunction<? super NumberVector<?>, D> df = getDistanceFunction();
// Incremental update
- for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
D mindist = df.getDistanceFactory().infiniteDistance();
V fv = relation.get(iditer);
int minIndex = 0;
- for (int i = 0; i < k; i++) {
+ for(int i = 0; i < k; i++) {
D dist = df.distance(fv, means.get(i));
- if (dist.compareTo(mindist) < 0) {
+ if(dist.compareTo(mindist) < 0) {
minIndex = i;
mindist = dist;
}
}
- // Update the cluster mean incrementally:
- for (int i = 0; i < k; i++) {
- ModifiableDBIDs ci = clusters.get(i);
- if (i == minIndex) {
- if (ci.add(iditer)) {
- incrementalUpdateMean(means.get(i), fv, ci.size(), +1);
- changed = true;
- }
- } else if (ci.remove(iditer)) {
- incrementalUpdateMean(means.get(i), fv, ci.size() + 1, -1);
- changed = true;
- }
- }
+ changed |= updateMeanAndAssignment(clusters, means, minIndex, fv, iditer, assignment);
}
}
return changed;
}
+ /**
+ * Try to update the cluster assignment.
+ *
+ * @param clusters Current clusters
+ * @param means Means to update
+ * @param minIndex Cluster to assign to
+ * @param fv Vector
+ * @param iditer Object ID
+ * @param assignment Current cluster assignment
+ * @return {@code true} when assignment changed
+ */
+ private boolean updateMeanAndAssignment(List<ModifiableDBIDs> clusters, List<Vector> means, int minIndex, V fv, DBIDIter iditer, WritableIntegerDataStore assignment) {
+ int cur = assignment.intValue(iditer);
+ if(cur == minIndex) {
+ return false;
+ }
+ final ModifiableDBIDs curclus = clusters.get(minIndex);
+ curclus.add(iditer);
+ incrementalUpdateMean(means.get(minIndex), fv, curclus.size(), +1);
+
+ if(cur >= 0) {
+ ModifiableDBIDs ci = clusters.get(cur);
+ ci.remove(iditer);
+ incrementalUpdateMean(means.get(cur), fv, ci.size() + 1, -1);
+ }
+
+ assignment.putInt(iditer, minIndex);
+ return true;
+ }
+
@Override
public void setK(int k) {
this.k = k;
@@ -366,27 +369,27 @@ public abstract class AbstractKMeans<V extends NumberVector<?>, D extends Distan
@Override
protected void makeOptions(Parameterization config) {
ObjectParameter<PrimitiveDistanceFunction<NumberVector<?>, D>> distanceFunctionP = makeParameterDistanceFunction(SquaredEuclideanDistanceFunction.class, PrimitiveDistanceFunction.class);
- if (config.grab(distanceFunctionP)) {
+ if(config.grab(distanceFunctionP)) {
distanceFunction = distanceFunctionP.instantiateClass(config);
- if (!(distanceFunction instanceof EuclideanDistanceFunction) && !(distanceFunction instanceof SquaredEuclideanDistanceFunction)) {
+ if(!(distanceFunction instanceof EuclideanDistanceFunction) && !(distanceFunction instanceof SquaredEuclideanDistanceFunction)) {
getLogger().warning("k-means optimizes the sum of squares - it should be used with squared euclidean distance and may stop converging otherwise!");
}
}
IntParameter kP = new IntParameter(K_ID);
- kP.addConstraint(new GreaterConstraint(0));
- if (config.grab(kP)) {
+ kP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(kP)) {
k = kP.getValue();
}
ObjectParameter<KMeansInitialization<V>> initialP = new ObjectParameter<>(INIT_ID, KMeansInitialization.class, RandomlyChosenInitialMeans.class);
- if (config.grab(initialP)) {
+ if(config.grab(initialP)) {
initializer = initialP.instantiateClass(config);
}
IntParameter maxiterP = new IntParameter(MAXITER_ID, 0);
- maxiterP.addConstraint(new GreaterEqualConstraint(0));
- if (config.grab(maxiterP)) {
+ maxiterP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_INT);
+ if(config.grab(maxiterP)) {
maxiter = maxiterP.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/BestOfMultipleKMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/BestOfMultipleKMeans.java
index 30bb640c..51e7ace9 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/BestOfMultipleKMeans.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/BestOfMultipleKMeans.java
@@ -38,7 +38,7 @@ import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
@@ -90,34 +90,35 @@ public class BestOfMultipleKMeans<V extends NumberVector<?>, D extends Distance<
@Override
public Clustering<M> run(Database database, Relation<V> relation) {
- if (!(innerkMeans.getDistanceFunction() instanceof PrimitiveDistanceFunction)) {
+ if(!(innerkMeans.getDistanceFunction() instanceof PrimitiveDistanceFunction)) {
throw new AbortException("K-Means results can only be evaluated for primitive distance functions, got: " + innerkMeans.getDistanceFunction().getClass());
}
final PrimitiveDistanceFunction<? super V, D> df = (PrimitiveDistanceFunction<? super V, D>) innerkMeans.getDistanceFunction();
Clustering<M> bestResult = null;
- if (trials > 1) {
+ if(trials > 1) {
double bestCost = Double.POSITIVE_INFINITY;
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("K-means iterations", trials, LOG) : null;
- for (int i = 0; i < trials; i++) {
+ for(int i = 0; i < trials; i++) {
Clustering<M> currentCandidate = innerkMeans.run(database, relation);
double currentCost = qualityMeasure.calculateCost(currentCandidate, df, relation);
- if (LOG.isVerbose()) {
+ if(LOG.isVerbose()) {
LOG.verbose("Cost of candidate " + i + ": " + currentCost);
}
- if (currentCost < bestCost) {
+ if(currentCost < bestCost) {
bestResult = currentCandidate;
bestCost = currentCost;
}
- if (prog != null) {
+ if(prog != null) {
prog.incrementProcessed(LOG);
}
}
- if (prog != null) {
+ if(prog != null) {
prog.ensureCompleted(LOG);
}
- } else {
+ }
+ else {
bestResult = innerkMeans.run(database);
}
@@ -195,18 +196,18 @@ public class BestOfMultipleKMeans<V extends NumberVector<?>, D extends Distance<
@Override
protected void makeOptions(Parameterization config) {
IntParameter trialsP = new IntParameter(TRIALS_ID);
- trialsP.addConstraint(new GreaterEqualConstraint(1));
- if (config.grab(trialsP)) {
+ trialsP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(trialsP)) {
trials = trialsP.intValue();
}
ObjectParameter<KMeans<V, D, M>> kMeansVariantP = new ObjectParameter<>(KMEANS_ID, KMeans.class);
- if (config.grab(kMeansVariantP)) {
+ if(config.grab(kMeansVariantP)) {
kMeansVariant = kMeansVariantP.instantiateClass(config);
}
ObjectParameter<KMeansQualityMeasure<V, ? super D>> qualityMeasureP = new ObjectParameter<>(QUALITYMEASURE_ID, KMeansQualityMeasure.class);
- if (config.grab(qualityMeasureP)) {
+ if(config.grab(qualityMeasureP)) {
qualityMeasure = qualityMeasureP.instantiateClass(config);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FarthestPointsInitialMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FarthestPointsInitialMeans.java
index a018c04b..9edfd816 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FarthestPointsInitialMeans.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FarthestPointsInitialMeans.java
@@ -24,7 +24,6 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
*/
import java.util.ArrayList;
import java.util.List;
-import java.util.Random;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.database.Database;
@@ -74,7 +73,7 @@ public class FarthestPointsInitialMeans<V, D extends NumberDistance<D, ?>> exten
@Override
public List<V> chooseInitialMeans(Database database, Relation<V> relation, int k, PrimitiveDistanceFunction<? super NumberVector<?>, ?> distanceFunction) {
// Get a distance query
- if (!(distanceFunction.getDistanceFactory() instanceof NumberDistance)) {
+ if(!(distanceFunction.getDistanceFactory() instanceof NumberDistance)) {
throw new AbortException("Farthest points K-Means initialization can only be used with numerical distances.");
}
@SuppressWarnings("unchecked")
@@ -84,26 +83,25 @@ public class FarthestPointsInitialMeans<V, D extends NumberDistance<D, ?>> exten
// Chose first mean
List<V> means = new ArrayList<>(k);
- Random random = rnd.getRandom();
- DBIDIter first = DBIDUtil.randomSample(relation.getDBIDs(), 1, new Random(random.nextLong())).iter();
+ DBIDIter first = DBIDUtil.randomSample(relation.getDBIDs(), 1, rnd).iter();
means.add(relation.get(first));
DBIDVar best = DBIDUtil.newVar(first);
- for (int i = (dropfirst ? 0 : 1); i < k; i++) {
+ for(int i = (dropfirst ? 0 : 1); i < k; i++) {
// Find farthest object:
double maxdist = Double.NEGATIVE_INFINITY;
- for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
+ for(DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
double dsum = 0.;
- for (V ex : means) {
+ for(V ex : means) {
dsum += distQ.distance(ex, it).doubleValue();
}
- if (dsum > maxdist) {
+ if(dsum > maxdist) {
maxdist = dsum;
best.set(it);
}
}
// Add new mean:
- if (k == 0) {
+ if(k == 0) {
means.clear(); // Remove temporary first element.
}
means.add(relation.get(best));
@@ -114,7 +112,7 @@ public class FarthestPointsInitialMeans<V, D extends NumberDistance<D, ?>> exten
@Override
public DBIDs chooseInitialMedoids(int k, DistanceQuery<? super V, ?> distQ2) {
- if (!(distQ2.getDistanceFactory() instanceof NumberDistance)) {
+ if(!(distQ2.getDistanceFactory() instanceof NumberDistance)) {
throw new AbortException("Farthest points K-Means initialization can only be used with numerical distances.");
}
@SuppressWarnings("unchecked")
@@ -123,26 +121,25 @@ public class FarthestPointsInitialMeans<V, D extends NumberDistance<D, ?>> exten
// Chose first mean
ArrayModifiableDBIDs means = DBIDUtil.newArray(k);
- Random random = rnd.getRandom();
- DBIDIter first = DBIDUtil.randomSample(relation.getDBIDs(), 1, new Random(random.nextLong())).iter();
+ DBIDIter first = DBIDUtil.randomSample(relation.getDBIDs(), 1, rnd).iter();
means.add(first);
DBIDVar best = DBIDUtil.newVar(first);
- for (int i = (dropfirst ? 0 : 1); i < k; i++) {
+ for(int i = (dropfirst ? 0 : 1); i < k; i++) {
// Find farthest object:
double maxdist = Double.NEGATIVE_INFINITY;
- for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
+ for(DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
double dsum = 0.;
- for (DBIDIter ex = means.iter(); ex.valid(); ex.advance()) {
+ for(DBIDIter ex = means.iter(); ex.valid(); ex.advance()) {
dsum += distQ.distance(ex, it).doubleValue();
}
- if (dsum > maxdist) {
+ if(dsum > maxdist) {
maxdist = dsum;
best.set(it);
}
}
// Add new mean:
- if (k == 0) {
+ if(k == 0) {
means.clear(); // Remove temporary first element.
}
means.add(best);
@@ -173,7 +170,7 @@ public class FarthestPointsInitialMeans<V, D extends NumberDistance<D, ?>> exten
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
Flag dropfirstP = new Flag(DROPFIRST_ID);
- if (config.grab(dropfirstP)) {
+ if(config.grab(dropfirstP)) {
dropfirst = dropfirstP.isTrue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansBatchedLloyd.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansBatchedLloyd.java
new file mode 100644
index 00000000..aec4fe0f
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansBatchedLloyd.java
@@ -0,0 +1,346 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.KMeansModel;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
+
+/**
+ * Provides the k-means algorithm, using Lloyd-style bulk iterations.
+ *
+ * However, in contrast to Lloyd's k-means and similar to MacQueen, we do update
+ * the mean vectors multiple times, not only at the very end of the iteration.
+ * This should yield faster convergence at little extra cost.
+ *
+ * To avoid issues with ordered data, we use random sampling to obtain the data
+ * blocks.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has KMeansModel
+ *
+ * @param <V> vector datatype
+ * @param <D> distance value type
+ */
+public class KMeansBatchedLloyd<V extends NumberVector<?>, D extends Distance<D>> extends AbstractKMeans<V, D, KMeansModel<V>> {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(KMeansBatchedLloyd.class);
+
+ /**
+ * Number of blocks to use.
+ */
+ int blocks;
+
+ /**
+ * Random used for partitioning.
+ */
+ RandomFactory random;
+
+ /**
+ * Constructor.
+ *
+ * @param distanceFunction distance function
+ * @param k k parameter
+ * @param maxiter Maxiter parameter
+ * @param initializer Initialization method
+ * @param blocks Number of blocks
+ * @param random Random factory used for partitioning.
+ */
+ public KMeansBatchedLloyd(PrimitiveDistanceFunction<NumberVector<?>, D> distanceFunction, int k, int maxiter, KMeansInitialization<V> initializer, int blocks, RandomFactory random) {
+ super(distanceFunction, k, maxiter, initializer);
+ this.blocks = blocks;
+ this.random = random;
+ }
+
+ @Override
+ public Clustering<KMeansModel<V>> run(Database database, Relation<V> relation) {
+ final int dim = RelationUtil.dimensionality(relation);
+ // Choose initial means
+ List<? extends NumberVector<?>> mvs = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());
+ // Convert to (modifiable) math vectors.
+ List<Vector> means = new ArrayList<>(k);
+ for (NumberVector<?> m : mvs) {
+ means.add(m.getColumnVector());
+ }
+
+ // Setup cluster assignment store
+ List<ModifiableDBIDs> clusters = new ArrayList<>();
+ for (int i = 0; i < k; i++) {
+ clusters.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k)));
+ }
+ WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
+
+ ArrayDBIDs[] parts = DBIDUtil.randomSplit(relation.getDBIDs(), blocks, random);
+
+ double[][] meanshift = new double[k][dim];
+ int[] changesize = new int[k];
+
+ IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Means iteration", LOG) : null;
+ for (int iteration = 0; maxiter <= 0 || iteration < maxiter; iteration++) {
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
+ }
+ boolean changed = false;
+ FiniteProgress pprog = LOG.isVerbose() ? new FiniteProgress("Batch", parts.length, LOG) : null;
+ for (int p = 0; p < parts.length; p++) {
+ // Initialize new means scratch space.
+ for (int i = 0; i < k; i++) {
+ Arrays.fill(meanshift[i], 0.);
+ }
+ Arrays.fill(changesize, 0);
+ changed |= assignToNearestCluster(relation, parts[p], means, meanshift, changesize, clusters, assignment);
+ // Recompute means.
+ updateMeans(means, meanshift, clusters, changesize);
+ if (pprog != null) {
+ pprog.incrementProcessed(LOG);
+ }
+ }
+ if (pprog != null) {
+ pprog.ensureCompleted(LOG);
+ }
+ // Stop if no cluster assignment changed.
+ if (!changed) {
+ break;
+ }
+ }
+ if (prog != null) {
+ prog.setCompleted(LOG);
+ }
+
+ // Wrap result
+ final NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(relation);
+ Clustering<KMeansModel<V>> result = new Clustering<>("k-Means Clustering", "kmeans-clustering");
+ for (int i = 0; i < clusters.size(); i++) {
+ KMeansModel<V> model = new KMeansModel<>(factory.newNumberVector(means.get(i).getColumnVector().getArrayRef()));
+ result.addToplevelCluster(new Cluster<>(clusters.get(i), model));
+ }
+ return result;
+ }
+
+ /**
+ * Returns a list of clusters. The k<sup>th</sup> cluster contains the ids of
+ * those FeatureVectors, that are nearest to the k<sup>th</sup> mean.
+ *
+ * @param relation the database to cluster
+ * @param ids IDs to process
+ * @param oldmeans a list of k means
+ * @param meanshift delta to apply to each mean
+ * @param changesize New cluster sizes
+ * @param clusters cluster assignment
+ * @param assignment Current cluster assignment
+ * @return true when the object was reassigned
+ */
+ protected boolean assignToNearestCluster(Relation<V> relation, DBIDs ids, List<? extends NumberVector<?>> oldmeans, double[][] meanshift, int[] changesize, List<? extends ModifiableDBIDs> clusters, WritableIntegerDataStore assignment) {
+ boolean changed = false;
+
+ if (getDistanceFunction() instanceof PrimitiveDoubleDistanceFunction) {
+ @SuppressWarnings("unchecked")
+ final PrimitiveDoubleDistanceFunction<? super NumberVector<?>> df = (PrimitiveDoubleDistanceFunction<? super NumberVector<?>>) getDistanceFunction();
+ for (DBIDIter iditer = ids.iter(); iditer.valid(); iditer.advance()) {
+ double mindist = Double.POSITIVE_INFINITY;
+ V fv = relation.get(iditer);
+ int minIndex = 0;
+ for (int i = 0; i < k; i++) {
+ double dist = df.doubleDistance(fv, oldmeans.get(i));
+ if (dist < mindist) {
+ minIndex = i;
+ mindist = dist;
+ }
+ }
+ changed |= updateAssignment(iditer, fv, clusters, assignment, meanshift, changesize, minIndex);
+ }
+ } else {
+ final PrimitiveDistanceFunction<? super NumberVector<?>, D> df = getDistanceFunction();
+ for (DBIDIter iditer = ids.iter(); iditer.valid(); iditer.advance()) {
+ D mindist = df.getDistanceFactory().infiniteDistance();
+ V fv = relation.get(iditer);
+ int minIndex = 0;
+ for (int i = 0; i < k; i++) {
+ D dist = df.distance(fv, oldmeans.get(i));
+ if (dist.compareTo(mindist) < 0) {
+ minIndex = i;
+ mindist = dist;
+ }
+ }
+ changed |= updateAssignment(iditer, fv, clusters, assignment, meanshift, changesize, minIndex);
+ }
+ }
+ return changed;
+ }
+
+ /**
+ * Update the assignment of a single object.
+ *
+ * @param id Object to assign
+ * @param fv Vector
+ * @param clusters Clusters
+ * @param assignment Current cluster assignment
+ * @param meanshift Current shifting offset
+ * @param changesize Size change of the current cluster
+ * @param minIndex Index of best cluster.
+ * @return {@code true} when assignment changed.
+ */
+ protected boolean updateAssignment(DBIDIter id, V fv, List<? extends ModifiableDBIDs> clusters, WritableIntegerDataStore assignment, double[][] meanshift, int[] changesize, int minIndex) {
+ int cur = assignment.intValue(id);
+ if (cur == minIndex) {
+ return false;
+ }
+ // Add to new cluster.
+ {
+ clusters.get(minIndex).add(id);
+ changesize[minIndex]++;
+ double[] raw = meanshift[minIndex];
+ for (int j = 0; j < fv.getDimensionality(); j++) {
+ raw[j] += fv.doubleValue(j);
+ }
+ }
+ // Remove from previous cluster
+ if (cur >= 0) {
+ clusters.get(cur).remove(id);
+ changesize[cur]--;
+ double[] raw = meanshift[cur];
+ for (int j = 0; j < fv.getDimensionality(); j++) {
+ raw[j] -= fv.doubleValue(j);
+ }
+ }
+ assignment.putInt(id, minIndex);
+ return true;
+ }
+
+ /**
+ * Merge changes into mean vectors.
+ *
+ * @param means Mean vectors
+ * @param meanshift Shift offset
+ * @param clusters
+ * @param changesize Size of change (for weighting!)
+ */
+ protected void updateMeans(List<Vector> means, double[][] meanshift, List<ModifiableDBIDs> clusters, int[] changesize) {
+ for (int i = 0; i < k; i++) {
+ int newsize = clusters.get(i).size(), oldsize = newsize - changesize[i];
+ if (newsize == 0) {
+ continue; // Keep previous mean vector.
+ }
+ if (oldsize == 0) {
+ means.set(i, new Vector(meanshift[i]).times(1. / newsize));
+ continue; // Replace with new vector.
+ }
+ if (oldsize == newsize) {
+ means.get(i).plusTimesEquals(new Vector(meanshift[i]), 1. / (double) newsize);
+ continue;
+ }
+ means.get(i).timesEquals(oldsize / (double) newsize).plusTimesEquals(new Vector(meanshift[i]), 1. / (double) newsize);
+ }
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer<V extends NumberVector<?>, D extends Distance<D>> extends AbstractKMeans.Parameterizer<V, D> {
+ /**
+ * Parameter for the number of blocks.
+ */
+ public static final OptionID BLOCKS_ID = new OptionID("kmeans.blocks", "Number of blocks to use for processing. Means will be recomputed after each block.");
+
+ /**
+ * Random source for blocking.
+ */
+ public static final OptionID RANDOM_ID = new OptionID("kmeans.blocks.random", "Random source for producing blocks.");
+
+ /**
+ * Number of blocks.
+ */
+ int blocks;
+
+ /**
+ * Random used for partitioning.
+ */
+ RandomFactory random;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ IntParameter blocksP = new IntParameter(BLOCKS_ID, 10);
+ blocksP.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
+ if (config.grab(blocksP)) {
+ blocks = blocksP.intValue();
+ }
+ RandomParameter randomP = new RandomParameter(RANDOM_ID);
+ if (config.grab(randomP)) {
+ random = randomP.getValue();
+ }
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ @Override
+ protected KMeansBatchedLloyd<V, D> makeInstance() {
+ return new KMeansBatchedLloyd<>(distanceFunction, k, maxiter, initializer, blocks, random);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansBisecting.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansBisecting.java
index 37071d36..80a581b1 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansBisecting.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansBisecting.java
@@ -41,7 +41,7 @@ import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ChainedParameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
@@ -205,7 +205,7 @@ public class KMeansBisecting<V extends NumberVector<?>, D extends Distance<?>, M
super.makeOptions(config);
IntParameter kP = new IntParameter(KMeans.K_ID);
- kP.addConstraint(new GreaterConstraint(1));
+ kP.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
if (config.grab(kP)) {
k = kP.intValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansHybridLloydMacQueen.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansHybridLloydMacQueen.java
new file mode 100644
index 00000000..2a60ef27
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansHybridLloydMacQueen.java
@@ -0,0 +1,155 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.KMeansModel;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+
+/**
+ * Provides the k-means algorithm, alternating between MacQueen-style
+ * incremental processing and Lloyd-Style batch steps.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.landmark
+ * @apiviz.has KMeansModel
+ *
+ * @param <V> vector datatype
+ * @param <D> distance value type
+ */
+public class KMeansHybridLloydMacQueen<V extends NumberVector<?>, D extends Distance<D>> extends AbstractKMeans<V, D, KMeansModel<V>> {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(KMeansHybridLloydMacQueen.class);
+
+ /**
+ * Constructor.
+ *
+ * @param distanceFunction distance function
+ * @param k k parameter
+ * @param maxiter Maxiter parameter
+ * @param initializer Initialization method
+ */
+ public KMeansHybridLloydMacQueen(PrimitiveDistanceFunction<NumberVector<?>, D> distanceFunction, int k, int maxiter, KMeansInitialization<V> initializer) {
+ super(distanceFunction, k, maxiter, initializer);
+ }
+
+ @Override
+ public Clustering<KMeansModel<V>> run(Database database, Relation<V> relation) {
+ if (relation.size() <= 0) {
+ return new Clustering<>("k-Means Clustering", "kmeans-clustering");
+ }
+ // Choose initial means
+ List<Vector> means = new ArrayList<>(k);
+ for (NumberVector<?> nv : initializer.chooseInitialMeans(database, relation, k, getDistanceFunction())) {
+ means.add(nv.getColumnVector());
+ }
+ // Setup cluster assignment store
+ List<ModifiableDBIDs> clusters = new ArrayList<>();
+ for (int i = 0; i < k; i++) {
+ clusters.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k)));
+ }
+ WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
+
+ IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Means iteration", LOG) : null;
+ for (int iteration = 0; maxiter <= 0 || iteration < maxiter; iteration += 2) {
+ { // MacQueen
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
+ }
+ boolean changed = macQueenIterate(relation, means, clusters, assignment);
+ if (!changed) {
+ break;
+ }
+ }
+ { // Lloyd
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
+ }
+ boolean changed = assignToNearestCluster(relation, means, clusters, assignment);
+ // Stop if no cluster assignment changed.
+ if (!changed) {
+ break;
+ }
+ // Recompute means.
+ means = means(clusters, means, relation);
+ }
+ }
+ if (prog != null) {
+ prog.setCompleted(LOG);
+ }
+
+ // Wrap result
+ final NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(relation);
+ Clustering<KMeansModel<V>> result = new Clustering<>("k-Means Clustering", "kmeans-clustering");
+ for (int i = 0; i < clusters.size(); i++) {
+ KMeansModel<V> model = new KMeansModel<>(factory.newNumberVector(means.get(i).getColumnVector().getArrayRef()));
+ result.addToplevelCluster(new Cluster<>(clusters.get(i), model));
+ }
+ return result;
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer<V extends NumberVector<?>, D extends Distance<D>> extends AbstractKMeans.Parameterizer<V, D> {
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ @Override
+ protected KMeansHybridLloydMacQueen<V, D> makeInstance() {
+ return new KMeansHybridLloydMacQueen<>(distanceFunction, k, maxiter, initializer);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java
index e692293c..686e2076 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java
@@ -31,6 +31,9 @@ import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.model.KMeansModel;
import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
@@ -93,15 +96,16 @@ public class KMeansLloyd<V extends NumberVector<?>, D extends Distance<D>> exten
// Setup cluster assignment store
List<ModifiableDBIDs> clusters = new ArrayList<>();
for (int i = 0; i < k; i++) {
- clusters.add(DBIDUtil.newHashSet(relation.size() / k));
+ clusters.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k)));
}
+ WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Means iteration", LOG) : null;
for (int iteration = 0; maxiter <= 0 || iteration < maxiter; iteration++) {
if (prog != null) {
prog.incrementProcessed(LOG);
}
- boolean changed = assignToNearestCluster(relation, means, clusters);
+ boolean changed = assignToNearestCluster(relation, means, clusters, assignment);
// Stop if no cluster assignment changed.
if (!changed) {
break;
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansMacQueen.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansMacQueen.java
index bb689bd3..a0f4bb3f 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansMacQueen.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansMacQueen.java
@@ -31,6 +31,9 @@ import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.model.KMeansModel;
import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
@@ -95,11 +98,9 @@ public class KMeansMacQueen<V extends NumberVector<?>, D extends Distance<D>> ex
// Initialize cluster and assign objects
List<ModifiableDBIDs> clusters = new ArrayList<>();
for (int i = 0; i < k; i++) {
- clusters.add(DBIDUtil.newHashSet(relation.size() / k));
+ clusters.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k)));
}
- assignToNearestCluster(relation, means, clusters);
- // Initial recomputation of the means.
- means = means(clusters, means, relation);
+ WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Means iteration", LOG) : null;
// Refine result
@@ -107,7 +108,7 @@ public class KMeansMacQueen<V extends NumberVector<?>, D extends Distance<D>> ex
if (prog != null) {
prog.incrementProcessed(LOG);
}
- boolean changed = macQueenIterate(relation, means, clusters);
+ boolean changed = macQueenIterate(relation, means, clusters, assignment);
if (!changed) {
break;
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansPlusPlusInitialMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansPlusPlusInitialMeans.java
index 302ca86b..6fc514eb 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansPlusPlusInitialMeans.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansPlusPlusInitialMeans.java
@@ -84,8 +84,8 @@ public class KMeansPlusPlusInitialMeans<V, D extends NumberDistance<D, ?>> exten
// Chose first mean
List<V> means = new ArrayList<>(k);
- Random random = rnd.getRandom();
- DBID first = DBIDUtil.deref(DBIDUtil.randomSample(relation.getDBIDs(), 1, new Random(random.nextLong())).iter());
+ Random random = rnd.getSingleThreadedRandom();
+ DBID first = DBIDUtil.deref(DBIDUtil.randomSample(relation.getDBIDs(), 1, random).iter());
means.add(relation.get(first));
ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
@@ -134,8 +134,8 @@ public class KMeansPlusPlusInitialMeans<V, D extends NumberDistance<D, ?>> exten
// Chose first mean
ArrayModifiableDBIDs means = DBIDUtil.newArray(k);
- Random random = rnd.getRandom();
- DBID first = DBIDUtil.deref(DBIDUtil.randomSample(distQ.getRelation().getDBIDs(), 1, new Random(random.nextLong())).iter());
+ Random random = rnd.getSingleThreadedRandom();
+ DBID first = DBIDUtil.deref(DBIDUtil.randomSample(distQ.getRelation().getDBIDs(), 1, random).iter());
means.add(first);
ArrayDBIDs ids = DBIDUtil.ensureArray(distQ.getRelation().getDBIDs());
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMediansLloyd.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMediansLloyd.java
index cc7aaa9e..0a97c4d3 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMediansLloyd.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMediansLloyd.java
@@ -31,6 +31,9 @@ import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.model.MeanModel;
import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
@@ -88,15 +91,16 @@ public class KMediansLloyd<V extends NumberVector<?>, D extends Distance<D>> ext
// Setup cluster assignment store
List<ModifiableDBIDs> clusters = new ArrayList<>();
for (int i = 0; i < k; i++) {
- clusters.add(DBIDUtil.newHashSet(relation.size() / k));
+ clusters.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k)));
}
+ WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Medians iteration", LOG) : null;
for (int iteration = 0; maxiter <= 0 || iteration < maxiter; iteration++) {
if (prog != null) {
prog.incrementProcessed(LOG);
}
- boolean changed = assignToNearestCluster(relation, medians, clusters);
+ boolean changed = assignToNearestCluster(relation, medians, clusters, assignment);
// Stop if no cluster assignment changed.
if (!changed) {
break;
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsEM.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsEM.java
index 87a0c7ae..41cca225 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsEM.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsEM.java
@@ -48,8 +48,7 @@ import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
import de.lmu.ifi.dbs.elki.math.Mean;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
@@ -119,7 +118,7 @@ public class KMedoidsEM<V, D extends NumberDistance<D, ?>> extends AbstractDista
* @return result
*/
public Clustering<MedoidModel> run(Database database, Relation<V> relation) {
- if (relation.size() <= 0) {
+ if(relation.size() <= 0) {
return new Clustering<>("k-Medoids Clustering", "kmedoids-clustering");
}
DistanceQuery<V, D> distQ = database.getDistanceQuery(relation, getDistanceFunction());
@@ -127,7 +126,7 @@ public class KMedoidsEM<V, D extends NumberDistance<D, ?>> extends AbstractDista
ArrayModifiableDBIDs medoids = DBIDUtil.newArray(initializer.chooseInitialMedoids(k, distQ));
// Setup cluster assignment store
List<ModifiableDBIDs> clusters = new ArrayList<>();
- for (int i = 0; i < k; i++) {
+ for(int i = 0; i < k; i++) {
clusters.add(DBIDUtil.newHashSet(relation.size() / k));
}
Mean[] mdists = Mean.newArray(k);
@@ -139,47 +138,47 @@ public class KMedoidsEM<V, D extends NumberDistance<D, ?>> extends AbstractDista
IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Medoids iteration", LOG) : null;
// Swap phase
boolean changed = true;
- while (changed) {
- if (prog != null) {
+ while(changed) {
+ if(prog != null) {
prog.incrementProcessed(LOG);
}
changed = false;
// Try to swap the medoid with a better cluster member:
int i = 0;
- for (DBIDIter miter = medoids.iter(); miter.valid(); miter.advance(), i++) {
+ for(DBIDIter miter = medoids.iter(); miter.valid(); miter.advance(), i++) {
DBID best = null;
Mean bestm = mdists[i];
- for (DBIDIter iter = clusters.get(i).iter(); iter.valid(); iter.advance()) {
- if (DBIDUtil.equal(miter, iter)) {
+ for(DBIDIter iter = clusters.get(i).iter(); iter.valid(); iter.advance()) {
+ if(DBIDUtil.equal(miter, iter)) {
continue;
}
Mean mdist = new Mean();
- for (DBIDIter iter2 = clusters.get(i).iter(); iter2.valid(); iter2.advance()) {
+ for(DBIDIter iter2 = clusters.get(i).iter(); iter2.valid(); iter2.advance()) {
mdist.put(distQ.distance(iter, iter2).doubleValue());
}
- if (mdist.getMean() < bestm.getMean()) {
+ if(mdist.getMean() < bestm.getMean()) {
best = DBIDUtil.deref(iter);
bestm = mdist;
}
}
- if (best != null && !DBIDUtil.equal(miter, best)) {
+ if(best != null && !DBIDUtil.equal(miter, best)) {
changed = true;
medoids.set(i, best);
mdists[i] = bestm;
}
}
// Reassign
- if (changed) {
+ if(changed) {
assignToNearestCluster(medoids, mdists, clusters, distQ);
}
}
- if (prog != null) {
+ if(prog != null) {
prog.setCompleted(LOG);
}
// Wrap result
Clustering<MedoidModel> result = new Clustering<>("k-Medoids Clustering", "kmedoids-clustering");
- for (int i = 0; i < clusters.size(); i++) {
+ for(int i = 0; i < clusters.size(); i++) {
MedoidModel model = new MedoidModel(medoids.get(i));
result.addToplevelCluster(new Cluster<>(clusters.get(i), model));
}
@@ -200,27 +199,27 @@ public class KMedoidsEM<V, D extends NumberDistance<D, ?>> extends AbstractDista
boolean changed = false;
double[] dists = new double[k];
- for (DBIDIter iditer = distQ.getRelation().iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = distQ.getRelation().iterDBIDs(); iditer.valid(); iditer.advance()) {
int minIndex = 0;
double mindist = Double.POSITIVE_INFINITY;
{
int i = 0;
- for (DBIDIter miter = means.iter(); miter.valid(); miter.advance(), i++) {
+ for(DBIDIter miter = means.iter(); miter.valid(); miter.advance(), i++) {
dists[i] = distQ.distance(iditer, miter).doubleValue();
- if (dists[i] < mindist) {
+ if(dists[i] < mindist) {
minIndex = i;
mindist = dists[i];
}
}
}
- if (clusters.get(minIndex).add(iditer)) {
+ if(clusters.get(minIndex).add(iditer)) {
changed = true;
mdist[minIndex].put(mindist);
// Remove from previous cluster
// TODO: keep a list of cluster assignments to save this search?
- for (int i = 0; i < k; i++) {
- if (i != minIndex) {
- if (clusters.get(i).remove(iditer)) {
+ for(int i = 0; i < k; i++) {
+ if(i != minIndex) {
+ if(clusters.get(i).remove(iditer)) {
mdist[minIndex].put(dists[i], -1);
break;
}
@@ -259,19 +258,19 @@ public class KMedoidsEM<V, D extends NumberDistance<D, ?>> extends AbstractDista
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
IntParameter kP = new IntParameter(KMeans.K_ID);
- kP.addConstraint(new GreaterConstraint(0));
- if (config.grab(kP)) {
+ kP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(kP)) {
k = kP.intValue();
}
ObjectParameter<KMedoidsInitialization<V>> initialP = new ObjectParameter<>(KMeans.INIT_ID, KMedoidsInitialization.class, PAMInitialMeans.class);
- if (config.grab(initialP)) {
+ if(config.grab(initialP)) {
initializer = initialP.instantiateClass(config);
}
IntParameter maxiterP = new IntParameter(KMeans.MAXITER_ID, 0);
- maxiterP.addConstraint(new GreaterEqualConstraint(0));
- if (config.grab(maxiterP)) {
+ maxiterP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_INT);
+ if(config.grab(maxiterP)) {
maxiter = maxiterP.intValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsPAM.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsPAM.java
index 1feda867..c9e1dc47 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsPAM.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsPAM.java
@@ -53,8 +53,7 @@ import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
@@ -124,7 +123,7 @@ public class KMedoidsPAM<V, D extends NumberDistance<D, ?>> extends AbstractDist
* @return result
*/
public Clustering<MedoidModel> run(Database database, Relation<V> relation) {
- if (relation.size() <= 0) {
+ if(relation.size() <= 0) {
return new Clustering<>("k-Medoids Clustering", "kmedoids-clustering");
}
DistanceQuery<V, D> distQ = database.getDistanceQuery(relation, getDistanceFunction());
@@ -133,7 +132,7 @@ public class KMedoidsPAM<V, D extends NumberDistance<D, ?>> extends AbstractDist
ArrayModifiableDBIDs medoids = DBIDUtil.newArray(initializer.chooseInitialMedoids(k, distQ));
// Setup cluster assignment store
List<ModifiableDBIDs> clusters = new ArrayList<>();
- for (int i = 0; i < k; i++) {
+ for(int i = 0; i < k; i++) {
clusters.add(DBIDUtil.newHashSet(relation.size() / k));
}
@@ -145,8 +144,8 @@ public class KMedoidsPAM<V, D extends NumberDistance<D, ?>> extends AbstractDist
IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("PAM iteration", LOG) : null;
// Swap phase
boolean changed = true;
- while (changed) {
- if (prog != null) {
+ while(changed) {
+ if(prog != null) {
prog.incrementProcessed(LOG);
}
changed = false;
@@ -155,57 +154,60 @@ public class KMedoidsPAM<V, D extends NumberDistance<D, ?>> extends AbstractDist
DBID bestid = null;
int bestcluster = -1;
int i = 0;
- for (DBIDIter miter = medoids.iter(); miter.valid(); miter.advance(), i++) {
- for (DBIDIter iter = clusters.get(i).iter(); iter.valid(); iter.advance()) {
- if (DBIDUtil.equal(miter, iter)) {
+ for(DBIDIter miter = medoids.iter(); miter.valid(); miter.advance(), i++) {
+ for(DBIDIter iter = clusters.get(i).iter(); iter.valid(); iter.advance()) {
+ if(DBIDUtil.equal(miter, iter)) {
continue;
}
// double disti = distQ.distance(id, med).doubleValue();
double cost = 0;
DBIDIter olditer = medoids.iter();
- for (int j = 0; j < k; j++, olditer.advance()) {
- for (DBIDIter iter2 = clusters.get(j).iter(); iter2.valid(); iter2.advance()) {
+ for(int j = 0; j < k; j++, olditer.advance()) {
+ for(DBIDIter iter2 = clusters.get(j).iter(); iter2.valid(); iter2.advance()) {
double distcur = distQ.distance(iter2, olditer).doubleValue();
double distnew = distQ.distance(iter2, iter).doubleValue();
- if (j == i) {
+ if(j == i) {
// Cases 1 and 2.
double distsec = second.doubleValue(iter2);
- if (distcur > distsec) {
+ if(distcur > distsec) {
// Case 1, other would switch to a third medoid
cost += distsec - distcur; // Always positive!
- } else { // Would remain with the candidate
+ }
+ else { // Would remain with the candidate
cost += distnew - distcur; // Could be negative
}
- } else {
+ }
+ else {
// Cases 3-4: objects from other clusters
- if (distcur < distnew) {
+ if(distcur < distnew) {
// Case 3: no change
- } else {
+ }
+ else {
// Case 4: would switch to new medoid
cost += distnew - distcur; // Always negative
}
}
}
}
- if (cost < best) {
+ if(cost < best) {
best = cost;
bestid = DBIDUtil.deref(iter);
bestcluster = i;
}
}
}
- if (prog != null) {
+ if(prog != null) {
prog.setCompleted(LOG);
}
- if (LOG.isDebugging()) {
+ if(LOG.isDebugging()) {
LOG.debug("Best cost: " + best);
}
- if (bestid != null) {
+ if(bestid != null) {
changed = true;
medoids.set(bestcluster, bestid);
}
// Reassign
- if (changed) {
+ if(changed) {
// TODO: can we save some of these recomputations?
assignToNearestCluster(medoids, ids, second, clusters, distQ);
}
@@ -213,7 +215,7 @@ public class KMedoidsPAM<V, D extends NumberDistance<D, ?>> extends AbstractDist
// Wrap result
Clustering<MedoidModel> result = new Clustering<>("k-Medoids Clustering", "kmedoids-clustering");
- for (int i = 0; i < clusters.size(); i++) {
+ for(int i = 0; i < clusters.size(); i++) {
MedoidModel model = new MedoidModel(medoids.get(i));
result.addToplevelCluster(new Cluster<>(clusters.get(i), model));
}
@@ -234,30 +236,31 @@ public class KMedoidsPAM<V, D extends NumberDistance<D, ?>> extends AbstractDist
protected boolean assignToNearestCluster(ArrayDBIDs means, DBIDs ids, WritableDoubleDataStore second, List<? extends ModifiableDBIDs> clusters, DistanceQuery<V, D> distQ) {
boolean changed = false;
- for (DBIDIter iditer = distQ.getRelation().iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = distQ.getRelation().iterDBIDs(); iditer.valid(); iditer.advance()) {
int minIndex = 0;
double mindist = Double.POSITIVE_INFINITY;
double mindist2 = Double.POSITIVE_INFINITY;
{
int i = 0;
- for (DBIDIter miter = means.iter(); miter.valid(); miter.advance(), i++) {
+ for(DBIDIter miter = means.iter(); miter.valid(); miter.advance(), i++) {
double dist = distQ.distance(iditer, miter).doubleValue();
- if (dist < mindist) {
+ if(dist < mindist) {
minIndex = i;
mindist2 = mindist;
mindist = dist;
- } else if (dist < mindist2) {
+ }
+ else if(dist < mindist2) {
mindist2 = dist;
}
}
}
- if (clusters.get(minIndex).add(iditer)) {
+ if(clusters.get(minIndex).add(iditer)) {
changed = true;
// Remove from previous cluster
// TODO: keep a list of cluster assignments to save this search?
- for (int i = 0; i < k; i++) {
- if (i != minIndex) {
- if (clusters.get(i).remove(iditer)) {
+ for(int i = 0; i < k; i++) {
+ if(i != minIndex) {
+ if(clusters.get(i).remove(iditer)) {
break;
}
}
@@ -296,19 +299,19 @@ public class KMedoidsPAM<V, D extends NumberDistance<D, ?>> extends AbstractDist
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
IntParameter kP = new IntParameter(KMeans.K_ID);
- kP.addConstraint(new GreaterConstraint(0));
- if (config.grab(kP)) {
+ kP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(kP)) {
k = kP.intValue();
}
ObjectParameter<KMedoidsInitialization<V>> initialP = new ObjectParameter<>(KMeans.INIT_ID, KMedoidsInitialization.class, PAMInitialMeans.class);
- if (config.grab(initialP)) {
+ if(config.grab(initialP)) {
initializer = initialP.instantiateClass(config);
}
IntParameter maxiterP = new IntParameter(KMeans.MAXITER_ID, 0);
- maxiterP.addConstraint(new GreaterEqualConstraint(0));
- if (config.grab(maxiterP)) {
+ maxiterP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_INT);
+ if(config.grab(maxiterP)) {
maxiter = maxiterP.intValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyGeneratedInitialMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyGeneratedInitialMeans.java
index ee90e0dc..1329132e 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyGeneratedInitialMeans.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyGeneratedInitialMeans.java
@@ -60,7 +60,7 @@ public class RandomlyGeneratedInitialMeans<V extends NumberVector<?>> extends Ab
NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(relation);
Pair<V, V> minmax = DatabaseUtil.computeMinMax(relation);
List<V> means = new ArrayList<>(k);
- final Random random = rnd.getRandom();
+ final Random random = rnd.getSingleThreadedRandom();
for(int i = 0; i < k; i++) {
double[] r = MathUtil.randomDoubleArray(dim, random);
// Rescale
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/SampleKMeansInitialization.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/SampleKMeansInitialization.java
index 9f0a1923..79013364 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/SampleKMeansInitialization.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/SampleKMeansInitialization.java
@@ -93,7 +93,7 @@ public class SampleKMeansInitialization<V extends NumberVector<?>, D extends Dis
Clustering<? extends MeanModel<V>> clusters = innerkMeans.run(proxydb, proxyv);
List<V> means = new ArrayList<>();
for (Cluster<? extends MeanModel<V>> cluster : clusters.getAllClusters()) {
- means.add((V) cluster.getModel().getMean());
+ means.add(cluster.getModel().getMean());
}
return means;
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/package-info.java
index ed9a528d..1be19bd1 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/package-info.java
@@ -1,4 +1,27 @@
/**
* Quality measures for k-Means results.
*/
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.quality; \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/onedimensional/KNNKernelDensityMinimaClustering.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/onedimensional/KNNKernelDensityMinimaClustering.java
new file mode 100644
index 00000000..55114f7d
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/onedimensional/KNNKernelDensityMinimaClustering.java
@@ -0,0 +1,384 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.onedimensional;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.VectorUtil;
+import de.lmu.ifi.dbs.elki.data.model.ClusterModel;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
+import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.EpanechnikovKernelDensityFunction;
+import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.KernelDensityFunction;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.EnumParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * Cluster one-dimensional data by splitting the data set on local minima after
+ * performing kernel density estimation.
+ *
+ * @author Erich Schubert
+ */
+public class KNNKernelDensityMinimaClustering<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<ClusterModel>> implements ClusteringAlgorithm<Clustering<ClusterModel>> {
+ /**
+ * Class logger.
+ */
+ private static final Logging LOG = Logging.getLogger(KNNKernelDensityMinimaClustering.class);
+
+ /**
+ * Estimation mode.
+ *
+ * @apiviz.exclude
+ */
+ public static enum Mode {
+ BALLOON, // Balloon estimator
+ SAMPLE, // Sample-point estimator
+ }
+
+ /**
+ * Dimension to use for clustering.
+ */
+ protected int dim;
+
+ /**
+ * Kernel density function.
+ */
+ protected KernelDensityFunction kernel;
+
+ /**
+ * Estimation modes.
+ */
+ protected Mode mode;
+
+ /**
+ * Number of neighbors to use for bandwidth.
+ */
+ protected int k;
+
+ /**
+ * Window width, for local minima criterions.
+ */
+ protected int minwindow;
+
+ /**
+ * Constructor.
+ *
+ * @param dim Dimension to use for clustering
+ * @param kernel Kernel function
+ * @param mode Bandwidth mode
+ * @param k Number of neighbors
+ * @param minwindow Window size for comparison
+ */
+ public KNNKernelDensityMinimaClustering(int dim, KernelDensityFunction kernel, Mode mode, int k, int minwindow) {
+ super();
+ this.dim = dim;
+ this.kernel = kernel;
+ this.mode = mode;
+ this.k = k;
+ this.minwindow = minwindow;
+ }
+
+ /**
+ * Run the clustering algorithm on a data relation.
+ *
+ * @param relation Relation
+ * @return Clustering result
+ */
+ public Clustering<ClusterModel> run(Relation<V> relation) {
+ ArrayModifiableDBIDs ids = DBIDUtil.newArray(relation.getDBIDs());
+ final int size = ids.size();
+
+ // Sort by the sole dimension
+ ids.sort(new VectorUtil.SortDBIDsBySingleDimension(relation, dim));
+
+ // Density storage.
+ WritableDoubleDataStore density = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 0.);
+
+ DBIDArrayIter iter = ids.iter(), iter2 = ids.iter();
+
+ StepProgress sprog = LOG.isVerbose() ? new StepProgress("Clustering steps", 2) : null;
+
+ if(sprog != null) {
+ sprog.beginStep(1, "Kernel density estimation.", LOG);
+ }
+ {
+ double[] scratch = new double[2 * k];
+ iter.seek(0);
+ for(int i = 0; i < size; i++, iter.advance()) {
+ // Current value.
+ final double curv = relation.get(iter).doubleValue(dim);
+
+ final int pre = Math.max(i - k, 0), prek = i - pre;
+ final int pos = Math.min(i + k, size - 1), posk = pos - i;
+ iter2.seek(pre);
+ for(int j = 0; j < prek; j++, iter2.advance()) {
+ scratch[j] = curv - relation.get(iter2).doubleValue(dim);
+ }
+ assert (iter2.getOffset() == i);
+ iter2.advance();
+ for(int j = 0; j < posk; j++, iter2.advance()) {
+ scratch[prek + j] = relation.get(iter2).doubleValue(dim) - curv;
+ }
+
+ assert (prek + posk >= k);
+ double kdist = QuickSelect.quickSelect(scratch, 0, prek + posk, k);
+ switch(mode){
+ case BALLOON: {
+ double dens = 0.;
+ if(kdist > 0.) {
+ for(int j = 0; j < prek + posk; j++) {
+ dens += kernel.density(scratch[j] / kdist);
+ }
+ }
+ else {
+ dens = Double.POSITIVE_INFINITY;
+ }
+ assert (iter.getOffset() == i);
+ density.putDouble(iter, dens);
+ break;
+ }
+ case SAMPLE: {
+ if(kdist > 0.) {
+ iter2.seek(pre);
+ for(int j = 0; j < prek; j++, iter2.advance()) {
+ double delta = curv - relation.get(iter2).doubleValue(dim);
+ density.putDouble(iter2, density.doubleValue(iter2) + kernel.density(delta / kdist));
+ }
+ assert (iter2.getOffset() == i);
+ iter2.advance();
+ for(int j = 0; j < posk; j++, iter2.advance()) {
+ double delta = relation.get(iter2).doubleValue(dim) - curv;
+ density.putDouble(iter2, density.doubleValue(iter2) + kernel.density(delta / kdist));
+ }
+ }
+ else {
+ iter2.seek(pre);
+ for(int j = 0; j < prek; j++, iter2.advance()) {
+ double delta = curv - relation.get(iter2).doubleValue(dim);
+ if(!(delta > 0.)) {
+ density.putDouble(iter2, Double.POSITIVE_INFINITY);
+ }
+ }
+ assert (iter2.getOffset() == i);
+ iter2.advance();
+ for(int j = 0; j < posk; j++, iter2.advance()) {
+ double delta = relation.get(iter2).doubleValue(dim) - curv;
+ if(!(delta > 0.)) {
+ density.putDouble(iter2, Double.POSITIVE_INFINITY);
+ }
+ }
+ }
+ break;
+ }
+ default:
+ throw new UnsupportedOperationException("Unknown mode specified.");
+ }
+ }
+ }
+
+ if(sprog != null) {
+ sprog.beginStep(2, "Local minima detection.", LOG);
+ }
+ Clustering<ClusterModel> clustering = new Clustering<>("onedimensional-kde-clustering", "One-Dimensional clustering using kernel density estimation.");
+ {
+ double[] scratch = new double[2 * minwindow + 1];
+ int begin = 0;
+ int halfw = (minwindow + 1) >> 1;
+ iter.seek(0);
+ // Fill initial buffer.
+ for(int i = 0; i < size; i++, iter.advance()) {
+ final int m = i % scratch.length, t = (i - minwindow - 1) % scratch.length;
+ scratch[m] = density.doubleValue(iter);
+ if(i > scratch.length) {
+ double min = Double.POSITIVE_INFINITY;
+ for(int j = 0; j < scratch.length; j++) {
+ if(j != t && scratch[j] < min) {
+ min = scratch[j];
+ }
+ }
+ // Local minimum:
+ if(scratch[t] < min) {
+ int end = i - minwindow + 1;
+ { // Test on which side the kNN is
+ iter2.seek(end);
+ double curv = relation.get(iter2).doubleValue(dim);
+ iter2.seek(end - halfw);
+ double left = relation.get(iter2).doubleValue(dim) - curv;
+ iter2.seek(end + halfw);
+ double right = curv - relation.get(iter2).doubleValue(dim);
+ if(left < right) {
+ end++;
+ }
+ }
+ iter2.seek(begin);
+ ArrayModifiableDBIDs cids = DBIDUtil.newArray(end - begin);
+ for(int j = 0; j < end - begin; j++, iter2.advance()) {
+ cids.add(iter2);
+ }
+ clustering.addToplevelCluster(new Cluster<>(cids, ClusterModel.CLUSTER));
+ begin = end;
+ }
+ }
+ }
+ // Extract last cluster
+ int end = size;
+ iter2.seek(begin);
+ ArrayModifiableDBIDs cids = DBIDUtil.newArray(end - begin);
+ for(int j = 0; j < end - begin; j++, iter2.advance()) {
+ cids.add(iter2);
+ }
+ clustering.addToplevelCluster(new Cluster<>(cids, ClusterModel.CLUSTER));
+ }
+
+ if(sprog != null) {
+ sprog.setCompleted(LOG);
+ }
+ return clustering;
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(new VectorFieldTypeInformation<>(NumberVector.class, dim + 1, Integer.MAX_VALUE));
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
+ /**
+ * Dimension to use for clustering.
+ */
+ public static final OptionID DIM_ID = new OptionID("kernelcluster.dim", "Dimension to use for clustering. For one-dimensional data, use 0.");
+
+ /**
+ * Kernel function.
+ */
+ public static final OptionID KERNEL_ID = new OptionID("kernelcluster.kernel", "Kernel function for density estimation.");
+
+ /**
+ * KDE mode.
+ */
+ public static final OptionID MODE_ID = new OptionID("kernelcluster.mode", "Kernel density estimation mode (baloon estimator vs. sample point estimator).");
+
+ /**
+ * Number of neighbors for bandwidth estimation.
+ */
+ public static final OptionID K_ID = new OptionID("kernelcluster.knn", "Number of nearest neighbors to use for bandwidth estimation.");
+
+ /**
+ * Half window width to find local minima.
+ */
+ public static final OptionID WINDOW_ID = new OptionID("kernelcluster.window", "Half width of sliding window to find local minima.");
+
+ /**
+ * Dimension to use for clustering.
+ */
+ protected int dim;
+
+ /**
+ * Kernel density function.
+ */
+ protected KernelDensityFunction kernel;
+
+ /**
+ * Estimation modes.
+ */
+ protected Mode mode;
+
+ /**
+ * Number of neighbors to use for bandwidth.
+ */
+ protected int k;
+
+ /**
+ * Window width, for local minima criterions.
+ */
+ protected int minwindow;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ IntParameter dimP = new IntParameter(DIM_ID, 0);
+ dimP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_INT);
+ if(config.grab(dimP)) {
+ dim = dimP.intValue();
+ }
+
+ ObjectParameter<KernelDensityFunction> kernelP = new ObjectParameter<>(KERNEL_ID, KernelDensityFunction.class, EpanechnikovKernelDensityFunction.class);
+ if(config.grab(kernelP)) {
+ kernel = kernelP.instantiateClass(config);
+ }
+
+ EnumParameter<Mode> modeP = new EnumParameter<>(MODE_ID, Mode.class, Mode.BALLOON);
+ if(config.grab(modeP)) {
+ mode = modeP.getValue();
+ }
+
+ IntParameter kP = new IntParameter(K_ID);
+ kP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(kP)) {
+ k = kP.intValue();
+ }
+
+ IntParameter windowP = new IntParameter(WINDOW_ID);
+ windowP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(windowP)) {
+ minwindow = windowP.intValue();
+ }
+ }
+
+ @Override
+ protected KNNKernelDensityMinimaClustering<V> makeInstance() {
+ return new KNNKernelDensityMinimaClustering<>(dim, kernel, mode, k, minwindow);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/onedimensional/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/onedimensional/package-info.java
new file mode 100644
index 00000000..c6c55244
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/onedimensional/package-info.java
@@ -0,0 +1,27 @@
+/**
+ * Clustering algorithms for one-dimensional data.
+ */
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package de.lmu.ifi.dbs.elki.algorithm.clustering.onedimensional; \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java
index db026e93..617d74cd 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java
@@ -56,8 +56,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
@@ -594,14 +593,14 @@ public class CLIQUE<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
IntParameter xsiP = new IntParameter(XSI_ID);
- xsiP.addConstraint(new GreaterConstraint(0));
+ xsiP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
if(config.grab(xsiP)) {
xsi = xsiP.intValue();
}
DoubleParameter tauP = new DoubleParameter(TAU_ID);
- tauP.addConstraint(new GreaterConstraint(0));
- tauP.addConstraint(new LessConstraint(1));
+ tauP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+ tauP.addConstraint(CommonConstraints.LESS_THAN_ONE_DOUBLE);
if(config.grab(tauP)) {
tau = tauP.doubleValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DOC.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DOC.java
new file mode 100644
index 00000000..5f798a66
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DOC.java
@@ -0,0 +1,605 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.BitSet;
+import java.util.Random;
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.Subspace;
+import de.lmu.ifi.dbs.elki.data.model.SubspaceModel;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDVar;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceMaximumDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
+
+/**
+ * <p>
+ * Provides the DOC algorithm, and its heuristic variant, FastDOC. DOC is a
+ * sampling based subspace clustering algorithm.
+ * </p>
+ *
+ * <p>
+ * Reference: <br/>
+ * C. M. Procopiuc, M. Jones, P. K. Agarwal, T. M. Murali<br />
+ * A Monte Carlo algorithm for fast projective clustering. <br/>
+ * In: Proc. ACM SIGMOD Int. Conf. on Management of Data (SIGMOD '02).
+ * </p>
+ *
+ * @author Florian Nuecke
+ *
+ * @apiviz.has SubspaceModel
+ *
+ * @param <V> the type of NumberVector handled by this Algorithm.
+ */
+@Title("DOC: Density-based Optimal projective Clustering")
+@Reference(authors = "C. M. Procopiuc, M. Jones, P. K. Agarwal, T. M. Murali", title = "A Monte Carlo algorithm for fast projective clustering", booktitle = "Proc. ACM SIGMOD Int. Conf. on Management of Data (SIGMOD '02)", url = "http://dx.doi.org/10.1145/564691.564739")
+public class DOC<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<SubspaceModel<V>>> implements SubspaceClusteringAlgorithm<SubspaceModel<V>> {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(DOC.class);
+
+ /**
+ * Relative density threshold parameter alpha.
+ */
+ private double alpha;
+
+ /**
+ * Balancing parameter for importance of points vs. dimensions
+ */
+ private double beta;
+
+ /**
+ * Half width parameter.
+ */
+ private double w;
+
+ /**
+ * Holds the value of {@link Parameterizer#HEURISTICS_ID}.
+ */
+ private boolean heuristics;
+
+ /**
+ * Holds the value of {@link Parameterizer#D_ZERO_ID}.
+ */
+ private int d_zero;
+
+ /**
+ * Randomizer used internally for sampling points.
+ */
+ private RandomFactory rnd;
+
+ /**
+ * Constructor.
+ *
+ * @param alpha &alpha; relative density threshold.
+ * @param beta &beta; balancing parameter for size vs. dimensionality.
+ * @param w <em>w</em> half width parameter.
+ * @param heuristics whether to use heuristics (FastDOC) or not.
+ * @param random Random factory
+ */
+ public DOC(double alpha, double beta, double w, boolean heuristics, int d_zero, RandomFactory random) {
+ this.alpha = alpha;
+ this.beta = beta;
+ this.w = w;
+ this.heuristics = heuristics;
+ this.d_zero = d_zero;
+ this.rnd = random;
+ }
+
+ /**
+ * Performs the DOC or FastDOC (as configured) algorithm on the given
+ * Database.
+ *
+ * <p>
+ * This will run exhaustively, i.e. run DOC until no clusters are found
+ * anymore / the database size has shrunk below the threshold for minimum
+ * cluster size.
+ * </p>
+ *
+ * @param database Database
+ * @param relation Data relation
+ */
+ public Clustering<SubspaceModel<V>> run(Database database, Relation<V> relation) {
+ // Dimensionality of our set.
+ final int d = RelationUtil.dimensionality(relation);
+
+ // Get available DBIDs as a set we can remove items from.
+ ArrayModifiableDBIDs S = DBIDUtil.newArray(relation.getDBIDs());
+
+ // Precompute values as described in Figure 2.
+ double r = Math.abs(Math.log(d + d) / Math.log(beta * .5));
+ // Outer loop count.
+ int n = (int) (2. / alpha);
+ // Inner loop count.
+ int m = (int) (Math.pow(2. / alpha, r) * Math.log(4));
+ if(heuristics) {
+ m = Math.min(m, Math.min(1000000, d * d));
+ }
+
+ // Minimum size for a cluster for it to be accepted.
+ int minClusterSize = (int) (alpha * S.size());
+
+ // List of all clusters we found.
+ Clustering<SubspaceModel<V>> result = new Clustering<>("DOC Clusters", "DOC");
+
+ // Inform the user about the number of actual clusters found so far.
+ IndefiniteProgress cprogress = LOG.isVerbose() ? new IndefiniteProgress("Number of clusters", LOG) : null;
+
+ // To not only find a single cluster, we continue running until our set
+ // of points is empty.
+ while(S.size() > minClusterSize) {
+ Cluster<SubspaceModel<V>> C;
+ if(heuristics) {
+ C = runFastDOC(relation, S, d, n, m, (int) r);
+ }
+ else {
+ C = runDOC(relation, S, d, n, m, (int) r, minClusterSize);
+ }
+
+ if(C == null) {
+ // Stop trying if we couldn't find a cluster.
+ break;
+ }
+ // Found a cluster, remember it, remove its points from the set.
+ result.addToplevelCluster(C);
+
+ // Remove all points of the cluster from the set and continue.
+ S.removeDBIDs(C.getIDs());
+
+ if(cprogress != null) {
+ cprogress.setProcessed(result.getAllClusters().size(), LOG);
+ }
+ }
+
+ // Add the remainder as noise.
+ if(S.size() > 0) {
+ BitSet alldims = new BitSet();
+ alldims.set(0, d);
+ result.addToplevelCluster(new Cluster<>(S, true, new SubspaceModel<>(new Subspace(alldims), Centroid.make(relation, S).toVector(relation))));
+ }
+
+ if(cprogress != null) {
+ cprogress.setCompleted(LOG);
+ }
+
+ return result;
+ }
+
+ /**
+ * Performs a single run of DOC, finding a single cluster.
+ *
+ * @param relation used to get actual values for DBIDs.
+ * @param S The set of points we're working on.
+ * @param d Dimensionality of the data set we're currently working on.
+ * @param r Size of random samples.
+ * @param m Number of inner iterations (per seed point).
+ * @param n Number of outer iterations (seed points).
+ * @param minClusterSize Minimum size a cluster must have to be accepted.
+ * @return a cluster, if one is found, else <code>null</code>.
+ */
+ private Cluster<SubspaceModel<V>> runDOC(Relation<V> relation, ArrayModifiableDBIDs S, final int d, int n, int m, int r, int minClusterSize) {
+ final DoubleDistance wd = new DoubleDistance(w);
+ // Best cluster for the current run.
+ DBIDs C = null;
+ // Relevant attributes for the best cluster.
+ BitSet D = null;
+ // Quality of the best cluster.
+ double quality = Double.NEGATIVE_INFINITY;
+
+ // Bounds for our cluster.
+ // ModifiableHyperBoundingBox bounds = new ModifiableHyperBoundingBox(new
+ // double[d], new double[d]);
+
+ // Weights for distance (= rectangle query)
+ SubspaceMaximumDistanceFunction df = new SubspaceMaximumDistanceFunction(new BitSet(d));
+ DistanceQuery<V, DoubleDistance> dq = relation.getDatabase().getDistanceQuery(relation, df);
+ RangeQuery<V, DoubleDistance> rq = relation.getDatabase().getRangeQuery(dq);
+
+ // Inform the user about the progress in the current iteration.
+ FiniteProgress iprogress = LOG.isVerbose() ? new FiniteProgress("Iteration progress for current cluster", m * n, LOG) : null;
+
+ Random random = rnd.getSingleThreadedRandom();
+ DBIDArrayIter iter = S.iter();
+
+ for(int i = 0; i < n; ++i) {
+ // Pick a random seed point.
+ iter.seek(random.nextInt(S.size()));
+
+ for(int j = 0; j < m; ++j) {
+ // Choose a set of random points.
+ DBIDs randomSet = DBIDUtil.randomSample(S, Math.min(S.size(), r), random);
+
+ // Initialize cluster info.
+ BitSet nD = new BitSet(d);
+
+ // Test each dimension and build bounding box.
+ for(int k = 0; k < d; ++k) {
+ if(dimensionIsRelevant(k, relation, randomSet)) {
+ nD.set(k);
+ }
+ }
+ if(nD.cardinality() > 0) {
+ // Get all points in the box.
+ df.setSelectedDimensions(nD);
+ // TODO: add filtering capabilities into query API!
+ DBIDs nC = DBIDUtil.intersection(S, rq.getRangeForDBID(iter, wd));
+
+ if(LOG.isDebuggingFiner()) {
+ LOG.finer("Testing a cluster candidate, |C| = " + nC.size() + ", |D| = " + nD.cardinality());
+ }
+
+ // Is the cluster large enough?
+ if(nC.size() < minClusterSize) {
+ // Too small.
+ if(LOG.isDebuggingFiner()) {
+ LOG.finer("... but it's too small.");
+ }
+ }
+ else {
+ // Better cluster than before?
+ double nQuality = computeClusterQuality(nC.size(), nD.cardinality());
+ if(nQuality > quality) {
+ if(LOG.isDebuggingFiner()) {
+ LOG.finer("... and it's the best so far: " + nQuality + " vs. " + quality);
+ }
+ C = nC;
+ D = nD;
+ quality = nQuality;
+ }
+ else {
+ if(LOG.isDebuggingFiner()) {
+ LOG.finer("... but we already have a better one.");
+ }
+ }
+ }
+ }
+
+ if(iprogress != null) {
+ iprogress.incrementProcessed(LOG);
+ }
+ }
+ }
+
+ if(iprogress != null) {
+ iprogress.ensureCompleted(LOG);
+ }
+
+ if(C != null) {
+ return makeCluster(relation, C, D);
+ }
+ else {
+ return null;
+ }
+ }
+
+ /**
+ * Performs a single run of FastDOC, finding a single cluster.
+ *
+ * @param relation used to get actual values for DBIDs.
+ * @param S The set of points we're working on.
+ * @param d Dimensionality of the data set we're currently working on.
+ * @param r Size of random samples.
+ * @param m Number of inner iterations (per seed point).
+ * @param n Number of outer iterations (seed points).
+ * @return a cluster, if one is found, else <code>null</code>.
+ */
+ private Cluster<SubspaceModel<V>> runFastDOC(Relation<V> relation, ArrayModifiableDBIDs S, int d, int n, int m, int r) {
+ // Relevant attributes of highest cardinality.
+ BitSet D = null;
+ // The seed point for the best dimensions.
+ DBIDVar dV = DBIDUtil.newVar();
+
+ // Inform the user about the progress in the current iteration.
+ FiniteProgress iprogress = LOG.isVerbose() ? new FiniteProgress("Iteration progress for current cluster", m * n, LOG) : null;
+
+ Random random = rnd.getSingleThreadedRandom();
+
+ DBIDArrayIter iter = S.iter();
+ outer: for(int i = 0; i < n; ++i) {
+ // Pick a random seed point.
+ iter.seek(random.nextInt(S.size()));
+
+ for(int j = 0; j < m; ++j) {
+ // Choose a set of random points.
+ DBIDs randomSet = DBIDUtil.randomSample(S, Math.min(S.size(), r), random);
+
+ // Initialize cluster info.
+ BitSet nD = new BitSet(d);
+
+ // Test each dimension.
+ for(int k = 0; k < d; ++k) {
+ if(dimensionIsRelevant(k, relation, randomSet)) {
+ nD.set(k);
+ }
+ }
+
+ if(D == null || nD.cardinality() > D.cardinality()) {
+ D = nD;
+ dV.set(iter);
+
+ if(D.cardinality() >= d_zero) {
+ if(iprogress != null) {
+ iprogress.setProcessed(iprogress.getTotal(), LOG);
+ }
+ break outer;
+ }
+ }
+
+ if(iprogress != null) {
+ iprogress.incrementProcessed(LOG);
+ }
+ }
+ }
+
+ if(iprogress != null) {
+ iprogress.ensureCompleted(LOG);
+ }
+
+ // If no relevant dimensions were found, skip it.
+ if(D == null || D.cardinality() == 0) {
+ return null;
+ }
+
+ // Get all points in the box.
+ SubspaceMaximumDistanceFunction df = new SubspaceMaximumDistanceFunction(D);
+ DistanceQuery<V, DoubleDistance> dq = relation.getDatabase().getDistanceQuery(relation, df);
+ RangeQuery<V, DoubleDistance> rq = relation.getDatabase().getRangeQuery(dq, DatabaseQuery.HINT_SINGLE);
+
+ // TODO: add filtering capabilities into query API!
+ DBIDs C = DBIDUtil.intersection(S, rq.getRangeForDBID(dV, new DoubleDistance(w)));
+
+ // If we have a non-empty cluster, return it.
+ if(C.size() > 0) {
+ return makeCluster(relation, C, D);
+ }
+ else {
+ return null;
+ }
+ }
+
+ /**
+ * Utility method to test if a given dimension is relevant as determined via a
+ * set of reference points (i.e. if the spread, max - min, along the attribute
+ * does not exceed the width threshold w).
+ *
+ * @param dimension the dimension to test.
+ * @param relation used to get actual values for DBIDs.
+ * @param points the points to test.
+ * @return <code>true</code> if the dimension is relevant.
+ */
+ private boolean dimensionIsRelevant(int dimension, Relation<V> relation, DBIDs points) {
+ double min = Double.POSITIVE_INFINITY;
+ double max = Double.NEGATIVE_INFINITY;
+ for(DBIDIter iter = points.iter(); iter.valid(); iter.advance()) {
+ V xV = relation.get(iter);
+ min = Math.min(min, xV.doubleValue(dimension));
+ max = Math.max(max, xV.doubleValue(dimension));
+ if(max - min > w) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ /**
+ * Utility method to create a subspace cluster from a list of DBIDs and the
+ * relevant attributes.
+ *
+ * @param relation to compute a centroid.
+ * @param C the cluster points.
+ * @param D the relevant dimensions.
+ * @return an object representing the subspace cluster.
+ */
+ private Cluster<SubspaceModel<V>> makeCluster(Relation<V> relation, DBIDs C, BitSet D) {
+ DBIDs ids = DBIDUtil.newHashSet(C); // copy, also to lose distance values!
+ Cluster<SubspaceModel<V>> cluster = new Cluster<>(ids);
+ cluster.setModel(new SubspaceModel<>(new Subspace(D), Centroid.make(relation, ids).toVector(relation)));
+ return cluster;
+ }
+
+ /**
+ * Computes the quality of a cluster based on its size and number of relevant
+ * attributes, as described via the &mu;-function from the paper.
+ *
+ * @param clusterSize the size of the cluster.
+ * @param numRelevantDimensions the number of dimensions relevant to the
+ * cluster.
+ * @return a quality measure (only use this to compare the quality to that
+ * of other clusters).
+ */
+ private double computeClusterQuality(int clusterSize, int numRelevantDimensions) {
+ return clusterSize * Math.pow(1. / beta, numRelevantDimensions);
+ }
+
+ // ---------------------------------------------------------------------- //
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Florian Nuecke
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
+ /**
+ * Relative density threshold parameter Alpha.
+ */
+ public static final OptionID ALPHA_ID = new OptionID("doc.alpha", "Minimum relative density for a set of points to be considered a cluster (|C|>=doc.alpha*|S|).");
+
+ /**
+ * Balancing parameter for importance of points vs. dimensions
+ */
+ public static final OptionID BETA_ID = new OptionID("doc.beta", "Preference of cluster size versus number of relevant dimensions (higher value means higher priority on larger clusters).");
+
+ /**
+ * Half width parameter.
+ */
+ public static final OptionID W_ID = new OptionID("doc.w", "Maximum extent of scattering of points along a single attribute for the attribute to be considered relevant.");
+
+ /**
+ * Parameter to enable FastDOC heuristics.
+ */
+ public static final OptionID HEURISTICS_ID = new OptionID("doc.fastdoc", "Use heuristics as described, thus using the FastDOC algorithm (not yet implemented).");
+
+ /**
+ * Stopping threshold for FastDOC.
+ */
+ public static final OptionID D_ZERO_ID = new OptionID("doc.d0", "Parameter for FastDOC, setting the number of relevant attributes which, when found for a cluster, are deemed enough to stop iterating.");
+
+ /**
+ * Random seeding parameter.
+ */
+ public static final OptionID RANDOM_ID = new OptionID("doc.random-seed", "Random seed, for reproducible experiments.");
+
+ /**
+ * Relative density threshold parameter Alpha.
+ */
+ protected double alpha;
+
+ /**
+ * Balancing parameter for importance of points vs. dimensions
+ */
+ protected double beta;
+
+ /**
+ * Half width parameter.
+ */
+ protected double w;
+
+ /**
+ * Parameter to enable FastDOC heuristics.
+ */
+ protected boolean heuristics;
+
+ /**
+ * Stopping threshold for FastDOC.
+ */
+ protected int d_zero;
+
+ /**
+ * Random seeding factory.
+ */
+ protected RandomFactory random = RandomFactory.DEFAULT;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ {
+ DoubleParameter param = new DoubleParameter(ALPHA_ID, 0.2);
+ param.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
+ param.addConstraint(CommonConstraints.LESS_EQUAL_ONE_DOUBLE);
+ if(config.grab(param)) {
+ alpha = param.getValue();
+ }
+ }
+
+ {
+ DoubleParameter param = new DoubleParameter(BETA_ID, 0.8);
+ param.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+ param.addConstraint(CommonConstraints.LESS_THAN_ONE_DOUBLE);
+ if(config.grab(param)) {
+ beta = param.getValue();
+ }
+ }
+
+ {
+ DoubleParameter param = new DoubleParameter(W_ID, 0.05);
+ param.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
+ if(config.grab(param)) {
+ w = param.getValue();
+ }
+ }
+
+ {
+ Flag param = new Flag(HEURISTICS_ID);
+ if(config.grab(param)) {
+ heuristics = param.getValue();
+ }
+ }
+
+ if(heuristics) {
+ IntParameter param = new IntParameter(D_ZERO_ID, 5);
+ param.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(param)) {
+ d_zero = param.getValue();
+ }
+ }
+
+ {
+ RandomParameter param = new RandomParameter(RANDOM_ID);
+ if(config.grab(param)) {
+ random = param.getValue();
+ }
+ }
+ }
+
+ @Override
+ protected DOC<V> makeInstance() {
+ return new DOC<>(alpha, beta, w, heuristics, d_zero, random);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java
index b17ebebb..cd5e51b8 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java
@@ -69,8 +69,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ChainedParameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
@@ -170,12 +169,12 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
*/
public Clustering<SubspaceModel<V>> run(Database database, Relation<V> relation) {
// Instantiate DiSH distance (and thus run the preprocessor)
- if (LOG.isVerbose()) {
+ if(LOG.isVerbose()) {
LOG.verbose("*** Run DiSH preprocessor.");
}
DiSHDistanceFunction.Instance<V> dishDistanceQuery = dishDistance.instantiate(relation);
// Configure and run OPTICS.
- if (LOG.isVerbose()) {
+ if(LOG.isVerbose()) {
LOG.verbose("*** Run OPTICS algorithm.");
}
ListParameterization opticsconfig = new ListParameterization(opticsAlgorithmParameters);
@@ -186,7 +185,7 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
optics = opticsconfig.tryInstantiate(cls);
ClusterOrderResult<PreferenceVectorBasedCorrelationDistance> opticsResult = optics.run(database, relation);
- if (LOG.isVerbose()) {
+ if(LOG.isVerbose()) {
LOG.verbose("*** Compute Clusters.");
}
return computeClusters(relation, opticsResult, dishDistanceQuery);
@@ -206,10 +205,10 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
// extract clusters
Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> clustersMap = extractClusters(database, distFunc, clusterOrder);
- if (LOG.isVerbose()) {
+ if(LOG.isVerbose()) {
StringBuilder msg = new StringBuilder("Step 1: extract clusters");
- for (List<Pair<BitSet, ArrayModifiableDBIDs>> clusterList : clustersMap.values()) {
- for (Pair<BitSet, ArrayModifiableDBIDs> c : clusterList) {
+ for(List<Pair<BitSet, ArrayModifiableDBIDs>> clusterList : clustersMap.values()) {
+ for(Pair<BitSet, ArrayModifiableDBIDs> c : clusterList) {
msg.append('\n').append(FormatUtil.format(dimensionality, c.first)).append(" ids ").append(c.second.size());
}
}
@@ -218,10 +217,10 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
// check if there are clusters < minpts
checkClusters(database, distFunc, clustersMap, minpts);
- if (LOG.isVerbose()) {
+ if(LOG.isVerbose()) {
StringBuilder msg = new StringBuilder("Step 2: check clusters");
- for (List<Pair<BitSet, ArrayModifiableDBIDs>> clusterList : clustersMap.values()) {
- for (Pair<BitSet, ArrayModifiableDBIDs> c : clusterList) {
+ for(List<Pair<BitSet, ArrayModifiableDBIDs>> clusterList : clustersMap.values()) {
+ for(Pair<BitSet, ArrayModifiableDBIDs> c : clusterList) {
msg.append('\n').append(FormatUtil.format(dimensionality, c.first)).append(" ids ").append(c.second.size());
}
}
@@ -230,9 +229,9 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
// sort the clusters
List<Cluster<SubspaceModel<V>>> clusters = sortClusters(database, clustersMap);
- if (LOG.isVerbose()) {
+ if(LOG.isVerbose()) {
StringBuilder msg = new StringBuilder("Step 3: sort clusters");
- for (Cluster<SubspaceModel<V>> c : clusters) {
+ for(Cluster<SubspaceModel<V>> c : clusters) {
msg.append('\n').append(FormatUtil.format(dimensionality, c.getModel().getSubspace().getDimensions())).append(" ids ").append(c.size());
}
LOG.verbose(msg.toString());
@@ -241,14 +240,14 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
// build the hierarchy
Clustering<SubspaceModel<V>> clustering = new Clustering<>("DiSH clustering", "dish-clustering");
buildHierarchy(database, distFunc, clustering, clusters, dimensionality);
- if (LOG.isVerbose()) {
+ if(LOG.isVerbose()) {
StringBuilder msg = new StringBuilder("Step 4: build hierarchy");
- for (Cluster<SubspaceModel<V>> c : clusters) {
+ for(Cluster<SubspaceModel<V>> c : clusters) {
msg.append('\n').append(FormatUtil.format(dimensionality, c.getModel().getDimensions())).append(" ids ").append(c.size());
- for (Iter<Cluster<SubspaceModel<V>>> iter = clustering.getClusterHierarchy().iterParents(c); iter.valid(); iter.advance()) {
+ for(Iter<Cluster<SubspaceModel<V>>> iter = clustering.getClusterHierarchy().iterParents(c); iter.valid(); iter.advance()) {
msg.append("\n parent ").append(iter.get());
}
- for (Iter<Cluster<SubspaceModel<V>>> iter = clustering.getClusterHierarchy().iterChildren(c); iter.valid(); iter.advance()) {
+ for(Iter<Cluster<SubspaceModel<V>>> iter = clustering.getClusterHierarchy().iterChildren(c); iter.valid(); iter.advance()) {
msg.append("\n child ").append(iter.get());
}
}
@@ -256,8 +255,8 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
}
// build result
- for (Cluster<SubspaceModel<V>> c : clusters) {
- if (clustering.getClusterHierarchy().numParents(c) == 0) {
+ for(Cluster<SubspaceModel<V>> c : clusters) {
+ if(clustering.getClusterHierarchy().numParents(c) == 0) {
clustering.addToplevelCluster(c);
}
}
@@ -278,7 +277,7 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> clustersMap = new HashMap<>();
Map<DBID, ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance>> entryMap = new HashMap<>();
Map<DBID, Pair<BitSet, ArrayModifiableDBIDs>> entryToClusterMap = new HashMap<>();
- for (Iterator<ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance>> it = clusterOrder.iterator(); it.hasNext();) {
+ for(Iterator<ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance>> it = clusterOrder.iterator(); it.hasNext();) {
ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance> entry = it.next();
entryMap.put(entry.getID(), entry);
@@ -287,43 +286,43 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
// get the list of (parallel) clusters for the preference vector
List<Pair<BitSet, ArrayModifiableDBIDs>> parallelClusters = clustersMap.get(preferenceVector);
- if (parallelClusters == null) {
+ if(parallelClusters == null) {
parallelClusters = new ArrayList<>();
clustersMap.put(preferenceVector, parallelClusters);
}
// look for the proper cluster
Pair<BitSet, ArrayModifiableDBIDs> cluster = null;
- for (Pair<BitSet, ArrayModifiableDBIDs> c : parallelClusters) {
+ for(Pair<BitSet, ArrayModifiableDBIDs> c : parallelClusters) {
V c_centroid = ProjectedCentroid.make(c.first, database, c.second).toVector(database);
PreferenceVectorBasedCorrelationDistance dist = distFunc.correlationDistance(object, c_centroid, preferenceVector, preferenceVector);
- if (dist.getCorrelationValue() == entry.getReachability().getCorrelationValue()) {
+ if(dist.getCorrelationValue() == entry.getReachability().getCorrelationValue()) {
double d = distFunc.weightedDistance(object, c_centroid, dist.getCommonPreferenceVector());
- if (d <= 2 * epsilon) {
+ if(d <= 2 * epsilon) {
cluster = c;
break;
}
}
}
- if (cluster == null) {
+ if(cluster == null) {
cluster = new Pair<>(preferenceVector, DBIDUtil.newArray());
parallelClusters.add(cluster);
}
cluster.second.add(entry.getID());
entryToClusterMap.put(entry.getID(), cluster);
- if (progress != null) {
+ if(progress != null) {
progress.setProcessed(++processed, LOG);
}
}
- if (progress != null) {
+ if(progress != null) {
progress.ensureCompleted(LOG);
}
- if (LOG.isDebuggingFiner()) {
+ if(LOG.isDebuggingFiner()) {
StringBuilder msg = new StringBuilder("Step 0");
- for (List<Pair<BitSet, ArrayModifiableDBIDs>> clusterList : clustersMap.values()) {
- for (Pair<BitSet, ArrayModifiableDBIDs> c : clusterList) {
+ for(List<Pair<BitSet, ArrayModifiableDBIDs>> clusterList : clustersMap.values()) {
+ for(Pair<BitSet, ArrayModifiableDBIDs> c : clusterList) {
msg.append('\n').append(FormatUtil.format(RelationUtil.dimensionality(database), c.first)).append(" ids ").append(c.second.size());
}
}
@@ -331,24 +330,24 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
}
// add the predecessor to the cluster
- for (BitSet pv : clustersMap.keySet()) {
+ for(BitSet pv : clustersMap.keySet()) {
List<Pair<BitSet, ArrayModifiableDBIDs>> parallelClusters = clustersMap.get(pv);
- for (Pair<BitSet, ArrayModifiableDBIDs> cluster : parallelClusters) {
- if (cluster.second.isEmpty()) {
+ for(Pair<BitSet, ArrayModifiableDBIDs> cluster : parallelClusters) {
+ if(cluster.second.isEmpty()) {
continue;
}
DBID firstID = cluster.second.get(0);
ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance> entry = entryMap.get(firstID);
DBID predecessorID = entry.getPredecessorID();
- if (predecessorID == null) {
+ if(predecessorID == null) {
continue;
}
ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance> predecessor = entryMap.get(predecessorID);
// parallel cluster
- if (predecessor.getReachability().getCommonPreferenceVector().equals(entry.getReachability().getCommonPreferenceVector())) {
+ if(predecessor.getReachability().getCommonPreferenceVector().equals(entry.getReachability().getCommonPreferenceVector())) {
continue;
}
- if (predecessor.getReachability().compareTo(entry.getReachability()) < 0) {
+ if(predecessor.getReachability().compareTo(entry.getReachability()) < 0) {
continue;
}
@@ -375,16 +374,17 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
final int db_dim = RelationUtil.dimensionality(database);
// int num = 1;
List<Cluster<SubspaceModel<V>>> clusters = new ArrayList<>();
- for (BitSet pv : clustersMap.keySet()) {
+ for(BitSet pv : clustersMap.keySet()) {
List<Pair<BitSet, ArrayModifiableDBIDs>> parallelClusters = clustersMap.get(pv);
- for (int i = 0; i < parallelClusters.size(); i++) {
+ for(int i = 0; i < parallelClusters.size(); i++) {
Pair<BitSet, ArrayModifiableDBIDs> c = parallelClusters.get(i);
Cluster<SubspaceModel<V>> cluster = new Cluster<>(c.second);
cluster.setModel(new SubspaceModel<>(new Subspace(c.first), Centroid.make(database, c.second).toVector(database)));
String subspace = FormatUtil.format(cluster.getModel().getSubspace().getDimensions(), db_dim, "");
- if (parallelClusters.size() > 1) {
+ if(parallelClusters.size() > 1) {
cluster.setName("Cluster_" + subspace + "_" + i);
- } else {
+ }
+ else {
cluster.setName("Cluster_" + subspace);
}
clusters.add(cluster);
@@ -417,11 +417,11 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
List<Pair<BitSet, ArrayModifiableDBIDs>> notAssigned = new ArrayList<>();
Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> newClustersMap = new HashMap<>();
Pair<BitSet, ArrayModifiableDBIDs> noise = new Pair<>(new BitSet(), DBIDUtil.newArray());
- for (BitSet pv : clustersMap.keySet()) {
+ for(BitSet pv : clustersMap.keySet()) {
// noise
- if (pv.cardinality() == 0) {
+ if(pv.cardinality() == 0) {
List<Pair<BitSet, ArrayModifiableDBIDs>> parallelClusters = clustersMap.get(pv);
- for (Pair<BitSet, ArrayModifiableDBIDs> c : parallelClusters) {
+ for(Pair<BitSet, ArrayModifiableDBIDs> c : parallelClusters) {
noise.second.addDBIDs(c.second);
}
}
@@ -429,10 +429,11 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
else {
List<Pair<BitSet, ArrayModifiableDBIDs>> parallelClusters = clustersMap.get(pv);
List<Pair<BitSet, ArrayModifiableDBIDs>> newParallelClusters = new ArrayList<>(parallelClusters.size());
- for (Pair<BitSet, ArrayModifiableDBIDs> c : parallelClusters) {
- if (!pv.equals(new BitSet()) && c.second.size() < minpts) {
+ for(Pair<BitSet, ArrayModifiableDBIDs> c : parallelClusters) {
+ if(!pv.equals(new BitSet()) && c.second.size() < minpts) {
notAssigned.add(c);
- } else {
+ }
+ else {
newParallelClusters.add(c);
}
}
@@ -443,14 +444,15 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
clustersMap.clear();
clustersMap.putAll(newClustersMap);
- for (Pair<BitSet, ArrayModifiableDBIDs> c : notAssigned) {
- if (c.second.isEmpty()) {
+ for(Pair<BitSet, ArrayModifiableDBIDs> c : notAssigned) {
+ if(c.second.isEmpty()) {
continue;
}
Pair<BitSet, ArrayModifiableDBIDs> parent = findParent(database, distFunc, c, clustersMap);
- if (parent != null) {
+ if(parent != null) {
parent.second.addDBIDs(c.second);
- } else {
+ }
+ else {
noise.second.addDBIDs(c.second);
}
}
@@ -477,23 +479,23 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
BitSet childPV = child.first;
int childCardinality = childPV.cardinality();
- for (BitSet parentPV : clustersMap.keySet()) {
+ for(BitSet parentPV : clustersMap.keySet()) {
int parentCardinality = parentPV.cardinality();
- if (parentCardinality >= childCardinality) {
+ if(parentCardinality >= childCardinality) {
continue;
}
- if (resultCardinality != -1 && parentCardinality <= resultCardinality) {
+ if(resultCardinality != -1 && parentCardinality <= resultCardinality) {
continue;
}
BitSet pv = (BitSet) childPV.clone();
pv.and(parentPV);
- if (pv.equals(parentPV)) {
+ if(pv.equals(parentPV)) {
List<Pair<BitSet, ArrayModifiableDBIDs>> parentList = clustersMap.get(parentPV);
- for (Pair<BitSet, ArrayModifiableDBIDs> parent : parentList) {
+ for(Pair<BitSet, ArrayModifiableDBIDs> parent : parentList) {
V parent_centroid = ProjectedCentroid.make(parentPV, database, parent.second).toVector(database);
double d = distFunc.weightedDistance(child_centroid, parent_centroid, parentPV);
- if (d <= 2 * epsilon) {
+ if(d <= 2 * epsilon) {
result = parent;
resultCardinality = parentCardinality;
break;
@@ -519,57 +521,59 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
final int db_dim = RelationUtil.dimensionality(database);
Hierarchy<Cluster<SubspaceModel<V>>> hier = clustering.getClusterHierarchy();
- for (int i = 0; i < clusters.size() - 1; i++) {
+ for(int i = 0; i < clusters.size() - 1; i++) {
Cluster<SubspaceModel<V>> c_i = clusters.get(i);
int subspaceDim_i = dimensionality - c_i.getModel().getSubspace().dimensionality();
V ci_centroid = ProjectedCentroid.make(c_i.getModel().getDimensions(), database, c_i.getIDs()).toVector(database);
- for (int j = i + 1; j < clusters.size(); j++) {
+ for(int j = i + 1; j < clusters.size(); j++) {
Cluster<SubspaceModel<V>> c_j = clusters.get(j);
int subspaceDim_j = dimensionality - c_j.getModel().getSubspace().dimensionality();
- if (subspaceDim_i < subspaceDim_j) {
- if (LOG.isDebugging()) {
+ if(subspaceDim_i < subspaceDim_j) {
+ if(LOG.isDebugging()) {
msg.append("\n l_i=").append(subspaceDim_i).append(" pv_i=[").append(FormatUtil.format(db_dim, c_i.getModel().getSubspace().getDimensions())).append(']');
msg.append("\n l_j=").append(subspaceDim_j).append(" pv_j=[").append(FormatUtil.format(db_dim, c_j.getModel().getSubspace().getDimensions())).append(']');
}
// noise level reached
- if (c_j.getModel().getSubspace().dimensionality() == 0) {
+ if(c_j.getModel().getSubspace().dimensionality() == 0) {
// no parents exists -> parent is noise
- if (hier.numParents(c_i) == 0) {
+ if(hier.numParents(c_i) == 0) {
clustering.addChildCluster(c_j, c_i);
- if (LOG.isDebugging()) {
+ if(LOG.isDebugging()) {
msg.append("\n [").append(FormatUtil.format(db_dim, c_j.getModel().getSubspace().getDimensions()));
msg.append("] is parent of [").append(FormatUtil.format(db_dim, c_i.getModel().getSubspace().getDimensions()));
msg.append(']');
}
}
- } else {
+ }
+ else {
V cj_centroid = ProjectedCentroid.make(c_j.getModel().getDimensions(), database, c_j.getIDs()).toVector(database);
PreferenceVectorBasedCorrelationDistance distance = distFunc.correlationDistance(ci_centroid, cj_centroid, c_i.getModel().getSubspace().getDimensions(), c_j.getModel().getSubspace().getDimensions());
double d = distFunc.weightedDistance(ci_centroid, cj_centroid, distance.getCommonPreferenceVector());
- if (LOG.isDebugging()) {
+ if(LOG.isDebugging()) {
msg.append("\n dist = ").append(distance.getCorrelationValue());
}
- if (distance.getCorrelationValue() == subspaceDim_j) {
- if (LOG.isDebugging()) {
+ if(distance.getCorrelationValue() == subspaceDim_j) {
+ if(LOG.isDebugging()) {
msg.append("\n d = ").append(d);
}
- if (d <= 2 * epsilon) {
+ if(d <= 2 * epsilon) {
// no parent exists or c_j is not a parent of the already
// existing parents
- if (hier.numParents(c_i) == 0 || !isParent(database, distFunc, c_j, hier.iterParents(c_i))) {
+ if(hier.numParents(c_i) == 0 || !isParent(database, distFunc, c_j, hier.iterParents(c_i))) {
clustering.addChildCluster(c_j, c_i);
- if (LOG.isDebugging()) {
+ if(LOG.isDebugging()) {
msg.append("\n [").append(FormatUtil.format(db_dim, c_j.getModel().getSubspace().getDimensions()));
msg.append("] is parent of [");
msg.append(FormatUtil.format(db_dim, c_i.getModel().getSubspace().getDimensions()));
msg.append(']');
}
}
- } else {
+ }
+ else {
throw new RuntimeException("Should never happen: d = " + d);
}
}
@@ -577,7 +581,7 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
}
}
}
- if (LOG.isDebugging()) {
+ if(LOG.isDebugging()) {
LOG.debug(msg.toString());
}
}
@@ -599,11 +603,11 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
int dimensionality = RelationUtil.dimensionality(database);
int subspaceDim_parent = dimensionality - parent.getModel().getSubspace().dimensionality();
- for (; iter.valid(); iter.advance()) {
+ for(; iter.valid(); iter.advance()) {
Cluster<SubspaceModel<V>> child = iter.get();
V child_centroid = ProjectedCentroid.make(child.getModel().getDimensions(), database, child.getIDs()).toVector(database);
PreferenceVectorBasedCorrelationDistance distance = distFunc.correlationDistance(parent_centroid, child_centroid, parent.getModel().getSubspace().getDimensions(), child.getModel().getSubspace().getDimensions());
- if (distance.getCorrelationValue() == subspaceDim_parent) {
+ if(distance.getCorrelationValue() == subspaceDim_parent) {
return true;
}
}
@@ -642,14 +646,14 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
super.makeOptions(config);
DoubleParameter epsilonP = new DoubleParameter(EPSILON_ID, 0.001);
- epsilonP.addConstraint(new GreaterEqualConstraint(0));
- if (config.grab(epsilonP)) {
+ epsilonP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
+ if(config.grab(epsilonP)) {
epsilon = epsilonP.doubleValue();
}
IntParameter muP = new IntParameter(MU_ID, 1);
- muP.addConstraint(new GreaterConstraint(0));
- if (config.grab(muP)) {
+ muP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(muP)) {
mu = muP.intValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/HiSC.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/HiSC.java
index 9ac7c072..3f135564 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/HiSC.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/HiSC.java
@@ -34,8 +34,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ChainedParameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
@@ -95,8 +94,8 @@ public class HiSC<V extends NumberVector<?>> extends OPTICS<V, PreferenceVectorB
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
DoubleParameter alphaP = new DoubleParameter(HiSCPreferenceVectorIndex.Factory.ALPHA_ID, HiSCPreferenceVectorIndex.Factory.DEFAULT_ALPHA);
- alphaP.addConstraint(new GreaterConstraint(0.0));
- alphaP.addConstraint(new LessConstraint(1.0));
+ alphaP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+ alphaP.addConstraint(CommonConstraints.LESS_THAN_ONE_DOUBLE);
double alpha = 0.0;
if(config.grab(alphaP)) {
alpha = alphaP.doubleValue();
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/P3C.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/P3C.java
new file mode 100644
index 00000000..9d1ee94d
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/P3C.java
@@ -0,0 +1,1000 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.BitSet;
+import java.util.Iterator;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.EM;
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.Subspace;
+import de.lmu.ifi.dbs.elki.data.VectorUtil;
+import de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension;
+import de.lmu.ifi.dbs.elki.data.model.SubspaceModel;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDMIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.SetDBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.MutableProgress;
+import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+import de.lmu.ifi.dbs.elki.math.MeanVariance;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.VMath;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.ChiSquaredDistribution;
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.PoissonDistribution;
+import de.lmu.ifi.dbs.elki.utilities.BitsUtil;
+import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+
+/**
+ * P3C: A Robust Projected Clustering Algorithm.
+ *
+ * <p>
+ * Reference: <br/>
+ * Gabriela Moise, Jörg Sander, Martin Ester<br />
+ * P3C: A Robust Projected Clustering Algorithm.<br/>
+ * In: Proc. Sixth International Conference on Data Mining (ICDM '06)
+ * </p>
+ *
+ * This is not a complete implementation of P3C, but good enough for most users.
+ * Improvements are welcome. The most obviously missing step is section 3.5 of
+ * P3C, where the cluster subspaces are refined.
+ *
+ * @author Florian Nuecke
+ * @author Erich Schubert
+ *
+ * @apiviz.uses EM
+ * @apiviz.has SubspaceModel
+ * @apiviz.has ClusterCandidate
+ * @apiviz.has Signature
+ *
+ * @param <V> the type of NumberVector handled by this Algorithm.
+ */
+@Title("P3C: A Robust Projected Clustering Algorithm.")
+@Reference(authors = "Gabriela Moise, Jörg Sander, Martin Ester", title = "P3C: A Robust Projected Clustering Algorithm", booktitle = "Proc. Sixth International Conference on Data Mining (ICDM '06)", url = "http://dx.doi.org/10.1109/ICDM.2006.123")
+public class P3C<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<SubspaceModel<V>>> implements SubspaceClusteringAlgorithm<SubspaceModel<V>> {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(P3C.class);
+
+ /**
+ * Parameter for the Poisson test threshold.
+ */
+ protected double poissonThreshold;
+
+ /**
+ * Maximum number of iterations for the EM step.
+ */
+ protected int maxEmIterations;
+
+ /**
+ * Threshold when to stop EM iterations.
+ */
+ protected double emDelta;
+
+ /**
+ * Minimum cluster size for noise flagging. (Not existing in the original
+ * publication).
+ */
+ protected int minClusterSize;
+
+ /**
+ * Alpha threshold for testing.
+ */
+ protected double alpha = 0.001;
+
+ /**
+ * Constructor.
+ *
+ * @param alpha ChiSquared test threshold
+ * @param poissonThreshold Poisson test threshold
+ * @param maxEmIterations Maximum number of EM iterations
+ * @param emDelta EM stopping threshold
+ * @param minClusterSize Minimum cluster size
+ */
+ public P3C(double alpha, double poissonThreshold, int maxEmIterations, double emDelta, int minClusterSize) {
+ super();
+ this.alpha = alpha;
+ this.poissonThreshold = poissonThreshold;
+ this.maxEmIterations = maxEmIterations;
+ this.emDelta = emDelta;
+ this.minClusterSize = minClusterSize;
+ }
+
+ /**
+ * Performs the P3C algorithm on the given Database.
+ */
+ public Clustering<SubspaceModel<V>> run(Database database, Relation<V> relation) {
+ final int dim = RelationUtil.dimensionality(relation);
+
+ // Overall progress.
+ StepProgress stepProgress = LOG.isVerbose() ? new StepProgress(8) : null;
+
+ if(stepProgress != null) {
+ stepProgress.beginStep(1, "Grid-partitioning data.", LOG);
+ }
+
+ // Desired number of bins, as per Sturge:
+ final int binCount = (int) Math.ceil(1 + (Math.log(relation.size()) / MathUtil.LOG2));
+
+ // Perform 1-dimensional projections, and split into bins.
+ SetDBIDs[][] partitions = partitionData(relation, binCount);
+
+ if(stepProgress != null) {
+ stepProgress.beginStep(2, "Searching for non-uniform bins in support histograms.", LOG);
+ }
+
+ // Set markers for each attribute until they're all deemed uniform.
+ final long[][] markers = new long[dim][];
+ int numuniform = 0;
+ for(int d = 0; d < dim; d++) {
+ final SetDBIDs[] parts = partitions[d];
+ if(parts == null) {
+ continue; // Never mark any on constant dimensions.
+ }
+ final long[] marked = markers[d] = BitsUtil.zero(binCount);
+ int card = 0;
+ while(card < dim - 1) {
+ // Find bin with largest support, test only the dimensions that were not
+ // previously marked.
+ int bestBin = chiSquaredUniformTest(parts, marked, card);
+ if(bestBin < 0) {
+ numuniform++;
+ break; // Uniform
+ }
+ BitsUtil.setI(marked, bestBin);
+ card++;
+ }
+ if(LOG.isDebugging()) {
+ LOG.debug("Marked bins in dim " + d + ": " + BitsUtil.toString(marked, binCount));
+ }
+ }
+
+ if(stepProgress != null) {
+ stepProgress.beginStep(3, "Merging marked bins to 1-signatures.", LOG);
+ }
+
+ ArrayList<Signature> signatures = constructOneSignatures(partitions, markers);
+
+ if(stepProgress != null) {
+ stepProgress.beginStep(4, "Computing cluster cores from merged p-signatures.", LOG);
+ }
+
+ ArrayList<Signature> clusterCores = mergeClusterCores(binCount, signatures);
+
+ if(stepProgress != null) {
+ stepProgress.beginStep(5, "Pruning redundant cluster cores.", LOG);
+ }
+
+ clusterCores = pruneRedundantClusterCores(clusterCores);
+ if(LOG.isVerbose()) {
+ LOG.verbose("Number of cluster cores found: " + clusterCores.size());
+ }
+
+ if(clusterCores.size() == 0) {
+ stepProgress.setCompleted(LOG);
+ Clustering<SubspaceModel<V>> c = new Clustering<>("P3C", "P3C");
+ c.addToplevelCluster(new Cluster<SubspaceModel<V>>(relation.getDBIDs(), true));
+ return c;
+ }
+
+ if(stepProgress != null) {
+ stepProgress.beginStep(5, "Refining cluster cores to clusters via EM.", LOG);
+ }
+
+ // Track objects not assigned to any cluster:
+ ModifiableDBIDs noise = DBIDUtil.newHashSet();
+ WritableDataStore<double[]> probClusterIGivenX = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_SORTED, double[].class);
+ int k = clusterCores.size();
+ double[] clusterWeights = new double[k];
+ computeFuzzyMembership(relation, clusterCores, noise, probClusterIGivenX, clusterWeights);
+
+ // Initial estimate of covariances, to assign noise objects
+ Vector[] means = new Vector[k];
+ Matrix[] covarianceMatrices = new Matrix[k], invCovMatr = new Matrix[k];
+ final double norm = MathUtil.powi(MathUtil.TWOPI, dim);
+ double[] normDistrFactor = new double[k];
+ Arrays.fill(normDistrFactor, 1. / Math.sqrt(norm));
+ EM.recomputeCovarianceMatrices(relation, probClusterIGivenX, means, covarianceMatrices, dim);
+ EM.computeInverseMatrixes(covarianceMatrices, invCovMatr, normDistrFactor, norm);
+ assignUnassigned(relation, probClusterIGivenX, means, invCovMatr, clusterWeights, noise);
+
+ double emNew = EM.assignProbabilitiesToInstances(relation, normDistrFactor, means, invCovMatr, clusterWeights, probClusterIGivenX);
+ for(int it = 1; it <= maxEmIterations || maxEmIterations < 0; it++) {
+ final double emOld = emNew;
+ EM.recomputeCovarianceMatrices(relation, probClusterIGivenX, means, covarianceMatrices, dim);
+ EM.computeInverseMatrixes(covarianceMatrices, invCovMatr, normDistrFactor, norm);
+ // reassign probabilities
+ emNew = EM.assignProbabilitiesToInstances(relation, normDistrFactor, means, invCovMatr, clusterWeights, probClusterIGivenX);
+
+ if(LOG.isVerbose()) {
+ LOG.verbose("iteration " + it + " - expectation value: " + emNew);
+ }
+ if((emNew - emOld) <= emDelta) {
+ break;
+ }
+ }
+
+ // Perform EM clustering.
+
+ if(stepProgress != null) {
+ stepProgress.beginStep(6, "Generating hard clustering.", LOG);
+ }
+
+ // Create a hard clustering, making sure each data point only is part of one
+ // cluster, based on the best match from the membership matrix.
+ ArrayList<ClusterCandidate> clusterCandidates = hardClustering(probClusterIGivenX, clusterCores, relation.getDBIDs());
+
+ if(stepProgress != null) {
+ stepProgress.beginStep(7, "Looking for outliers and moving them to the noise set.", LOG);
+ }
+
+ // Outlier detection. Remove points from clusters that have a Mahalanobis
+ // distance larger than the critical value of the ChiSquare distribution.
+ findOutliers(relation, means, invCovMatr, clusterCandidates, dim - numuniform, noise);
+
+ if(stepProgress != null) {
+ stepProgress.beginStep(8, "Removing empty clusters.", LOG);
+ }
+
+ // Remove near-empty clusters.
+ for(Iterator<ClusterCandidate> it = clusterCandidates.iterator(); it.hasNext();) {
+ ClusterCandidate cand = it.next();
+ final int size = cand.ids.size();
+ if(size < minClusterSize) {
+ if(size > 0) {
+ noise.addDBIDs(cand.ids);
+ }
+ it.remove();
+ }
+ }
+
+ if(LOG.isVerbose()) {
+ LOG.verbose("Number of clusters remaining: " + clusterCandidates.size());
+ }
+
+ // TODO Check all attributes previously deemed uniform (section 3.5).
+
+ if(stepProgress != null) {
+ stepProgress.beginStep(9, "Generating final result.", LOG);
+ }
+
+ // Generate final output.
+ Clustering<SubspaceModel<V>> result = new Clustering<>("P3C", "P3C");
+ for(int cluster = 0; cluster < clusterCandidates.size(); ++cluster) {
+ ClusterCandidate candidate = clusterCandidates.get(cluster);
+ CovarianceMatrix cvm = CovarianceMatrix.make(relation, candidate.ids);
+ result.addToplevelCluster(new Cluster<>(candidate.ids, new SubspaceModel<>(new Subspace(candidate.dimensions), cvm.getMeanVector(relation))));
+ }
+ LOG.verbose("Noise size: " + noise.size());
+ if(noise.size() > 0) {
+ result.addToplevelCluster(new Cluster<SubspaceModel<V>>(noise, true));
+ }
+
+ if(stepProgress != null) {
+ stepProgress.ensureCompleted(LOG);
+ }
+
+ return result;
+ }
+
+ /**
+ * Construct the 1-signatures by merging adjacent dense bins.
+ *
+ * @param partitions Initial partitions.
+ * @param markers Markers for dense partitions.
+ * @return 1-signatures
+ */
+ private ArrayList<Signature> constructOneSignatures(SetDBIDs[][] partitions, final long[][] markers) {
+ final int dim = partitions.length;
+ // Generate projected p-signature intervals.
+ ArrayList<Signature> signatures = new ArrayList<>();
+ for(int d = 0; d < dim; d++) {
+ final DBIDs[] parts = partitions[d];
+ if(parts == null) {
+ continue; // Never mark any on constant dimensions.
+ }
+ final long[] marked = markers[d];
+ // Find sequences of 1s in marked.
+ for(int start = BitsUtil.nextSetBit(marked, 0); start >= 0;) {
+ int end = BitsUtil.nextClearBit(marked, start + 1);
+ end = (end == -1) ? dim : end;
+ int[] signature = new int[dim << 1];
+ Arrays.fill(signature, -1);
+ signature[d << 1] = start;
+ signature[(d << 1) + 1] = end - 1; // inclusive
+ HashSetModifiableDBIDs sids = unionDBIDs(parts, start, end /* exclusive */);
+ if(LOG.isDebugging()) {
+ LOG.debug("1-signature: " + d + " " + start + "-" + (end - 1));
+ }
+ signatures.add(new Signature(signature, sids));
+ start = (end < dim) ? BitsUtil.nextSetBit(marked, end + 1) : -1;
+ }
+ }
+ return signatures;
+ }
+
+ /**
+ * Merge 1-signatures into p-signatures.
+ *
+ * @param binCount Number of bins in each dimension.
+ * @param signatures 1-signatures
+ * @return p-signatures
+ */
+ private ArrayList<Signature> mergeClusterCores(final int binCount, ArrayList<Signature> signatures) {
+ MutableProgress mergeProgress = LOG.isVerbose() ? new MutableProgress("Merging signatures.", signatures.size(), LOG) : null;
+
+ // Annotate dimensions to 1-signatures for quick stopping.
+ int[] firstdim = new int[signatures.size()];
+ for(int i = 0; i < signatures.size(); i++) {
+ firstdim[i] = signatures.get(i).getFirstDim();
+ }
+ LOG.debug("First dimensions: " + FormatUtil.format(firstdim));
+
+ // Merge to (p+1)-signatures (cluster cores).
+ ArrayList<Signature> clusterCores = new ArrayList<>(signatures);
+ // Try adding merge 1-signature with each cluster core.
+ for(int i = 0; i < clusterCores.size(); i++) {
+ final Signature parent = clusterCores.get(i);
+ final int end = parent.getFirstDim();
+ for(int j = 0; j < signatures.size() && firstdim[j] < end; j++) {
+ final Signature onesig = signatures.get(j);
+ final Signature merge = mergeSignatures(parent, onesig, binCount);
+ if(merge != null) {
+ // We add each potential core to the list to allow remaining
+ // 1-signatures to try merging with this p-signature as well.
+ clusterCores.add(merge);
+ // Flag both "parents" for removal.
+ parent.prune = true;
+ onesig.prune = true;
+ }
+ }
+ if(mergeProgress != null) {
+ mergeProgress.setTotal(clusterCores.size());
+ mergeProgress.incrementProcessed(LOG);
+ }
+ }
+ if(mergeProgress != null) {
+ mergeProgress.setProcessed(mergeProgress.getTotal(), LOG);
+ }
+ return clusterCores;
+ }
+
+ private ArrayList<Signature> pruneRedundantClusterCores(ArrayList<Signature> clusterCores) {
+ // Prune cluster cores based on Definition 3, Condition 2.
+ ArrayList<Signature> retain = new ArrayList<>(clusterCores.size());
+ outer: for(Signature clusterCore : clusterCores) {
+ if(clusterCore.prune) {
+ continue;
+ }
+ for(int k = 0; k < clusterCores.size(); k++) {
+ Signature other = clusterCores.get(k);
+ if(other != clusterCore) {
+ if(other.isSuperset(clusterCore)) {
+ continue outer;
+ }
+ }
+ }
+ if(LOG.isDebugging()) {
+ LOG.debug("Retained cluster core: " + clusterCore);
+ }
+ retain.add(clusterCore);
+ }
+ clusterCores = retain;
+ return clusterCores;
+ }
+
+ /**
+ * Partition the data set into {@code bins} bins in each dimension
+ * <i>independently</i>.
+ *
+ * This can be used to construct a grid approximation of the data using O(d n)
+ * memory.
+ *
+ * When a dimension is found to be constant, it will not be partitioned, but
+ * instead the corresponding array will be set to {@code null}.
+ *
+ * @param relation Data relation to partition
+ * @param bins Number of bins
+ * @return Partitions of each dimension.
+ */
+ private SetDBIDs[][] partitionData(final Relation<V> relation, final int bins) {
+ final int dim = RelationUtil.dimensionality(relation);
+ SetDBIDs[][] partitions = new SetDBIDs[dim][bins];
+ ArrayModifiableDBIDs ids = DBIDUtil.newArray(relation.getDBIDs());
+ DBIDArrayIter iter = ids.iter(); // will be reused.
+ SortDBIDsBySingleDimension sorter = new VectorUtil.SortDBIDsBySingleDimension(relation, 0);
+ for(int d = 0; d < dim; d++) {
+ sorter.setDimension(d);
+ ids.sort(sorter);
+ // Minimum:
+ iter.seek(0);
+ double min = relation.get(iter).doubleValue(d);
+ // Extend:
+ iter.seek(ids.size() - 1);
+ double delta = (relation.get(iter).doubleValue(d) - min) / bins;
+ if(delta > 0.) {
+ SetDBIDs[] dimparts = partitions[d];
+ double split = min + delta;
+ HashSetModifiableDBIDs pids = DBIDUtil.newHashSet();
+ dimparts[0] = pids;
+ int i = 0;
+ for(iter.seek(0); iter.valid(); iter.advance()) {
+ final double v = relation.get(iter).doubleValue(d);
+ if(v <= split || i == dimparts.length - 1) {
+ pids.add(iter);
+ }
+ else {
+ i++;
+ split += delta;
+ pids = DBIDUtil.newHashSet();
+ dimparts[i] = pids;
+ }
+ }
+ for(++i; i < dimparts.length; ++i) {
+ dimparts[i] = pids;
+ }
+ }
+ else {
+ partitions[d] = null; // Flag whole dimension as bad
+ }
+ }
+ return partitions;
+ }
+
+ /**
+ * Compute the union of multiple DBID sets.
+ *
+ * @param parts Parts array
+ * @param start Array start index
+ * @param end Array end index (exclusive)
+   * @return the union of the sets {@code parts[start]} .. {@code parts[end - 1]}
+ */
+ protected HashSetModifiableDBIDs unionDBIDs(final DBIDs[] parts, int start, int end) {
+ int sum = 0;
+ for(int i = start; i < end; i++) {
+ sum += parts[i].size();
+ }
+ HashSetModifiableDBIDs sids = DBIDUtil.newHashSet(sum);
+ for(int i = start; i < end; i++) {
+ sids.addDBIDs(parts[i]);
+ }
+ return sids;
+ }
+
+ /**
+ * Performs a ChiSquared test to determine whether an attribute has a uniform
+ * distribution.
+ *
+ * @param parts Data partitions.
+ * @param marked the marked bins that should be ignored.
+ * @param card Cardinality
+ * @return Position of maximum, or -1 when uniform.
+ */
+ private int chiSquaredUniformTest(SetDBIDs[] parts, long[] marked, int card) {
+ // Remaining number of bins.
+ final int binCount = parts.length - card;
+ // Get global mean over all unmarked bins.
+ int max = 0, maxpos = -1;
+ MeanVariance mv = new MeanVariance();
+ for(int i = 0; i < parts.length; i++) {
+ // Ignore already marked bins.
+ if(BitsUtil.get(marked, i)) {
+ continue;
+ }
+ final int binSupport = parts[i].size();
+ mv.put(binSupport);
+ if(binSupport > max) {
+ max = binSupport;
+ maxpos = i;
+ }
+ }
+ if(mv.getCount() < 1. || !(mv.getNaiveVariance() > 0.)) {
+ return -1;
+ }
+ // ChiSquare statistic is the naive variance of the sizes!
+ final double chiSquare = mv.getNaiveVariance() / mv.getMean();
+ final double test = ChiSquaredDistribution.cdf(chiSquare, Math.max(1, binCount - card - 1));
+ if((1. - alpha) < test) {
+ return maxpos;
+ }
+ return -1;
+ }
+
+  /**
+   * Computes a fuzzy membership with the weights based on which cluster cores
+   * each data point is part of.
+   *
+   * @param relation Data relation
+   * @param clusterCores the cluster cores.
+   * @param unassigned set to which to add unassigned points.
+   * @param probClusterIGivenX Membership probabilities.
+   * @param clusterWeights Cluster weights
+   */
+  private void computeFuzzyMembership(Relation<V> relation, ArrayList<Signature> clusterCores, ModifiableDBIDs unassigned, WritableDataStore<double[]> probClusterIGivenX, double[] clusterWeights) {
+    final int n = relation.size();
+    final int k = clusterCores.size();
+
+    for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
+      // Mark each cluster core containing this point with weight 1, and count
+      // how many cores it belongs to (normalized below).
+      int count = 0;
+      double[] weights = new double[k];
+      for(int cluster = 0; cluster < k; ++cluster) {
+        if(clusterCores.get(cluster).ids.contains(iter)) {
+          weights[cluster] = 1.;
+          ++count;
+        }
+      }
+
+      // Set value(s) in membership matrix.
+      if(count > 0) {
+        // Rescale so the weights form a probability distribution (sum to 1).
+        VMath.timesEquals(weights, 1. / count);
+        // Accumulate this point's 1/n share into the global cluster weights.
+        VMath.plusTimesEquals(clusterWeights, weights, 1. / n);
+      }
+      else {
+        // Does not match any cluster, mark it.
+        unassigned.add(iter);
+      }
+      // Unmatched points store an all-zero weight vector here; they are
+      // resolved later (see assignUnassigned).
+      probClusterIGivenX.put(iter, weights);
+    }
+  }
+
+  /**
+   * Assign unassigned objects to best candidate based on shortest Mahalanobis
+   * distance.
+   *
+   * @param relation Data relation
+   * @param probClusterIGivenX fuzzy membership matrix.
+   * @param means Cluster means.
+   * @param invCovMatr Cluster covariance matrices.
+   * @param clusterWeights Cluster weights, updated for the new assignments.
+   * @param unassigned the list of points not yet assigned; emptied on return.
+   */
+  private void assignUnassigned(Relation<V> relation, WritableDataStore<double[]> probClusterIGivenX, Vector[] means, Matrix[] invCovMatr, double[] clusterWeights, ModifiableDBIDs unassigned) {
+    if(unassigned.isEmpty()) {
+      return;
+    }
+    final int k = means.length;
+    // Weight contribution of a single point to a cluster's total weight.
+    double pweight = 1. / relation.size();
+
+    for(DBIDIter iter = unassigned.iter(); iter.valid(); iter.advance()) {
+      // Find the best matching known cluster core using the Mahalanobis
+      // distance.
+      Vector v = relation.get(iter).getColumnVector();
+      int bestCluster = -1;
+      double minDistance = Double.POSITIVE_INFINITY;
+      for(int c = 0; c < k; ++c) {
+        final double distance = MathUtil.mahalanobisDistance(invCovMatr[c], v.minus(means[c]));
+        if(distance < minDistance) {
+          minDistance = distance;
+          bestCluster = c;
+        }
+      }
+      // Assign to best core: hard assignment with full weight.
+      double[] weights = new double[k];
+      weights[bestCluster] = 1.0;
+      clusterWeights[bestCluster] += pweight;
+      probClusterIGivenX.put(iter, weights);
+    }
+
+    // Clear the list of unassigned objects.
+    unassigned.clear();
+  }
+
+  /**
+   * Creates a hard clustering from the specified soft membership matrix.
+   *
+   * @param probClusterIGivenX the membership matrix.
+   * @param clusterCores the cluster cores; one candidate is created per core.
+   * @param dbids mapping matrix row to DBID.
+   * @return a hard clustering based on the matrix.
+   */
+  private ArrayList<ClusterCandidate> hardClustering(WritableDataStore<double[]> probClusterIGivenX, List<Signature> clusterCores, DBIDs dbids) {
+    final int k = clusterCores.size();
+
+    // Initialize cluster sets.
+    ArrayList<ClusterCandidate> candidates = new ArrayList<>();
+    for(Signature sig : clusterCores) {
+      candidates.add(new ClusterCandidate(sig));
+    }
+
+    // Perform hard partitioning, assigning each data point only to one cluster,
+    // namely that one it is most likely to belong to.
+    // NOTE(review): assumes k >= 1 — probs[0] would fail on an empty core list.
+    for(DBIDIter iter = dbids.iter(); iter.valid(); iter.advance()) {
+      final double[] probs = probClusterIGivenX.get(iter);
+      int bestCluster = 0;
+      double bestProbability = probs[0];
+      // Ties are broken in favor of the lower cluster index.
+      for(int c = 1; c < k; ++c) {
+        if(probs[c] > bestProbability) {
+          bestCluster = c;
+          bestProbability = probs[c];
+        }
+      }
+      candidates.get(bestCluster).ids.add(iter);
+    }
+
+    return candidates;
+  }
+
+  /**
+   * Performs outlier detection by testing the Mahalanobis distance of each
+   * point in a cluster against the critical value of the ChiSquared
+   * distribution with as many degrees of freedom as the cluster has relevant
+   * attributes.
+   *
+   * @param relation Data relation
+   * @param means Cluster means
+   * @param invCovMatr Inverse covariance matrixes
+   * @param clusterCandidates the list of clusters to check.
+   * @param nonUniformDimensionCount the number of dimensions to consider when
+   *        testing. NOTE(review): this parameter is never read in the body —
+   *        the per-cluster dimension cardinality is used instead; verify.
+   * @param noise the set to which to add points deemed outliers.
+   */
+  private void findOutliers(Relation<V> relation, Vector[] means, Matrix[] invCovMatr, ArrayList<ClusterCandidate> clusterCandidates, int nonUniformDimensionCount, ModifiableDBIDs noise) {
+    final int k = clusterCandidates.size();
+
+    for(int c = 0; c < k; ++c) {
+      final ClusterCandidate candidate = clusterCandidates.get(c);
+      // Clusters with fewer than two members carry no usable spread.
+      if(candidate.ids.size() < 2) {
+        continue;
+      }
+      // Degrees of freedom = number of relevant dimensions of this cluster.
+      final int dof = candidate.dimensions.cardinality();
+      // NOTE(review): significance level .001 is hard-coded here and not tied
+      // to the alpha parameter — confirm this is intended.
+      final double threshold = ChiSquaredDistribution.quantile(1 - .001, dof);
+      for(DBIDMIter iter = candidate.ids.iter(); iter.valid(); iter.advance()) {
+        final Vector mean = means[c];
+        final Vector delta = relation.get(iter).getColumnVector().minusEquals(mean);
+        final Matrix invCov = invCovMatr[c];
+        final double distance = MathUtil.mahalanobisDistance(invCov, delta);
+        if(distance >= threshold) {
+          // Outlier, remove it and add it to the outlier set.
+          noise.add(iter);
+          iter.remove();
+        }
+      }
+    }
+  }
+
+  /**
+   * Generates a merged signature of this and another one, where the other
+   * signature must be a 1-signature.
+   *
+   * @param first First signature.
+   * @param second Second signature, must be a 1-signature.
+   * @param numBins Number of bins per dimension.
+   * @return the merged signature, or null if the merge failed.
+   */
+  protected Signature mergeSignatures(Signature first, Signature second, int numBins) {
+    // Locate the single dimension set in the 1-signature.
+    int d2 = -1;
+    for(int i = 0; i < second.spec.length; i += 2) {
+      if(second.spec[i] >= 0) {
+        assert (d2 == -1) : "Merging with non-1-signature?!?";
+        d2 = i;
+      }
+    }
+    assert (d2 >= 0) : "Merging with empty signature?";
+
+    // Avoid generating redundant signatures.
+    if(first.spec[d2] >= 0) {
+      return null;
+    }
+
+    // Definition 3, Condition 1:
+    // True support:
+    final ModifiableDBIDs intersection = DBIDUtil.intersection(first.ids, second.ids);
+    final int support = intersection.size();
+    // Interval width, computed using selected number of bins / total bins
+    double width = (second.spec[d2 + 1] - second.spec[d2] + 1.) / (double) numBins;
+    // Expected size thus:
+    double expect = first.ids.size() * width;
+    if(support <= expect || support < minClusterSize) {
+      return null;
+    }
+    // Require the support to be significantly larger than expected under the
+    // Poisson model.
+    final double test = PoissonDistribution.rawProbability(support, expect);
+    if((poissonThreshold) <= test) {
+      return null;
+    }
+    // Create merged signature: copy BOTH interval endpoints of dimension d2.
+    // (Fixed: previously spec[d2 + 1] received second.spec[d2], collapsing a
+    // multi-bin interval to its lower bound, inconsistent with the width
+    // computation above which uses both endpoints.)
+    int[] spec = first.spec.clone();
+    spec[d2] = second.spec[d2];
+    spec[d2 + 1] = second.spec[d2 + 1];
+
+    final Signature newsig = new Signature(spec, intersection);
+    if(LOG.isDebugging()) {
+      LOG.debug(newsig.toString());
+    }
+    return newsig;
+  }
+
+  /**
+   * P3C Cluster signature.
+   *
+   * Stores two entries (lower bin, upper bin) per dimension in {@code spec},
+   * with -1 marking dimensions not part of the signature, plus the supporting
+   * object ids.
+   *
+   * @author Erich Schubert
+   */
+  private static class Signature {
+    /**
+     * Subspace specification: interval endpoints per dimension, -1 for unset
+     * dimensions.
+     */
+    int[] spec;
+
+    /**
+     * Object ids.
+     */
+    DBIDs ids;
+
+    /**
+     * Pruning flag.
+     */
+    boolean prune = false;
+
+    /**
+     * Constructor.
+     *
+     * @param spec Subspace specification
+     * @param ids IDs.
+     */
+    private Signature(int[] spec, DBIDs ids) {
+      super();
+      this.spec = spec;
+      this.ids = ids;
+    }
+
+    /**
+     * Test whether this is a superset of the other signature.
+     *
+     * @param other Other signature.
+     * @return {@code true} when this is a superset.
+     */
+    public boolean isSuperset(Signature other) {
+      for(int i = 0; i < spec.length; i += 2) {
+        // Compare both interval endpoints. (Fixed: the upper bound was
+        // previously compared against other.spec[i] instead of
+        // other.spec[i + 1], so differing upper bounds went undetected.)
+        if(spec[i] != other.spec[i] || spec[i + 1] != other.spec[i + 1]) {
+          // A mismatch only matters when the other signature actually has
+          // this dimension set (-1 means unset).
+          if(other.spec[i] != -1) {
+            return false;
+          }
+        }
+      }
+      return true;
+    }
+
+    /**
+     * Find the first dimension set in this signature.
+     *
+     * @return Dimension, or -1 when the signature is empty.
+     */
+    public int getFirstDim() {
+      for(int i = 0; i < spec.length; i += 2) {
+        if(spec[i] >= 0) {
+          return (i >>> 1);
+        }
+      }
+      return -1;
+    }
+
+    @Override
+    public String toString() {
+      // Count the number of dimensions set, for the "p-signature" prefix.
+      int p = 0;
+      for(int i = 0; i < spec.length; i += 2) {
+        if(spec[i] >= 0) {
+          p++;
+        }
+      }
+      StringBuilder buf = new StringBuilder();
+      buf.append(p).append("-signature: ");
+      for(int i = 0; i < spec.length; i += 2) {
+        if(spec[i] >= 0) {
+          buf.append(i >>> 1).append(':');
+          buf.append(spec[i]).append('-').append(spec[i + 1]).append(' ');
+        }
+      }
+      buf.append(" size: ").append(ids.size());
+      return buf.toString();
+    }
+  }
+
+  /**
+   * This class is used to represent potential clusters.
+   *
+   * @author Erich Schubert
+   */
+  private static class ClusterCandidate {
+    /**
+     * Selected dimensions
+     */
+    public final BitSet dimensions;
+
+    /**
+     * Objects contained in cluster.
+     */
+    public final ModifiableDBIDs ids;
+
+    /**
+     * Constructor.
+     *
+     * @param clusterCore Signature
+     */
+    public ClusterCandidate(Signature clusterCore) {
+      this.dimensions = new BitSet(clusterCore.spec.length >> 1);
+      for(int i = 0; i < clusterCore.spec.length; i += 2) {
+        // Only mark dimensions actually set in the signature; unset
+        // dimensions carry -1. (Fixed: previously every dimension was set
+        // unconditionally, inflating cardinality() and thus the degrees of
+        // freedom used in findOutliers.)
+        if(clusterCore.spec[i] >= 0) {
+          this.dimensions.set(i >> 1);
+        }
+      }
+      this.ids = DBIDUtil.newArray(clusterCore.ids.size());
+    }
+  }
+
+  @Override
+  public TypeInformation[] getInputTypeRestriction() {
+    // P3C operates on fixed-dimensionality number vector fields.
+    return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
+  }
+
+  @Override
+  protected Logging getLogger() {
+    // Class logger, used by the framework for progress and debug output.
+    return LOG;
+  }
+
+  /**
+   * Parameterization class.
+   *
+   * @author Florian Nuecke
+   *
+   * @apiviz.exclude
+   */
+  public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
+    /**
+     * Parameter for the chi squared test threshold.
+     */
+    public static final OptionID ALPHA_THRESHOLD_ID = new OptionID("p3c.alpha", "The significance level for uniform testing in the initial binning step.");
+
+    /**
+     * Parameter for the poisson test threshold.
+     */
+    public static final OptionID POISSON_THRESHOLD_ID = new OptionID("p3c.threshold", "The threshold value for the poisson test used when merging signatures.");
+
+    /**
+     * Maximum number of iterations for the EM step.
+     */
+    public static final OptionID MAX_EM_ITERATIONS_ID = new OptionID("p3c.em.maxiter", "The maximum number of iterations for the EM step. Use -1 to run until delta convergence.");
+
+    /**
+     * Threshold when to stop EM iterations.
+     */
+    public static final OptionID EM_DELTA_ID = new OptionID("p3c.em.delta", "The change delta for the EM step below which to stop.");
+
+    /**
+     * Minimum cluster size for noise flagging. (Not existant in the original
+     * publication).
+     */
+    public static final OptionID MIN_CLUSTER_SIZE_ID = new OptionID("p3c.minsize", "The minimum size of a cluster, otherwise it is seen as noise (this is a cheat, it is not mentioned in the paper).");
+
+    /**
+     * Parameter for the chi squared test threshold.
+     *
+     * While statistical values such as 0.01 are a good choice, we found the
+     * need to modify this parameter in our experiments.
+     */
+    protected double alpha;
+
+    /**
+     * Parameter for the poisson test threshold.
+     */
+    protected double poissonThreshold;
+
+    /**
+     * Maximum number of iterations for the EM step.
+     */
+    protected int maxEmIterations;
+
+    /**
+     * Threshold when to stop EM iterations.
+     */
+    protected double emDelta;
+
+    /**
+     * Minimum cluster size for noise flagging. (Not existant in the original
+     * publication).
+     */
+    protected int minClusterSize;
+
+    @Override
+    protected void makeOptions(Parameterization config) {
+      super.makeOptions(config);
+
+      // Significance level for the initial uniformity test; must be in (0, .5).
+      DoubleParameter alphaP = new DoubleParameter(ALPHA_THRESHOLD_ID, .001);
+      alphaP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+      alphaP.addConstraint(CommonConstraints.LESS_THAN_HALF_DOUBLE);
+      if(config.grab(alphaP)) {
+        alpha = alphaP.getValue();
+      }
+
+      // Poisson test threshold for signature merging; must be in (0, .5).
+      DoubleParameter poissonP = new DoubleParameter(POISSON_THRESHOLD_ID, 1.e-4);
+      poissonP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+      poissonP.addConstraint(CommonConstraints.LESS_THAN_HALF_DOUBLE);
+      if(config.grab(poissonP)) {
+        poissonThreshold = poissonP.getValue();
+      }
+
+      // EM iteration limit; -1 means iterate until delta convergence.
+      IntParameter maxiterP = new IntParameter(MAX_EM_ITERATIONS_ID, 20);
+      maxiterP.addConstraint(CommonConstraints.GREATER_EQUAL_MINUSONE_INT);
+      if(config.grab(maxiterP)) {
+        maxEmIterations = maxiterP.getValue();
+      }
+
+      // Convergence threshold for the EM step.
+      DoubleParameter deltaP = new DoubleParameter(EM_DELTA_ID, 1.e-5);
+      deltaP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+      if(config.grab(deltaP)) {
+        emDelta = deltaP.getValue();
+      }
+
+      // Minimum cluster size before flagging as noise.
+      IntParameter minsizeP = new IntParameter(MIN_CLUSTER_SIZE_ID, 1);
+      minsizeP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+      if(config.grab(minsizeP)) {
+        minClusterSize = minsizeP.getValue();
+      }
+    }
+
+    @Override
+    protected P3C<V> makeInstance() {
+      return new P3C<>(alpha, poissonThreshold, maxEmIterations, emDelta, minClusterSize);
+    }
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PROCLUS.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PROCLUS.java
index 92158734..03e9978f 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PROCLUS.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PROCLUS.java
@@ -67,7 +67,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
@@ -148,7 +148,7 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
public Clustering<SubspaceModel<V>> run(Database database, Relation<V> relation) {
DistanceQuery<V, DoubleDistance> distFunc = this.getDistanceQuery(database);
RangeQuery<V, DoubleDistance> rangeQuery = database.getRangeQuery(distFunc);
- final Random random = rnd.getRandom();
+ final Random random = rnd.getSingleThreadedRandom();
if (RelationUtil.dimensionality(relation) < l) {
throw new IllegalStateException("Dimensionality of data < parameter l! " + "(" + RelationUtil.dimensionality(relation) + " < " + l + ")");
@@ -844,7 +844,7 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
configL(config);
IntParameter m_iP = new IntParameter(M_I_ID, 10);
- m_iP.addConstraint(new GreaterConstraint(0));
+ m_iP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
if (config.grab(m_iP)) {
m_i = m_iP.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java
index c8d0833e..e6245f6e 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java
@@ -54,7 +54,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DistanceParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -77,7 +77,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
* @author Elke Achtert
*
* @apiviz.uses DBSCAN
- * @apiviz.uses AbstractDimensionsSelectingDoubleDistanceFunction
+ * @apiviz.uses DimensionSelectingSubspaceDistanceFunction
* @apiviz.has SubspaceModel
*
* @param <V> the type of FeatureVector handled by this Algorithm
@@ -488,7 +488,7 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
}
IntParameter minptsP = new IntParameter(MINPTS_ID);
- minptsP.addConstraint(new GreaterConstraint(0));
+ minptsP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
if (config.grab(minptsP)) {
minpts = minptsP.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java
index ad0b8175..65447713 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java
@@ -23,59 +23,45 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.HashMap;
-
-import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
-import de.lmu.ifi.dbs.elki.database.QueryUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDRange;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair;
-import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.query.similarity.SimilarityQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
-import de.lmu.ifi.dbs.elki.distance.similarityfunction.PrimitiveSimilarityFunction;
+import de.lmu.ifi.dbs.elki.distance.similarityfunction.SimilarityFunction;
import de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.KernelMatrix;
import de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.PolynomialKernelFunction;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.math.MeanVariance;
-import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparableMaxHeap;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparableMinHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
-import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
/**
- * Angle-Based Outlier Detection
+ * Angle-Based Outlier Detection / Angle-Based Outlier Factor.
*
* Outlier detection using variance analysis on angles, especially for high
- * dimensional data sets.
+ * dimensional data sets. Exact version, which has cubic runtime (see also
+ * {@link FastABOD} and {@link LBABOD} for faster versions).
*
* H.-P. Kriegel, M. Schubert, and A. Zimek: Angle-Based Outlier Detection in
* High-dimensional Data. In: Proc. 14th ACM SIGKDD Int. Conf. on Knowledge
@@ -84,475 +70,107 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
* @author Matthias Schubert (Original Code)
* @author Erich Schubert (ELKIfication)
*
- * @apiviz.has KNNQuery
- *
* @param <V> Vector type
*/
@Title("ABOD: Angle-Based Outlier Detection")
@Description("Outlier detection using variance analysis on angles, especially for high dimensional data sets.")
@Reference(authors = "H.-P. Kriegel, M. Schubert, and A. Zimek", title = "Angle-Based Outlier Detection in High-dimensional Data", booktitle = "Proc. 14th ACM SIGKDD Int. Conf. on Knowledge Discovery and Data Mining (KDD '08), Las Vegas, NV, 2008", url = "http://dx.doi.org/10.1145/1401890.1401946")
-public class ABOD<V extends NumberVector<?>> extends AbstractDistanceBasedAlgorithm<V, DoubleDistance, OutlierResult> implements OutlierAlgorithm {
+public class ABOD<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
private static final Logging LOG = Logging.getLogger(ABOD.class);
/**
- * Parameter for k, the number of neighbors used in kNN queries.
- */
- public static final OptionID K_ID = new OptionID("abod.k", "Parameter k for kNN queries.");
-
- /**
- * Parameter for sample size to be used in fast mode.
- */
- public static final OptionID FAST_SAMPLE_ID = new OptionID("abod.samplesize", "Sample size to enable fast mode.");
-
- /**
- * Parameter for the kernel function.
- */
- public static final OptionID KERNEL_FUNCTION_ID = new OptionID("abod.kernelfunction", "Kernel function to use.");
-
- /**
- * The preprocessor used to materialize the kNN neighborhoods.
- */
- public static final OptionID PREPROCESSOR_ID = new OptionID("abod.knnquery", "Processor to compute the kNN neighborhoods.");
-
- /**
- * use alternate code below.
- */
- private static final boolean USE_RND_SAMPLE = false;
-
- /**
- * k parameter.
- */
- private int k;
-
- /**
- * Variable to store fast mode sampling value.
- */
- int sampleSize = 0;
-
- /**
* Store the configured Kernel version.
*/
- private PrimitiveSimilarityFunction<? super V, DoubleDistance> primitiveKernelFunction;
-
- /**
- * Static DBID map.
- */
- private ArrayDBIDs staticids = null;
+ protected SimilarityFunction<? super V, DoubleDistance> kernelFunction;
/**
- * Actual constructor, with parameters. Fast mode (sampling).
+ * Constructor for Angle-Based Outlier Detection (ABOD).
*
- * @param k k parameter
- * @param sampleSize sample size
- * @param primitiveKernelFunction Kernel function to use
- * @param distanceFunction Distance function
+ * @param kernelFunction kernel function to use
*/
- public ABOD(int k, int sampleSize, PrimitiveSimilarityFunction<? super V, DoubleDistance> primitiveKernelFunction, DistanceFunction<V, DoubleDistance> distanceFunction) {
- super(distanceFunction);
- this.k = k;
- this.sampleSize = sampleSize;
- this.primitiveKernelFunction = primitiveKernelFunction;
+ public ABOD(SimilarityFunction<? super V, DoubleDistance> kernelFunction) {
+ super();
+ this.kernelFunction = kernelFunction;
}
/**
- * Actual constructor, with parameters. Slow mode (exact).
+ * Run ABOD on the data set.
*
- * @param k k parameter
- * @param primitiveKernelFunction kernel function to use
- * @param distanceFunction Distance function
+ * @param relation Relation to process
+ * @return Outlier detection result
*/
- public ABOD(int k, PrimitiveSimilarityFunction<? super V, DoubleDistance> primitiveKernelFunction, DistanceFunction<V, DoubleDistance> distanceFunction) {
- super(distanceFunction);
- this.k = k;
- this.sampleSize = 0;
- this.primitiveKernelFunction = primitiveKernelFunction;
- }
+ public OutlierResult run(Database db, Relation<V> relation) {
+ DBIDs ids = relation.getDBIDs();
+ // Build a kernel matrix, to make O(n^3) slightly less bad.
+ SimilarityQuery<V, DoubleDistance> sq = db.getSimilarityQuery(relation, kernelFunction);
+ KernelMatrix kernelMatrix = new KernelMatrix(sq, relation, ids);
- /**
- * Main part of the algorithm. Exact version.
- *
- * @param relation Relation to query
- * @return result
- */
- public OutlierResult getRanking(Relation<V> relation) {
- // Fix a static set of IDs
- if (relation.getDBIDs() instanceof DBIDRange) {
- staticids = (DBIDRange) relation.getDBIDs();
- } else {
- staticids = DBIDUtil.newArray(relation.getDBIDs());
- ((ArrayModifiableDBIDs) staticids).sort();
- }
-
- KernelMatrix kernelMatrix = new KernelMatrix(primitiveKernelFunction, relation, staticids);
- ComparableMaxHeap<DoubleDBIDPair> pq = new ComparableMaxHeap<>(relation.size());
-
- // preprocess kNN neighborhoods
- KNNQuery<V, DoubleDistance> knnQuery = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k);
+ WritableDoubleDataStore abodvalues = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
+ DoubleMinMax minmaxabod = new DoubleMinMax();
MeanVariance s = new MeanVariance();
- for (DBIDIter objKey = relation.iterDBIDs(); objKey.valid(); objKey.advance()) {
- s.reset();
-
- KNNList<DoubleDistance> neighbors = knnQuery.getKNNForDBID(objKey, k);
- for (DBIDIter key1 = neighbors.iter(); key1.valid(); key1.advance()) {
- for (DBIDIter key2 = neighbors.iter(); key2.valid(); key2.advance()) {
- if (DBIDUtil.equal(key2, key1) || DBIDUtil.equal(key1, objKey) || DBIDUtil.equal(key2, objKey)) {
- continue;
- }
- double nenner = calcDenominator(kernelMatrix, objKey, key1, key2);
-
- if (nenner != 0) {
- double sqrtnenner = Math.sqrt(nenner);
- double tmp = calcNumerator(kernelMatrix, objKey, key1, key2) / nenner;
- s.put(tmp, 1 / sqrtnenner);
- }
-
- }
- }
- // Sample variance probably would be correct, however the numerical
- // instabilities can actually break ABOD here.
- pq.add(DBIDUtil.newPair(s.getNaiveVariance(), objKey));
+ for (DBIDIter pA = ids.iter(); pA.valid(); pA.advance()) {
+ final double abof = computeABOF(relation, kernelMatrix, pA, s);
+ minmaxabod.put(abof);
+ abodvalues.putDouble(pA, abof);
}
- DoubleMinMax minmaxabod = new DoubleMinMax();
- WritableDoubleDataStore abodvalues = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
- while (!pq.isEmpty()) {
- DoubleDBIDPair pair = pq.poll();
- abodvalues.putDouble(pair, pair.doubleValue());
- minmaxabod.put(pair.doubleValue());
- }
// Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<>("Angle-based Outlier Degree", "abod-outlier", TypeUtil.DOUBLE, abodvalues, relation.getDBIDs());
+ Relation<Double> scoreResult = new MaterializedRelation<>("Angle-Based Outlier Degree", "abod-outlier", TypeUtil.DOUBLE, abodvalues, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new InvertedOutlierScoreMeta(minmaxabod.getMin(), minmaxabod.getMax(), 0.0, Double.POSITIVE_INFINITY);
return new OutlierResult(scoreMeta, scoreResult);
}
/**
- * Main part of the algorithm. Fast version.
+ * Compute the exact ABOF value.
*
- * @param relation Relation to use
- * @return result
- */
- public OutlierResult getFastRanking(Relation<V> relation) {
- final DBIDs ids = relation.getDBIDs();
- // Fix a static set of IDs
- // TODO: add a DBIDUtil.ensureSorted?
- if (relation.getDBIDs() instanceof DBIDRange) {
- staticids = (DBIDRange) relation.getDBIDs();
- } else {
- staticids = DBIDUtil.newArray(relation.getDBIDs());
- ((ArrayModifiableDBIDs) staticids).sort();
- }
-
- KernelMatrix kernelMatrix = new KernelMatrix(primitiveKernelFunction, relation, staticids);
-
- ComparableMaxHeap<DoubleDBIDPair> pq = new ComparableMaxHeap<>(relation.size());
- // get Candidate Ranking
- for (DBIDIter aKey = relation.iterDBIDs(); aKey.valid(); aKey.advance()) {
- WritableDoubleDataStore dists = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
- // determine kNearestNeighbors and pairwise distances
- ComparableMinHeap<DoubleDBIDPair> nn;
- if (!USE_RND_SAMPLE) {
- nn = calcDistsandNN(relation, kernelMatrix, sampleSize, aKey, dists);
- } else {
- // alternative:
- nn = calcDistsandRNDSample(relation, kernelMatrix, sampleSize, aKey, dists);
- }
-
- // get normalization
- double[] counter = calcFastNormalization(aKey, dists, staticids);
- // umsetzen von Pq zu list
- ModifiableDBIDs neighbors = DBIDUtil.newArray(nn.size());
- while (!nn.isEmpty()) {
- neighbors.add(nn.poll());
- }
- // getFilter
- double var = getAbofFilter(kernelMatrix, aKey, dists, counter[1], counter[0], neighbors);
- pq.add(DBIDUtil.newPair(var, aKey));
- }
- // refine Candidates
- ComparableMinHeap<DoubleDBIDPair> resqueue = new ComparableMinHeap<>(k);
- MeanVariance s = new MeanVariance();
- while (!pq.isEmpty()) {
- if (resqueue.size() == k && pq.peek().doubleValue() > resqueue.peek().doubleValue()) {
- break;
- }
- // double approx = pq.peek().getFirst();
- DBIDRef aKey = pq.poll();
- s.reset();
- for (DBIDIter bKey = relation.iterDBIDs(); bKey.valid(); bKey.advance()) {
- if (DBIDUtil.equal(bKey, aKey)) {
- continue;
- }
- for (DBIDIter cKey = relation.iterDBIDs(); cKey.valid(); cKey.advance()) {
- if (DBIDUtil.equal(cKey, aKey)) {
- continue;
- }
- // double nenner = dists[y]*dists[z];
- double nenner = calcDenominator(kernelMatrix, aKey, bKey, cKey);
- if (nenner != 0) {
- double tmp = calcNumerator(kernelMatrix, aKey, bKey, cKey) / nenner;
- double sqrtNenner = Math.sqrt(nenner);
- s.put(tmp, 1 / sqrtNenner);
- }
- }
- }
- double var = s.getSampleVariance();
- if (resqueue.size() < k) {
- resqueue.add(DBIDUtil.newPair(var, aKey));
- } else {
- if (resqueue.peek().doubleValue() > var) {
- resqueue.replaceTopElement(DBIDUtil.newPair(var, aKey));
- }
- }
-
- }
- DoubleMinMax minmaxabod = new DoubleMinMax();
- WritableDoubleDataStore abodvalues = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
- while (!pq.isEmpty()) {
- DoubleDBIDPair pair = pq.poll();
- abodvalues.putDouble(pair, pair.doubleValue());
- minmaxabod.put(pair.doubleValue());
- }
- // Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<>("Angle-based Outlier Detection", "abod-outlier", TypeUtil.DOUBLE, abodvalues, ids);
- OutlierScoreMeta scoreMeta = new InvertedOutlierScoreMeta(minmaxabod.getMin(), minmaxabod.getMax(), 0.0, Double.POSITIVE_INFINITY);
- return new OutlierResult(scoreMeta, scoreResult);
- }
-
- private double[] calcFastNormalization(DBIDRef x, WritableDoubleDataStore dists, DBIDs ids) {
- double[] result = new double[2];
-
- double sum = 0;
- double sumF = 0;
- for (DBIDIter yKey = ids.iter(); yKey.valid(); yKey.advance()) {
- if (dists.doubleValue(yKey) != 0) {
- double tmp = 1 / Math.sqrt(dists.doubleValue(yKey));
- sum += tmp;
- sumF += (1 / dists.doubleValue(yKey)) * tmp;
- }
- }
- double sofar = 0;
- double sofarF = 0;
- for (DBIDIter zKey = ids.iter(); zKey.valid(); zKey.advance()) {
- if (dists.doubleValue(zKey) != 0) {
- double tmp = 1 / Math.sqrt(dists.doubleValue(zKey));
- sofar += tmp;
- double rest = sum - sofar;
- result[0] += tmp * rest;
-
- sofarF += (1 / dists.doubleValue(zKey)) * tmp;
- double restF = sumF - sofarF;
- result[1] += (1 / dists.doubleValue(zKey)) * tmp * restF;
- }
- }
- return result;
- }
-
- private double getAbofFilter(KernelMatrix kernelMatrix, DBIDRef aKey, WritableDoubleDataStore dists, double fulCounter, double counter, DBIDs neighbors) {
- double sum = 0.0;
- double sqrSum = 0.0;
- double partCounter = 0;
- for (DBIDIter bKey = neighbors.iter(); bKey.valid(); bKey.advance()) {
- if (DBIDUtil.equal(bKey, aKey)) {
+ * @param relation Relation
+ * @param kernelMatrix Kernel matrix
+ * @param pA Object A to compute ABOF for
+ * @param s Statistics tracker
+ * @return ABOF value
+ */
+ protected double computeABOF(Relation<V> relation, KernelMatrix kernelMatrix, DBIDRef pA, MeanVariance s) {
+ s.reset(); // Reused
+ double simAA = kernelMatrix.getSimilarity(pA, pA);
+
+ for (DBIDIter nB = relation.iterDBIDs(); nB.valid(); nB.advance()) {
+ if (DBIDUtil.equal(nB, pA)) {
continue;
}
- for (DBIDIter cKey = neighbors.iter(); cKey.valid(); cKey.advance()) {
- if (DBIDUtil.equal(cKey, aKey)) {
- continue;
- }
- if (DBIDUtil.compare(bKey, cKey) > 0) {
- double nenner = dists.doubleValue(bKey) * dists.doubleValue(cKey);
- if (nenner != 0) {
- double tmp = calcNumerator(kernelMatrix, aKey, bKey, cKey) / nenner;
- double sqrtNenner = Math.sqrt(nenner);
- sum += tmp * (1 / sqrtNenner);
- sqrSum += tmp * tmp * (1 / sqrtNenner);
- partCounter += (1 / (sqrtNenner * nenner));
- }
- }
- }
- }
- // TODO: Document the meaning / use of fulCounter, partCounter.
- double mu = (sum + (fulCounter - partCounter)) / counter;
- return (sqrSum / counter) - (mu * mu);
- }
-
- /**
- * Compute the cosinus value between vectors aKey and bKey.
- *
- * @param kernelMatrix
- * @param aKey
- * @param bKey
- * @return cosinus value
- */
- private double calcCos(KernelMatrix kernelMatrix, DBIDRef aKey, DBIDRef bKey) {
- final int ai = mapDBID(aKey);
- final int bi = mapDBID(bKey);
- return kernelMatrix.getDistance(ai, ai) + kernelMatrix.getDistance(bi, bi) - 2 * kernelMatrix.getDistance(ai, bi);
- }
-
- private int mapDBID(DBIDRef aKey) {
- // TODO: this is not the most efficient...
- int off = staticids.binarySearch(aKey);
- if (off < 0) {
- throw new AbortException("Did not find id " + aKey.toString() + " in staticids. " + staticids.contains(aKey));
- }
- return off + 1;
- }
-
- private double calcDenominator(KernelMatrix kernelMatrix, DBIDRef aKey, DBIDRef bKey, DBIDRef cKey) {
- return calcCos(kernelMatrix, aKey, bKey) * calcCos(kernelMatrix, aKey, cKey);
- }
-
- private double calcNumerator(KernelMatrix kernelMatrix, DBIDRef aKey, DBIDRef bKey, DBIDRef cKey) {
- final int ai = mapDBID(aKey);
- final int bi = mapDBID(bKey);
- final int ci = mapDBID(cKey);
- return (kernelMatrix.getDistance(ai, ai) + kernelMatrix.getDistance(bi, ci) - kernelMatrix.getDistance(ai, ci) - kernelMatrix.getDistance(ai, bi));
- }
-
- private ComparableMinHeap<DoubleDBIDPair> calcDistsandNN(Relation<V> data, KernelMatrix kernelMatrix, int sampleSize, DBIDRef aKey, WritableDoubleDataStore dists) {
- ComparableMinHeap<DoubleDBIDPair> nn = new ComparableMinHeap<>(sampleSize);
- for (DBIDIter bKey = data.iterDBIDs(); bKey.valid(); bKey.advance()) {
- double val = calcCos(kernelMatrix, aKey, bKey);
- dists.putDouble(bKey, val);
- if (nn.size() < sampleSize) {
- nn.add(DBIDUtil.newPair(val, bKey));
- } else {
- if (val < nn.peek().doubleValue()) {
- nn.replaceTopElement(DBIDUtil.newPair(val, bKey));
- }
- }
- }
- return nn;
- }
-
- private ComparableMinHeap<DoubleDBIDPair> calcDistsandRNDSample(Relation<V> data, KernelMatrix kernelMatrix, int sampleSize, DBIDRef aKey, WritableDoubleDataStore dists) {
- ComparableMinHeap<DoubleDBIDPair> nn = new ComparableMinHeap<>(sampleSize);
- int step = (int) ((double) data.size() / (double) sampleSize);
- int counter = 0;
- for (DBIDIter bKey = data.iterDBIDs(); bKey.valid(); bKey.advance()) {
- double val = calcCos(kernelMatrix, aKey, bKey);
- dists.putDouble(bKey, val);
- if (counter % step == 0) {
- nn.add(DBIDUtil.newPair(val, bKey));
+ double simBB = kernelMatrix.getSimilarity(nB, nB);
+ double simAB = kernelMatrix.getSimilarity(pA, nB);
+ double sqdAB = simAA + simBB - simAB - simAB;
+ if (!(sqdAB > 0.)) {
+ continue;
}
- counter++;
- }
- return nn;
- }
-
- /**
- * Get explanations for points in the database.
- *
- * @param data to get explanations for
- * @return String explanation
- */
- // TODO: this should be done by the result classes.
- public String getExplanations(Relation<V> data) {
- KernelMatrix kernelMatrix = new KernelMatrix(primitiveKernelFunction, data, staticids);
- // PQ for Outlier Ranking
- ComparableMaxHeap<DoubleDBIDPair> pq = new ComparableMaxHeap<>(data.size());
- HashMap<DBID, DBIDs> explaintab = new HashMap<>();
- // test all objects
- MeanVariance s = new MeanVariance(), s2 = new MeanVariance();
- for (DBIDIter objKey = data.iterDBIDs(); objKey.valid(); objKey.advance()) {
- s.reset();
- // Queue for the best explanation
- ComparableMinHeap<DoubleDBIDPair> explain = new ComparableMinHeap<>();
- // determine Object
- // for each pair of other objects
- for (DBIDIter key1 = data.iterDBIDs(); key1.valid(); key1.advance()) {
- // Collect Explanation Vectors
- s2.reset();
- if (DBIDUtil.equal(objKey, key1)) {
+ for (DBIDIter nC = relation.iterDBIDs(); nC.valid(); nC.advance()) {
+ if (DBIDUtil.equal(nC, pA) || DBIDUtil.compare(nC, nB) < 0) {
continue;
}
- for (DBIDIter key2 = data.iterDBIDs(); key2.valid(); key2.advance()) {
- if (DBIDUtil.equal(key2, key1) || DBIDUtil.equal(objKey, key2)) {
- continue;
- }
- double nenner = calcDenominator(kernelMatrix, objKey, key1, key2);
- if (nenner != 0) {
- double tmp = calcNumerator(kernelMatrix, objKey, key1, key2) / nenner;
- double sqr = Math.sqrt(nenner);
- s2.put(tmp, 1 / sqr);
- }
- }
- explain.add(DBIDUtil.newPair(s2.getSampleVariance(), key1));
- s.put(s2);
- }
- // build variance of the observed vectors
- pq.add(DBIDUtil.newPair(s.getSampleVariance(), objKey));
- //
- ModifiableDBIDs expList = DBIDUtil.newArray();
- expList.add(explain.poll());
- while (!explain.isEmpty()) {
- DBIDRef nextKey = explain.poll();
- if (DBIDUtil.equal(nextKey, objKey)) {
+ double simCC = kernelMatrix.getSimilarity(nC, nC);
+ double simAC = kernelMatrix.getSimilarity(pA, nC);
+ double sqdAC = simAA + simCC - simAC;
+ if (!(sqdAC > 0.)) {
continue;
}
- double max = Double.MIN_VALUE;
- for (DBIDIter exp = expList.iter(); exp.valid(); exp.advance()) {
- if (DBIDUtil.equal(exp, objKey) || DBIDUtil.equal(nextKey, exp)) {
- continue;
- }
- double nenner = Math.sqrt(calcCos(kernelMatrix, objKey, nextKey)) * Math.sqrt(calcCos(kernelMatrix, objKey, exp));
- double angle = calcNumerator(kernelMatrix, objKey, nextKey, exp) / nenner;
- max = Math.max(angle, max);
- }
- if (max < 0.5) {
- expList.add(nextKey);
- }
- }
- explaintab.put(DBIDUtil.deref(objKey), expList);
- }
- StringBuilder buf = new StringBuilder();
- buf.append("Result: ABOD\n");
- int count = 0;
- while (!pq.isEmpty()) {
- if (count > 10) {
- break;
+ // Exploit bilinearity of scalar product:
+ // <B-A, C-A> = <B, C-A> - <A,C-A>
+ // = <B,C> - <B,A> - <A,C> + <A,A>
+ // For computing variance, AA is a constant and can be ignored.
+ double simBC = kernelMatrix.getSimilarity(nB, nC);
+ double numerator = simBC - simAB - simAC; // + simAA;
+ double val = numerator / (sqdAB * sqdAC);
+ s.put(val, 1. / Math.sqrt(sqdAB * sqdAC));
}
- double factor = pq.peek().doubleValue();
- DBIDRef key = pq.poll();
- buf.append(data.get(key)).append(' ');
- buf.append(count).append(" Factor=").append(factor).append(' ').append(key).append('\n');
- DBIDs expList = explaintab.get(key);
- generateExplanation(buf, data, key, expList);
- count++;
- }
- return buf.toString();
- }
-
- private void generateExplanation(StringBuilder buf, Relation<V> data, DBIDRef key, DBIDs expList) {
- Vector vect1 = data.get(key).getColumnVector();
- for (DBIDIter iter = expList.iter(); iter.valid(); iter.advance()) {
- buf.append("Outlier: ").append(vect1).append('\n');
- Vector exp = data.get(iter).getColumnVector();
- buf.append("Most common neighbor: ").append(exp).append('\n');
- // determine difference Vector
- Vector vals = exp.minus(vect1);
- buf.append(vals).append('\n');
- }
- }
-
- /**
- * Run ABOD on the data set.
- *
- * @param relation Relation to process
- * @return Outlier detection result
- */
- public OutlierResult run(Relation<V> relation) {
- if (sampleSize > 0) {
- return getFastRanking(relation);
- } else {
- return getRanking(relation);
}
+ // Sample variance probably would be correct, but the ABOD publication
+ // uses the naive variance.
+ final double abof = s.getNaiveVariance();
+ return abof;
}
@Override
@@ -572,45 +190,29 @@ public class ABOD<V extends NumberVector<?>> extends AbstractDistanceBasedAlgori
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<V, DoubleDistance> {
- /**
- * k Parameter.
- */
- protected int k = 0;
-
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
/**
- * Sample size.
+ * Parameter for the kernel function.
*/
- protected int sampleSize = 0;
+ public static final OptionID KERNEL_FUNCTION_ID = new OptionID("abod.kernelfunction", "Kernel function to use.");
/**
* Distance function.
*/
- protected PrimitiveSimilarityFunction<V, DoubleDistance> primitiveKernelFunction = null;
+ protected SimilarityFunction<V, DoubleDistance> kernelFunction = null;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter kP = new IntParameter(K_ID, 30);
- kP.addConstraint(new GreaterEqualConstraint(1));
- if (config.grab(kP)) {
- k = kP.getValue();
- }
- final IntParameter sampleSizeP = new IntParameter(FAST_SAMPLE_ID);
- sampleSizeP.addConstraint(new GreaterEqualConstraint(1));
- sampleSizeP.setOptional(true);
- if (config.grab(sampleSizeP)) {
- sampleSize = sampleSizeP.getValue();
- }
- final ObjectParameter<PrimitiveSimilarityFunction<V, DoubleDistance>> param = new ObjectParameter<>(KERNEL_FUNCTION_ID, PrimitiveSimilarityFunction.class, PolynomialKernelFunction.class);
+ final ObjectParameter<SimilarityFunction<V, DoubleDistance>> param = new ObjectParameter<>(KERNEL_FUNCTION_ID, SimilarityFunction.class, PolynomialKernelFunction.class);
if (config.grab(param)) {
- primitiveKernelFunction = param.instantiateClass(config);
+ kernelFunction = param.instantiateClass(config);
}
}
@Override
protected ABOD<V> makeInstance() {
- return new ABOD<>(k, sampleSize, primitiveKernelFunction, distanceFunction);
+ return new ABOD<>(kernelFunction);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java
index 99356aef..2b12b306 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java
@@ -38,11 +38,12 @@ import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair;
import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
+import de.lmu.ifi.dbs.elki.math.MathUtil;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair;
@@ -161,7 +162,7 @@ public abstract class AbstractAggarwalYuOutlier<V extends NumberVector<?>> exten
protected static double sparsity(final int setsize, final int dbsize, final int k, final double phi) {
// calculate sparsity c
final double f = 1. / phi;
- final double fK = Math.pow(f, k);
+ final double fK = MathUtil.powi(f, k);
final double sC = (setsize - (dbsize * fK)) / Math.sqrt(dbsize * fK * (1 - fK));
return sC;
}
@@ -242,12 +243,12 @@ public abstract class AbstractAggarwalYuOutlier<V extends NumberVector<?>> exten
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
final IntParameter kP = new IntParameter(K_ID);
- kP.addConstraint(new GreaterEqualConstraint(2));
+ kP.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
if(config.grab(kP)) {
k = kP.getValue();
}
final IntParameter phiP = new IntParameter(PHI_ID);
- phiP.addConstraint(new GreaterEqualConstraint(2));
+ phiP.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
if(config.grab(phiP)) {
phi = phiP.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java
index 89be0e66..c4e5cc5d 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java
@@ -56,7 +56,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
@@ -132,24 +132,24 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
final int dbsize = relation.size();
ArrayList<ArrayList<DBIDs>> ranges = buildRanges(relation);
- Heap<Individuum>.UnorderedIter individuums = (new EvolutionarySearch(relation, ranges, m, rnd.getRandom())).run();
+ Heap<Individuum>.UnorderedIter individuums = (new EvolutionarySearch(relation, ranges, m, rnd.getSingleThreadedRandom())).run();
WritableDoubleDataStore outlierScore = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
- for (; individuums.valid(); individuums.advance()) {
+ for(; individuums.valid(); individuums.advance()) {
DBIDs ids = computeSubspaceForGene(individuums.get().getGene(), ranges);
double sparsityC = sparsity(ids.size(), dbsize, k, phi);
- for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
double prev = outlierScore.doubleValue(iter);
- if (Double.isNaN(prev) || sparsityC < prev) {
+ if(Double.isNaN(prev) || sparsityC < prev) {
outlierScore.putDouble(iter, sparsityC);
}
}
}
DoubleMinMax minmax = new DoubleMinMax();
- for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double val = outlierScore.doubleValue(iditer);
- if (Double.isNaN(val)) {
+ if(Double.isNaN(val)) {
outlierScore.putDouble(iditer, 0.0);
val = 0.0;
}
@@ -219,12 +219,12 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
ArrayList<Individuum> pop = initialPopulation(m);
// best Population
TopBoundedHeap<Individuum> bestSol = new TopBoundedHeap<>(m, Collections.reverseOrder());
- for (Individuum ind : pop) {
+ for(Individuum ind : pop) {
bestSol.add(ind);
}
int iterations = 0;
- while (!checkConvergence(pop)) {
+ while(!checkConvergence(pop)) {
Collections.sort(pop);
pop = rouletteRankSelection(pop);
// Crossover
@@ -232,28 +232,28 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
// Mutation with probability 0.25 , 0.25
pop = mutation(pop, 0.5, 0.5);
// Avoid duplicates
- ind: for (Individuum ind : pop) {
- for (Heap<Individuum>.UnorderedIter it = bestSol.unorderedIter(); it.valid(); it.advance()) {
- if (it.get().equals(ind)) {
+ ind: for(Individuum ind : pop) {
+ for(Heap<Individuum>.UnorderedIter it = bestSol.unorderedIter(); it.valid(); it.advance()) {
+ if(it.get().equals(ind)) {
continue ind;
}
}
bestSol.add(ind);
}
- if (LOG.isDebuggingFinest()) {
+ if(LOG.isDebuggingFinest()) {
StringBuilder buf = new StringBuilder();
buf.append("Top solutions:\n");
- for (Heap<Individuum>.UnorderedIter it = bestSol.unorderedIter(); it.valid(); it.advance()) {
+ for(Heap<Individuum>.UnorderedIter it = bestSol.unorderedIter(); it.valid(); it.advance()) {
buf.append(it.get().toString()).append('\n');
}
buf.append("Population:\n");
- for (Individuum ind : pop) {
+ for(Individuum ind : pop) {
buf.append(ind.toString()).append('\n');
}
LOG.debugFinest(buf.toString());
}
iterations++;
- if (iterations > MAX_ITERATIONS) {
+ if(iterations > MAX_ITERATIONS) {
LOG.warning("Maximum iterations reached.");
break;
}
@@ -268,18 +268,18 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
* @return Convergence
*/
private boolean checkConvergence(Collection<Individuum> pop) {
- if (pop.size() == 0) {
+ if(pop.size() == 0) {
return true;
}
// Gene occurrence counter
int[][] occur = new int[dim][phi + 1];
// Count gene occurrences
- for (Individuum ind : pop) {
+ for(Individuum ind : pop) {
int[] gene = ind.getGene();
- for (int d = 0; d < dim; d++) {
+ for(int d = 0; d < dim; d++) {
int val = gene[d] + DONT_CARE;
- if (val < 0 || val >= phi + 1) {
+ if(val < 0 || val >= phi + 1) {
LOG.warning("Invalid gene value encountered: " + val + " in " + ind.toString());
continue;
}
@@ -288,20 +288,20 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
}
int conv = (int) (pop.size() * 0.95);
- if (LOG.isDebuggingFine()) {
+ if(LOG.isDebuggingFine()) {
LOG.debugFine("Convergence at " + conv + " of " + pop.size() + " individuums.");
}
- for (int d = 0; d < dim; d++) {
+ for(int d = 0; d < dim; d++) {
boolean converged = false;
- for (int val = 0; val < phi + 1; val++) {
- if (occur[d][val] >= conv) {
+ for(int val = 0; val < phi + 1; val++) {
+ if(occur[d][val] >= conv) {
converged = true;
break;
}
}
// A single failure to converge is sufficient to continue.
- if (!converged) {
+ if(!converged) {
return false;
}
}
@@ -318,19 +318,19 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
// Initial Population
ArrayList<Individuum> population = new ArrayList<>(popsize);
// fill population
- for (int i = 0; i < popsize; i++) {
+ for(int i = 0; i < popsize; i++) {
// Random Individual
int[] gene = new int[dim];
// fill don't care ( any dimension == don't care)
- for (int j = 0; j < dim; j++) {
+ for(int j = 0; j < dim; j++) {
gene[j] = DONT_CARE;
}
// count of don't care positions
int countDim = k;
// fill non don't care positions of the Individual
- while (countDim > 0) {
+ while(countDim > 0) {
int z = random.nextInt(dim);
- if (gene[z] == DONT_CARE) {
+ if(gene[z] == DONT_CARE) {
gene[z] = random.nextInt(phi) + 1;
countDim--;
}
@@ -361,20 +361,21 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
ArrayList<Individuum> survivors = new ArrayList<>(popsize);
// position of selection
- for (int i = 0; i < popsize; i++) {
+ for(int i = 0; i < popsize; i++) {
int z = random.nextInt(totalweight);
- for (int j = 0; j < popsize; j++) {
- if (z < popsize - j) {
+ for(int j = 0; j < popsize; j++) {
+ if(z < popsize - j) {
// TODO: need clone?
survivors.add(population.get(j));
break;
- } else {
+ }
+ else {
// decrement
z -= (popsize - j);
}
}
}
- if (survivors.size() != popsize) {
+ if(survivors.size() != popsize) {
throw new AbortException("Selection step failed - implementation error?");
}
// Don't sort, to avoid biasing the crossover!
@@ -394,23 +395,24 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
TreeSet<Integer> R = new TreeSet<>();
// for each individuum
- for (int j = 0; j < population.size(); j++) {
+ for(int j = 0; j < population.size(); j++) {
// clear the Sets
Q.clear();
R.clear();
// Fill the Sets with the Positions
- for (int i = 0; i < dim; i++) {
- if (population.get(j).getGene()[i] == DONT_CARE) {
+ for(int i = 0; i < dim; i++) {
+ if(population.get(j).getGene()[i] == DONT_CARE) {
Q.add(i);
- } else {
+ }
+ else {
R.add(i);
}
}
//
double r1 = random.nextDouble();
- if (Q.size() != 0) {
+ if(Q.size() != 0) {
// Mutation Variant 1
- if (r1 <= perc1) {
+ if(r1 <= perc1) {
// calc Mutation Spot
Integer[] pos = new Integer[Q.size()];
pos = Q.toArray(pos);
@@ -435,7 +437,7 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
}
r1 = random.nextDouble();
// Mutation Variant 2
- if (r1 <= perc2) {
+ if(r1 <= perc2) {
// calc Mutation Spot
Integer[] pos = new Integer[R.size()];
pos = R.toArray(pos);
@@ -471,14 +473,14 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
// Crossover Set of population Set
ArrayList<Individuum> crossover = new ArrayList<>();
- for (int i = 0; i < population.size() - 1; i += 2) {
+ for(int i = 0; i < population.size() - 1; i += 2) {
Pair<Individuum, Individuum> recombine = recombineOptimized(population.get(i), population.get(i + 1));
// add the Solutions to the new Set
crossover.add(recombine.getFirst());
crossover.add(recombine.getSecond());
}
// if the set contains an odd number of Subspaces, retain the last one
- if (population.size() % 2 == 1) {
+ if(population.size() % 2 == 1) {
crossover.add(population.get(population.size() - 1));
}
// Collections.sort(crossover);
@@ -499,14 +501,14 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
// Set of Positions in which neither s1 or s2 is don't care
ArrayList<Integer> R = new ArrayList<>(dim);
- for (int i = 0; i < dim; i++) {
- if ((parent1.getGene()[i] == DONT_CARE) && (parent2.getGene()[i] != DONT_CARE)) {
+ for(int i = 0; i < dim; i++) {
+ if((parent1.getGene()[i] == DONT_CARE) && (parent2.getGene()[i] != DONT_CARE)) {
Q.add(i);
}
- if ((parent1.getGene()[i] != DONT_CARE) && (parent2.getGene()[i] == DONT_CARE)) {
+ if((parent1.getGene()[i] != DONT_CARE) && (parent2.getGene()[i] == DONT_CARE)) {
Q.add(i);
}
- if ((parent1.getGene()[i] != DONT_CARE) && (parent2.getGene()[i] != DONT_CARE)) {
+ if((parent1.getGene()[i] != DONT_CARE) && (parent2.getGene()[i] != DONT_CARE)) {
R.add(i);
}
}
@@ -518,11 +520,11 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
int count = k - R.size();
Iterator<Integer> q = Q.iterator();
- while (count > 0) {
+ while(count > 0) {
int[] l1 = b.clone();
int[] l2 = b.clone();
- while (q.hasNext()) {
+ while(q.hasNext()) {
int next = q.next();
// pos = next;
@@ -536,14 +538,15 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
final double sparsityL1 = sparsity(computeSubspaceForGene(l1, ranges).size(), dbsize, k, phi);
final double sparsityL2 = sparsity(computeSubspaceForGene(l2, ranges).size(), dbsize, k, phi);
- if (sparsityL1 <= sparsityL2) {
+ if(sparsityL1 <= sparsityL2) {
b = l1.clone();
- if (s1Null) {
+ if(s1Null) {
count--;
}
- } else {
+ }
+ else {
b = l2.clone();
- if (s2Null) {
+ if(s2Null) {
count--;
}
}
@@ -555,10 +558,11 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
// create the complementary String
int[] comp = new int[dim];
- for (int i = 0; i < dim; i++) {
- if (b[i] == parent1.getGene()[i]) {
+ for(int i = 0; i < dim; i++) {
+ if(b[i] == parent1.getGene()[i]) {
comp[i] = parent2.getGene()[i];
- } else {
+ }
+ else {
comp[i] = parent2.getGene()[i];
}
}
@@ -581,7 +585,7 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
* @return best gene combination
*/
private Individuum combineRecursive(ArrayList<Integer> r, int i, int[] current, Individuum parent1, Individuum parent2) {
- if (i == r.size()) {
+ if(i == r.size()) {
return makeIndividuum(current);
}
// Position to modify
@@ -594,9 +598,10 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
Individuum i1 = combineRecursive(r, i + 1, gene1, parent1, parent2);
Individuum i2 = combineRecursive(r, i + 1, gene2, parent1, parent2);
// Return the better result.
- if (i1.getFitness() < i2.getFitness()) {
+ if(i1.getFitness() < i2.getFitness()) {
return i1;
- } else {
+ }
+ else {
return i2;
}
}
@@ -657,15 +662,15 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
@Override
public boolean equals(Object obj) {
- if (!(obj instanceof Individuum)) {
+ if(!(obj instanceof Individuum)) {
return false;
}
Individuum other = (Individuum) obj;
- if (other.second.length != this.second.length) {
+ if(other.second.length != this.second.length) {
return false;
}
- for (int i = 0; i < this.second.length; i++) {
- if (other.second[i] != this.second[i]) {
+ for(int i = 0; i < this.second.length; i++) {
+ if(other.second[i] != this.second[i]) {
return false;
}
}
@@ -703,12 +708,12 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
final IntParameter mP = new IntParameter(M_ID);
- mP.addConstraint(new GreaterEqualConstraint(2));
- if (config.grab(mP)) {
+ mP.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
+ if(config.grab(mP)) {
m = mP.getValue();
}
final RandomParameter rndP = new RandomParameter(SEED_ID);
- if (config.grab(rndP)) {
+ if(config.grab(rndP)) {
rnd = rndP.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/COP.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/COP.java
index 06168c5a..190d14fe 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/COP.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/COP.java
@@ -62,11 +62,12 @@ import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.EnumParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
@@ -194,6 +195,11 @@ public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
DistanceDist dist = DistanceDist.CHISQUARED;
/**
+ * Include models in output.
+ */
+ boolean models;
+
+ /**
* Constructor.
*
* @param distanceFunction distance function
@@ -201,13 +207,15 @@ public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
* @param pca PCA computation method
* @param expect Expected fraction of outliers (for score normalization)
* @param dist Distance distribution model (ChiSquared, Gamma)
+ * @param models Report models
*/
- public COP(DistanceFunction<? super V, D> distanceFunction, int k, PCARunner<V> pca, double expect, DistanceDist dist) {
+ public COP(DistanceFunction<? super V, D> distanceFunction, int k, PCARunner<V> pca, double expect, DistanceDist dist, boolean models) {
super(distanceFunction);
this.k = k;
this.pca = pca;
this.expect = expect;
this.dist = dist;
+ this.models = models;
}
/**
@@ -221,22 +229,26 @@ public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
KNNQuery<V, D> knnQuery = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k + 1);
final int dim = RelationUtil.dimensionality(relation);
- if (k <= dim + 1) {
+ if(k <= dim + 1) {
LOG.warning("PCA is underspecified with a too low k! k should be at much larger than " + dim);
}
WritableDoubleDataStore cop_score = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
- WritableDataStore<Vector> cop_err_v = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, Vector.class);
- WritableIntegerDataStore cop_dim = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, -1);
+ WritableDataStore<Vector> cop_err_v = null;
+ WritableIntegerDataStore cop_dim = null;
+ if(models) {
+ cop_err_v = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, Vector.class);
+ cop_dim = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, -1);
+ }
// compute neighbors of each db object
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Correlation Outlier Probabilities", relation.size(), LOG) : null;
- for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
+ for(DBIDIter id = ids.iter(); id.valid(); id.advance()) {
KNNList<D> neighbors = knnQuery.getKNNForDBID(id, k + 1);
ModifiableDBIDs nids = DBIDUtil.newHashSet(neighbors);
nids.remove(id); // Do not use query object
- Vector centroid = Centroid.make(relation, nids).toVector(relation).getColumnVector();
+ Vector centroid = Centroid.make(relation, nids);
Vector relative = relation.get(id).getColumnVector().minusEquals(centroid);
PCAResult pcares = pca.processIds(nids, relation);
@@ -246,17 +258,17 @@ public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
double min = Double.POSITIVE_INFINITY;
int vdim = dim;
- switch(dist) {
+ switch(dist){
case CHISQUARED: {
double sqdevs = 0;
- for (int d = 0; d < dim; d++) {
+ for(int d = 0; d < dim; d++) {
// Scale with Stddev
double dev = projected.get(d);
// Accumulate
sqdevs += dev * dev / evs[d];
// Evaluate
double score = 1 - ChiSquaredDistribution.cdf(sqdevs, d + 1);
- if (score < min) {
+ if(score < min) {
min = score;
vdim = d + 1;
}
@@ -267,21 +279,21 @@ public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
double[][] dists = new double[dim][nids.size()];
int j = 0;
Vector srel = new Vector(dim);
- for (DBIDIter s = nids.iter(); s.valid() && j < nids.size(); s.advance()) {
+ for(DBIDIter s = nids.iter(); s.valid() && j < nids.size(); s.advance()) {
V vec = relation.get(s);
- for (int d = 0; d < dim; d++) {
+ for(int d = 0; d < dim; d++) {
srel.set(d, vec.doubleValue(d) - centroid.get(d));
}
Vector serr = evecs.transposeTimes(srel);
double sqdist = 0.0;
- for (int d = 0; d < dim; d++) {
+ for(int d = 0; d < dim; d++) {
sqdist += serr.get(d) * serr.get(d) / evs[d];
dists[d][j] = sqdist;
}
j++;
}
double sqdevs = 0;
- for (int d = 0; d < dim; d++) {
+ for(int d = 0; d < dim; d++) {
// Scale with Stddev
final double dev = projected.get(d);
// Accumulate
@@ -290,7 +302,7 @@ public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
Arrays.sort(dists[d]);
// Evaluate
double score = 1 - GammaChoiWetteEstimator.STATIC.estimate(dists[d], SHORTENED_ARRAY).cdf(sqdevs);
- if (score < min) {
+ if(score < min) {
min = score;
vdim = d + 1;
}
@@ -301,20 +313,22 @@ public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
// Normalize the value
final double prob = expect * (1 - min) / (expect + min);
// Construct the error vector:
- for (int d = vdim; d < dim; d++) {
+ for(int d = vdim; d < dim; d++) {
projected.set(d, 0.0);
}
Vector ev = evecs.times(projected).timesEquals(-1 * prob);
cop_score.putDouble(id, prob);
- cop_err_v.put(id, ev);
- cop_dim.putInt(id, dim + 1 - vdim);
+ if(models) {
+ cop_err_v.put(id, ev);
+ cop_dim.putInt(id, dim + 1 - vdim);
+ }
- if (prog != null) {
+ if(prog != null) {
prog.incrementProcessed(LOG);
}
}
- if (prog != null) {
+ if(prog != null) {
prog.ensureCompleted(LOG);
}
@@ -322,8 +336,10 @@ public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
Relation<Double> scoreResult = new MaterializedRelation<>("Correlation Outlier Probabilities", COP_SCORES, TypeUtil.DOUBLE, cop_score, ids);
OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore();
OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
- result.addChildResult(new MaterializedRelation<>("Local Dimensionality", COP_DIM, TypeUtil.INTEGER, cop_dim, ids));
- result.addChildResult(new MaterializedRelation<>("Error vectors", COP_ERRORVEC, TypeUtil.VECTOR, cop_err_v, ids));
+ if(models) {
+ result.addChildResult(new MaterializedRelation<>("Local Dimensionality", COP_DIM, TypeUtil.INTEGER, cop_dim, ids));
+ result.addChildResult(new MaterializedRelation<>("Error vectors", COP_ERRORVEC, TypeUtil.VECTOR, cop_err_v, ids));
+ }
return result;
}
@@ -382,6 +398,16 @@ public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
public static final OptionID EXPECT_ID = new OptionID("cop.expect", "Expected share of outliers. Only affect score normalization.");
/**
+ * Include COP error vectors in output.
+ * <p>
+ * Key: {@code -cop.models}
+ *
+ * Default: off
+ * </p>
+ */
+ public static final OptionID MODELS_ID = new OptionID("cop.models", "Include COP models (error vectors) in output. This needs more memory.");
+
+ /**
* Number of neighbors to be considered.
*/
int k;
@@ -401,33 +427,42 @@ public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
*/
double expect;
+ /**
+ * Include COP models
+ */
+ boolean models = false;
+
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
IntParameter kP = new IntParameter(K_ID);
kP.addConstraint(new GreaterConstraint(5));
- if (config.grab(kP)) {
+ if(config.grab(kP)) {
k = kP.intValue();
}
EnumParameter<DistanceDist> distP = new EnumParameter<>(DIST_ID, DistanceDist.class, DistanceDist.GAMMA);
- if (config.grab(distP)) {
+ if(config.grab(distP)) {
dist = distP.getValue();
}
DoubleParameter expectP = new DoubleParameter(EXPECT_ID, 0.001);
- expectP.addConstraint(new GreaterConstraint(0));
- expectP.addConstraint(new LessConstraint(1.0));
- if (config.grab(expectP)) {
+ expectP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+ expectP.addConstraint(CommonConstraints.LESS_THAN_ONE_DOUBLE);
+ if(config.grab(expectP)) {
expect = expectP.doubleValue();
}
ObjectParameter<PCARunner<V>> pcaP = new ObjectParameter<>(PCARUNNER_ID, PCARunner.class, PCARunner.class);
- if (config.grab(pcaP)) {
+ if(config.grab(pcaP)) {
pca = pcaP.instantiateClass(config);
}
+ Flag modelsF = new Flag(MODELS_ID);
+ if(config.grab(modelsF)) {
+ models = modelsF.isTrue();
+ }
}
@Override
protected COP<V, D> makeInstance() {
- return new COP<>(distanceFunction, k, pca, expect, dist);
+ return new COP<>(distanceFunction, k, pca, expect, dist, models);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DWOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DWOF.java
new file mode 100644
index 00000000..ef782390
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DWOF.java
@@ -0,0 +1,407 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
+import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
+import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.math.Mean;
+import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+
+/**
+ * <p>
+ * Algorithm to compute dynamic-window outlier factors in a database based on a
+ * specified parameter {@link Parameterizer#K_ID} ({@code -dwof.k}).
+ * </p>
+ *
+ * <p>
+ * The parameter {@link Parameterizer#K_ID} specifies the number of the
+ * neighbors to be considered during the calculation of the DWOF score.
+ * </p>
+ *
+ * <p>
+ * All the distance queries -KNN and Range- are determined using the parameter
+ * {@link AbstractDistanceBasedAlgorithm#DISTANCE_FUNCTION_ID}
+ * </p>
+ *
+ * <p>
+ * Reference: <br>
+ * Rana Momtaz, Nesma Mohssen and Mohammad A. Gowayyed: DWOF: A Robust
+ * Density-Based Outlier Detection Approach. <br>
+ * In: Pattern Recognition and Image Analysis , Proc. 6th Iberian Conference,
+ * IbPRIA 2013, Funchal, Madeira, Portugal, June 5-7, 2013.
+ * </p>
+ *
+ * @author Omar Yousry
+ *
+ * @param <O> the type of DatabaseObjects handled by this Algorithm
+ * @param <D> Distance type
+ */
+
+@Title("DWOF: Dynamic Window Outlier Factor")
+@Description("Algorithm to compute dynamic-window outlier factors in a database based on the neighborhood size parameter 'k'")
+@Reference(authors = "R. Momtaz, N. Mohssen, M. A. Gowayyed", title = "DWOF: A Robust Density-Based OutlierDetection Approach", booktitle = "Pattern Recognition and Image Analysis, Proc. 6th Iberian Conference, IbPRIA 2013, Funchal, Madeira, Portugal, 2013.", url = "http://dx.doi.org/10.1007%2F978-3-642-38628-2_61")
+public class DWOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(DWOF.class);
+
+ /**
+ * Holds the value of {@link Parameterizer#K_ID} i.e. Number of neighbors to
+ * consider during the calculation of DWOF scores.
+ */
+ protected int k;
+
+ /**
+ * The radii changing ratio
+ */
+ private double delta = 1.1;
+
+ /**
+ * Constructor.
+ *
+ * @param distanceFunction Distance function to use in queries
+ * @param k the value of k
+ * @param delta Radius increase factor
+ */
+ public DWOF(DistanceFunction<? super O, D> distanceFunction, int k, double delta) {
+ super(distanceFunction);
+ this.k = k + 1;
+ this.delta = delta;
+ }
+
+ /**
+ * Performs the Generalized DWOF_SCORE algorithm on the given database by
+ * calling all the other methods in the proper order.
+ *
+ * @param database Database to query
+ * @param relation Data to process
+ * @return new OutlierResult instance
+ */
+ public OutlierResult run(Database database, Relation<O> relation) {
+ final DBIDs ids = relation.getDBIDs();
+ DistanceQuery<O, D> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
+ // Get k nearest neighbor and range query on the relation.
+ KNNQuery<O, D> knnq = database.getKNNQuery(distFunc, k, DatabaseQuery.HINT_HEAVY_USE);
+ RangeQuery<O, D> rnnQuery = database.getRangeQuery(distFunc, DatabaseQuery.HINT_HEAVY_USE);
+
+ StepProgress stepProg = LOG.isVerbose() ? new StepProgress("DWOF", 2) : null;
+ // DWOF output score storage.
+ WritableDoubleDataStore dwofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB | DataStoreFactory.HINT_HOT, 0.);
+ if(stepProg != null) {
+ stepProg.beginStep(1, "Initializing objects' Radii", LOG);
+ }
+ WritableDoubleDataStore radii = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, 0.);
+ // Find an initial radius for each object:
+ initializeRadii(ids, knnq, distFunc, radii);
+ WritableIntegerDataStore oldSizes = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT, 1);
+ WritableIntegerDataStore newSizes = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT, 1);
+ int countUnmerged = relation.size();
+ if(stepProg != null) {
+ stepProg.beginStep(2, "Clustering-Evaluating Cycles.", LOG);
+ }
+ IndefiniteProgress clusEvalProgress = LOG.isVerbose() ? new IndefiniteProgress("Evaluating DWOFs", LOG) : null;
+ while(countUnmerged > 0) {
+ if(clusEvalProgress != null) {
+ clusEvalProgress.incrementProcessed(LOG);
+ }
+ // Increase radii
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ radii.putDouble(iter, radii.doubleValue(iter) * delta);
+ }
+ // stores the clustering label for each object
+ WritableDataStore<ModifiableDBIDs> labels = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_TEMP, ModifiableDBIDs.class);
+ // Cluster objects based on the current radius
+ clusterData(ids, rnnQuery, radii, labels);
+ // simple reference swap
+ WritableIntegerDataStore temp = newSizes;
+ newSizes = oldSizes;
+ oldSizes = temp;
+
+ // Update the cluster size count for each object.
+ countUnmerged = updateSizes(ids, labels, newSizes);
+ labels.destroy();
+ // Update DWOF scores.
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ double newScore = (newSizes.intValue(iter) > 0) ? ((double) (oldSizes.intValue(iter) - 1) / (double) newSizes.intValue(iter)) : 0.0;
+ dwofs.putDouble(iter, dwofs.doubleValue(iter) + newScore);
+ }
+ }
+ if(clusEvalProgress != null) {
+ clusEvalProgress.setCompleted(LOG);
+ }
+ if(stepProg != null) {
+ stepProg.setCompleted(LOG);
+ }
+ // Build result representation.
+ DoubleMinMax minmax = new DoubleMinMax();
+ for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
+ minmax.put(dwofs.doubleValue(iter));
+ }
+ OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY);
+ Relation<Double> rel = new MaterializedRelation<>("Dynamic-Window Outlier Factors", "dwof-outlier", TypeUtil.DOUBLE, dwofs, ids);
+ return new OutlierResult(meta, rel);
+ }
+
+ /**
+ * This method prepares a container for the radii of the objects and
+ * initializes radii according to the equation:
+ *
+ * initialRadii of a certain object = (absoluteMinDist of all objects) *
+ * (avgDist of the object) / (minAvgDist of all objects)
+ *
+ * @param ids Database IDs to process
+ * @param distFunc Distance function
+ * @param knnq kNN search function
+ * @param radii WritableDoubleDataStore to store radii
+ */
+ private void initializeRadii(DBIDs ids, KNNQuery<O, D> knnq, DistanceQuery<O, D> distFunc, WritableDoubleDataStore radii) {
+ FiniteProgress avgDistProgress = LOG.isVerbose() ? new FiniteProgress("Calculating average kNN distances-", ids.size(), LOG) : null;
+ double absoluteMinDist = Double.POSITIVE_INFINITY;
+ double minAvgDist = Double.POSITIVE_INFINITY;
+ // to get the mean for each object
+ Mean mean = new Mean();
+ // Iterate over all objects
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ KNNList<D> iterNeighbors = knnq.getKNNForDBID(iter, k);
+ // skip the point itself
+ mean.reset();
+ for(DBIDIter neighbor1 = iterNeighbors.iter(); neighbor1.valid(); neighbor1.advance()) {
+ if(DBIDUtil.equal(neighbor1, iter)) {
+ continue;
+ }
+ for(DBIDIter neighbor2 = iterNeighbors.iter(); neighbor2.valid(); neighbor2.advance()) {
+ if(DBIDUtil.equal(neighbor1, neighbor2) || DBIDUtil.equal(neighbor2, iter)) {
+ continue;
+ }
+ double distance = distFunc.distance(neighbor1, neighbor2).doubleValue();
+ mean.put(distance);
+ if(distance > 0. && distance < absoluteMinDist) {
+ absoluteMinDist = distance;
+ }
+ }
+ }
+ double currentMean = mean.getMean();
+ radii.putDouble(iter, currentMean);
+ if(currentMean < minAvgDist) {
+ minAvgDist = currentMean;
+ }
+ if(avgDistProgress != null) {
+ avgDistProgress.incrementProcessed(LOG);
+ }
+ }
+ if(avgDistProgress != null) {
+ avgDistProgress.ensureCompleted(LOG);
+ }
+
+ // Initializing the radii of all objects.
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ radii.putDouble(iter, (minAvgDist > 0) ? (absoluteMinDist * radii.doubleValue(iter) / minAvgDist) : Double.POSITIVE_INFINITY);
+ }
+ }
+
+ /**
+ * This method applies a density based clustering algorithm.
+ *
+ * It looks for an unclustered object and builds a new cluster for it, then
+ * adds all the points within its radius to that cluster.
+ *
+ * nChain represents the points to be added to the cluster and its
+ * radius-neighbors
+ *
+ * @param ids Database IDs to process
+ * @param rnnQuery Data to process
+ * @param radii Radii to cluster accordingly
+ * @param labels Label storage.
+ */
+ private void clusterData(DBIDs ids, RangeQuery<O, D> rnnQuery, WritableDoubleDataStore radii, WritableDataStore<ModifiableDBIDs> labels) {
+ FiniteProgress clustProg = LOG.isVerbose() ? new FiniteProgress("Density-Based Clustering", ids.size(), LOG) : null;
+ // Iterate over all objects
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ if(labels.get(iter) != null) {
+ continue;
+ }
+ ModifiableDBIDs newCluster = DBIDUtil.newArray();
+ newCluster.add(iter);
+ labels.put(iter, newCluster);
+ if(clustProg != null) {
+ clustProg.incrementProcessed(LOG);
+ }
+ // container of the points to be added and their radii neighbors to the
+ // cluster
+ ModifiableDBIDs nChain = DBIDUtil.newArray();
+ nChain.add(iter);
+ // iterate over nChain
+ for(DBIDIter toGetNeighbors = nChain.iter(); toGetNeighbors.valid(); toGetNeighbors.advance()) {
+ D range = rnnQuery.getDistanceFactory().fromDouble(radii.doubleValue(toGetNeighbors));
+ DistanceDBIDList<D> nNeighbors = rnnQuery.getRangeForDBID(toGetNeighbors, range);
+ for(DistanceDBIDListIter<D> iter2 = nNeighbors.iter(); iter2.valid(); iter2.advance()) {
+ if(DBIDUtil.equal(toGetNeighbors, iter2)) {
+ continue;
+ }
+ if(labels.get(iter2) == null) {
+ newCluster.add(iter2);
+ labels.put(iter2, newCluster);
+ nChain.add(iter2);
+ if(clustProg != null) {
+ clustProg.incrementProcessed(LOG);
+ }
+ }
+ else if(labels.get(iter2) != newCluster) {
+ ModifiableDBIDs toBeDeleted = labels.get(iter2);
+ newCluster.addDBIDs(toBeDeleted);
+ for(DBIDIter iter3 = toBeDeleted.iter(); iter3.valid(); iter3.advance()) {
+ labels.put(iter3, newCluster);
+ }
+ toBeDeleted.clear();
+ }
+ }
+ }
+ }
+ if(clustProg != null) {
+ clustProg.ensureCompleted(LOG);
+ }
+ }
+
+ /**
+ * This method updates each object's cluster size after the clustering step.
+ *
+ * @param ids Object IDs to process
+ * @param labels references for each object's cluster
+ * @param newSizes the sizes container to be updated
+ * @return the number of unclustered objects
+ */
+ private int updateSizes(DBIDs ids, WritableDataStore<ModifiableDBIDs> labels, WritableIntegerDataStore newSizes) {
+ // to count the unclustered all over
+ int countUnmerged = 0;
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ // checking the point's new cluster size after the clustering step
+ int newClusterSize = labels.get(iter).size();
+ newSizes.putInt(iter, newClusterSize);
+ // the point is alone in the cluster --> not merged with other points
+ if(newClusterSize == 1) {
+ countUnmerged++;
+ }
+ }
+ return countUnmerged;
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(getDistanceFunction().getInputTypeRestriction());
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Omar Yousry
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ /**
+ * Option ID for the number of neighbors.
+ */
+ public static final OptionID K_ID = OptionID.getOrCreateOptionID("dwof.k", "Number of neighbors to get for DWOF score outlier detection.");
+
+ /**
+ * Option ID for radius increases
+ */
+ public static final OptionID DELTA_ID = OptionID.getOrCreateOptionID("dwof.delta", "Radius increase factor.");
+
+ /**
+ * Number of neighbors to get
+ */
+ protected int k = 2;
+
+ /**
+ * Radius increase factor.
+ */
+ protected double delta = 1.1;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ // The super class has the distance function parameter!
+ super.makeOptions(config);
+ IntParameter kP = new IntParameter(K_ID);
+ kP.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
+ if(config.grab(kP)) {
+ k = kP.getValue();
+ }
+ DoubleParameter deltaP = new DoubleParameter(DELTA_ID);
+ deltaP.setDefaultValue(1.1);
+ deltaP.addConstraint(CommonConstraints.GREATER_THAN_ONE_DOUBLE);
+ if(config.grab(deltaP)) {
+ delta = deltaP.getValue();
+ }
+ }
+
+ @Override
+ protected DWOF<O, D> makeInstance() {
+ return new DWOF<>(distanceFunction, k, delta);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java
index f8fd686f..76191cf2 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java
@@ -38,10 +38,12 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.result.Result;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore;
import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy.Hierarchy.Iter;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
@@ -91,15 +93,27 @@ public class EMOutlier<V extends NumberVector<?>> extends AbstractAlgorithm<Outl
* @return Outlier result
*/
public OutlierResult run(Database database, Relation<V> relation) {
+ emClustering.setSoft(true);
Clustering<EMModel<V>> emresult = emClustering.run(database, relation);
+ Relation<double[]> soft = null;
+ for (Iter<Result> iter = emresult.getHierarchy().iterChildren(emresult); iter.valid(); iter.advance()) {
+ if (!(iter.get() instanceof Relation)) {
+ continue;
+ }
+ if (((Relation<?>) iter.get()).getDataTypeInformation() == EM.SOFT_TYPE) {
+ @SuppressWarnings("unchecked")
+ Relation<double[]> rel = (Relation<double[]>) iter.get();
+ soft = rel;
+ }
+ }
double globmax = 0.0;
WritableDoubleDataStore emo_score = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double maxProb = Double.POSITIVE_INFINITY;
- double[] probs = emClustering.getProbClusterIGivenX(iditer);
- for(double prob : probs) {
- maxProb = Math.min(1 - prob, maxProb);
+ double[] probs = soft.get(iditer);
+ for (double prob : probs) {
+ maxProb = Math.min(1. - prob, maxProb);
}
emo_score.putDouble(iditer, maxProb);
globmax = Math.max(maxProb, globmax);
@@ -145,4 +159,4 @@ public class EMOutlier<V extends NumberVector<?>> extends AbstractAlgorithm<Outl
return new EMOutlier<>(em);
}
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/FastABOD.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/FastABOD.java
new file mode 100644
index 00000000..ee6bd434
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/FastABOD.java
@@ -0,0 +1,219 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair;
+import de.lmu.ifi.dbs.elki.database.query.similarity.SimilarityQuery;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
+import de.lmu.ifi.dbs.elki.distance.similarityfunction.SimilarityFunction;
+import de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.KernelMatrix;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.math.MeanVariance;
+import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparableMaxHeap;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ObjectHeap;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+
+/**
+ * Angle-Based Outlier Detection / Angle-Based Outlier Factor.
+ *
+ * Fast-ABOD (approximate ABOF) version.
+ *
+ * H.-P. Kriegel, M. Schubert, and A. Zimek: Angle-Based Outlier Detection in
+ * High-dimensional Data. In: Proc. 14th ACM SIGKDD Int. Conf. on Knowledge
+ * Discovery and Data Mining (KDD '08), Las Vegas, NV, 2008.
+ *
+ * @author Matthias Schubert (Original Code)
+ * @author Erich Schubert (ELKIfication)
+ *
+ * @param <V> Vector type
+ */
+@Title("Approximate ABOD: Angle-Based Outlier Detection")
+@Description("Outlier detection using variance analysis on angles, especially for high dimensional data sets.")
+@Reference(authors = "H.-P. Kriegel, M. Schubert, and A. Zimek", title = "Angle-Based Outlier Detection in High-dimensional Data", booktitle = "Proc. 14th ACM SIGKDD Int. Conf. on Knowledge Discovery and Data Mining (KDD '08), Las Vegas, NV, 2008", url = "http://dx.doi.org/10.1145/1401890.1401946")
+public class FastABOD<V extends NumberVector<?>> extends ABOD<V> {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(FastABOD.class);
+
+ /**
+ * Number of nearest neighbors.
+ */
+ protected int k;
+
+ /**
+ * Constructor for Angle-Based Outlier Detection (ABOD).
+ *
+ * @param kernelFunction kernel function to use
+ * @param k Number of nearest neighbors
+ */
+ public FastABOD(SimilarityFunction<? super V, DoubleDistance> kernelFunction, int k) {
+ super(kernelFunction);
+ this.k = k;
+ }
+
+ /**
+ * Run Fast-ABOD on the data set.
+ *
+ * @param relation Relation to process
+ * @return Outlier detection result
+ */
+ @Override
+ public OutlierResult run(Database db, Relation<V> relation) {
+ DBIDs ids = relation.getDBIDs();
+ // Build a kernel matrix, to make O(n^3) slightly less bad.
+ SimilarityQuery<V, DoubleDistance> sq = db.getSimilarityQuery(relation, kernelFunction);
+ KernelMatrix kernelMatrix = new KernelMatrix(sq, relation, ids);
+
+ WritableDoubleDataStore abodvalues = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
+ DoubleMinMax minmaxabod = new DoubleMinMax();
+
+ MeanVariance s = new MeanVariance();
+ for (DBIDIter pA = ids.iter(); pA.valid(); pA.advance()) {
+ s.reset();
+ final double simAA = kernelMatrix.getSimilarity(pA, pA);
+
+ // Choose the k-min nearest
+ ComparableMaxHeap<DoubleDBIDPair> nn = new ComparableMaxHeap<>(k);
+ for (DBIDIter nB = relation.iterDBIDs(); nB.valid(); nB.advance()) {
+ if (DBIDUtil.equal(nB, pA)) {
+ continue;
+ }
+ double simBB = kernelMatrix.getSimilarity(nB, nB);
+ double simAB = kernelMatrix.getSimilarity(pA, nB);
+ double sqdAB = simAA + simBB - simAB - simAB;
+ if (!(sqdAB > 0.)) {
+ continue;
+ }
+ if (nn.size() < k) {
+ nn.add(DBIDUtil.newPair(sqdAB, nB));
+ } else if (sqdAB < nn.peek().doubleValue()) {
+ nn.replaceTopElement(DBIDUtil.newPair(sqdAB, nB));
+ }
+ }
+
+ for (ObjectHeap.UnsortedIter<DoubleDBIDPair> iB = nn.unsortedIter(); iB.valid(); iB.advance()) {
+ DoubleDBIDPair nB = iB.get();
+ double sqdAB = nB.doubleValue();
+ double simAB = kernelMatrix.getSimilarity(pA, nB);
+ if (!(sqdAB > 0.)) {
+ continue;
+ }
+ for (ObjectHeap.UnsortedIter<DoubleDBIDPair> iC = nn.unsortedIter(); iC.valid(); iC.advance()) {
+ DoubleDBIDPair nC = iC.get();
+ if (DBIDUtil.compare(nC, nB) < 0) {
+ continue;
+ }
+ double sqdAC = nC.doubleValue();
+ double simAC = kernelMatrix.getSimilarity(pA, nC);
+ if (!(sqdAC > 0.)) {
+ continue;
+ }
+ // Exploit bilinearity of scalar product:
+ // <B-A, C-A> = <B, C-A> - <A,C-A>
+ // = <B,C> - <B,A> - <A,C> + <A,A>
+ // For computing variance, AA is a constant and can be ignored.
+ double simBC = kernelMatrix.getSimilarity(nB, nC);
+ double numerator = simBC - simAB - simAC; // + simAA;
+ double val = numerator / (sqdAB * sqdAC);
+ s.put(val, 1. / Math.sqrt(sqdAB * sqdAC));
+ }
+ }
+ // Sample variance probably would be correct, but the ABOD publication
+ // uses the naive variance.
+ final double abof = s.getNaiveVariance();
+ minmaxabod.put(abof);
+ abodvalues.putDouble(pA, abof);
+ }
+
+ // Build result representation.
+ Relation<Double> scoreResult = new MaterializedRelation<>("Angle-Based Outlier Degree", "abod-outlier", TypeUtil.DOUBLE, abodvalues, relation.getDBIDs());
+ OutlierScoreMeta scoreMeta = new InvertedOutlierScoreMeta(minmaxabod.getMin(), minmaxabod.getMax(), 0.0, Double.POSITIVE_INFINITY);
+ return new OutlierResult(scoreMeta, scoreResult);
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer<V extends NumberVector<?>> extends ABOD.Parameterizer<V> {
+ /**
+ * Parameter for the nearest neighbors.
+ */
+ public static final OptionID K_ID = new OptionID("fastabod.k", "Number of nearest neighbors to use for ABOD.");
+
+ /**
+ * Number of neighbors.
+ */
+ protected int k;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ final IntParameter kP = new IntParameter(K_ID);
+ if (config.grab(kP)) {
+ k = kP.intValue();
+ }
+ }
+
+ @Override
+ protected FastABOD<V> makeInstance() {
+ return new FastABOD<>(kernelFunction, k);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java
index c9e6a634..3f8bb484 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java
@@ -112,7 +112,7 @@ public class GaussianModel<V extends NumberVector<?>> extends AbstractAlgorithm<
Matrix covarianceTransposed = covarianceMatrix.cheatToAvoidSingularity(SINGULARITY_CHEAT).inverse();
// Normalization factors for Gaussian PDF
- final double fakt = (1.0 / (Math.sqrt(Math.pow(MathUtil.TWOPI, RelationUtil.dimensionality(relation)) * covarianceMatrix.det())));
+ final double fakt = (1.0 / (Math.sqrt(MathUtil.powi(MathUtil.TWOPI, RelationUtil.dimensionality(relation)) * covarianceMatrix.det())));
// for each object compute Mahalanobis distance
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java
index 294592e8..e6659a8f 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java
@@ -219,7 +219,7 @@ public class GaussianUniformMixture<V extends NumberVector<?>> extends AbstractA
Matrix covInv = covarianceMatrix.cheatToAvoidSingularity(SINGULARITY_CHEAT).inverse();
double covarianceDet = covarianceMatrix.det();
- double fakt = 1.0 / Math.sqrt(Math.pow(MathUtil.TWOPI, RelationUtil.dimensionality(database)) * covarianceDet);
+ double fakt = 1.0 / Math.sqrt(MathUtil.powi(MathUtil.TWOPI, RelationUtil.dimensionality(database)) * covarianceDet);
// for each object compute probability and sum
double prob = 0;
for (DBIDIter iter = objids.iter(); iter.valid(); iter.advance()) {
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LBABOD.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LBABOD.java
new file mode 100644
index 00000000..37b4d050
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LBABOD.java
@@ -0,0 +1,288 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair;
+import de.lmu.ifi.dbs.elki.database.query.similarity.SimilarityQuery;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
+import de.lmu.ifi.dbs.elki.distance.similarityfunction.SimilarityFunction;
+import de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.KernelMatrix;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.Logging.Level;
+import de.lmu.ifi.dbs.elki.logging.LoggingConfiguration;
+import de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.math.MeanVariance;
+import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparableMaxHeap;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparableMinHeap;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.DoubleMinHeap;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ObjectHeap;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+
+/**
+ * Angle-Based Outlier Detection / Angle-Based Outlier Factor.
+ *
+ * LB-ABOD (lower-bound) version. Exact on the top k outliers, approximate on
+ * the remaining.
+ *
+ * Outlier detection using variance analysis on angles, especially for high
+ * dimensional data sets.
+ *
+ * H.-P. Kriegel, M. Schubert, and A. Zimek: Angle-Based Outlier Detection in
+ * High-dimensional Data. In: Proc. 14th ACM SIGKDD Int. Conf. on Knowledge
+ * Discovery and Data Mining (KDD '08), Las Vegas, NV, 2008.
+ *
+ * @author Matthias Schubert (Original Code)
+ * @author Erich Schubert (ELKIfication)
+ *
+ * @param <V> Vector type
+ */
+@Title("LB-ABOD: Lower Bounded Angle-Based Outlier Detection")
+@Description("Outlier detection using variance analysis on angles, especially for high dimensional data sets.")
+@Reference(authors = "H.-P. Kriegel, M. Schubert, and A. Zimek", title = "Angle-Based Outlier Detection in High-dimensional Data", booktitle = "Proc. 14th ACM SIGKDD Int. Conf. on Knowledge Discovery and Data Mining (KDD '08), Las Vegas, NV, 2008", url = "http://dx.doi.org/10.1145/1401890.1401946")
+public class LBABOD<V extends NumberVector<?>> extends FastABOD<V> {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(LBABOD.class);
+
+ /**
+ * Number of outliers to refine.
+ */
+ protected int l;
+
+ /**
+ * Actual constructor, with parameters. Fast mode (sampling).
+ *
+ * @param kernelFunction Kernel function to use
+ * @param k k parameter
+ * @param l Number of outliers to find exact
+ */
+ public LBABOD(SimilarityFunction<? super V, DoubleDistance> kernelFunction, int k, int l) {
+ super(kernelFunction, k);
+ this.l = l;
+ }
+
+ /**
+ * Run LB-ABOD on the data set.
+ *
+ * @param relation Relation to process
+ * @return Outlier detection result
+ */
+ @Override
+ public OutlierResult run(Database db, Relation<V> relation) {
+ DBIDs ids = relation.getDBIDs();
+ SimilarityQuery<V, DoubleDistance> sq = relation.getDatabase().getSimilarityQuery(relation, kernelFunction);
+ KernelMatrix kernelMatrix = new KernelMatrix(sq, relation, ids);
+
+ // Output storage.
+ WritableDoubleDataStore abodvalues = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
+ DoubleMinMax minmaxabod = new DoubleMinMax();
+ double max = 0.;
+
+ // Storage for squared distances (will be reused!)
+ WritableDoubleDataStore sqDists = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
+ // Nearest neighbor heap (will be reused!)
+ ComparableMaxHeap<DoubleDBIDPair> nn = new ComparableMaxHeap<>(k);
+
+ // Priority queue for candidates
+ ComparableMinHeap<DoubleDBIDPair> candidates = new ComparableMinHeap<>(relation.size());
+ // get Candidate Ranking
+ for(DBIDIter pA = relation.iterDBIDs(); pA.valid(); pA.advance()) {
+ // Compute nearest neighbors and distances.
+ nn.clear();
+ double simAA = kernelMatrix.getSimilarity(pA, pA);
+ // Sum of 1./(|AB|) and 1./(|AB|^2); for computing R2.
+ double sumid = 0., sumisqd = 0.;
+ for(DBIDIter nB = relation.iterDBIDs(); nB.valid(); nB.advance()) {
+ if(DBIDUtil.equal(nB, pA)) {
+ continue;
+ }
+ double simBB = kernelMatrix.getSimilarity(nB, nB);
+ double simAB = kernelMatrix.getSimilarity(pA, nB);
+ double sqdAB = simAA + simBB - simAB - simAB;
+ sqDists.putDouble(nB, sqdAB);
+ if(!(sqdAB > 0.)) {
+ continue;
+ }
+ sumid += 1. / Math.sqrt(sqdAB);
+ sumisqd += 1. / sqdAB;
+ // Update heap
+ if(nn.size() < k) {
+ nn.add(DBIDUtil.newPair(sqdAB, nB));
+ }
+ else if(sqdAB < nn.peek().doubleValue()) {
+ nn.replaceTopElement(DBIDUtil.newPair(sqdAB, nB));
+ }
+ }
+
+ // Compute FastABOD approximation, adjust for lower bound.
+ // LB-ABOF is defined via a numerically unstable formula.
+ // Variance as E(X^2)-E(X)^2 suffers from catastrophic cancellation!
+ // TODO: ensure numerical precision!
+ double nnsum = 0., nnsumsq = 0., nnsumisqd = 0.;
+ for(ObjectHeap.UnsortedIter<DoubleDBIDPair> iB = nn.unsortedIter(); iB.valid(); iB.advance()) {
+ DoubleDBIDPair nB = iB.get();
+ double sqdAB = nB.doubleValue();
+ double simAB = kernelMatrix.getSimilarity(pA, nB);
+ if(!(sqdAB > 0.)) {
+ continue;
+ }
+ for(ObjectHeap.UnsortedIter<DoubleDBIDPair> iC = nn.unsortedIter(); iC.valid(); iC.advance()) {
+ DoubleDBIDPair nC = iC.get();
+ if(DBIDUtil.compare(nC, nB) < 0) {
+ continue;
+ }
+ double sqdAC = nC.doubleValue();
+ double simAC = kernelMatrix.getSimilarity(pA, nC);
+ if(!(sqdAC > 0.)) {
+ continue;
+ }
+ // Exploit bilinearity of scalar product:
+ // <B-A, C-A> = <B, C-A> - <A,C-A>
+ // = <B,C> - <B,A> - <A,C> + <A,A>
+ double simBC = kernelMatrix.getSimilarity(nB, nC);
+ double numerator = simBC - simAB - simAC + simAA;
+ double sqweight = 1. / (sqdAB * sqdAC);
+ double weight = Math.sqrt(sqweight);
+ double val = numerator * sqweight;
+ nnsum += val * weight;
+ nnsumsq += val * val * weight;
+ nnsumisqd += sqweight;
+ }
+ }
+ // Remaining weight, term R2:
+ double r2 = sumisqd * sumisqd - 2. * nnsumisqd;
+ double tmp = (2. * nnsum + r2) / (sumid * sumid);
+ double lbabof = 2. * nnsumsq / (sumid * sumid) - tmp * tmp;
+
+ // Track maximum?
+ if(lbabof > max) {
+ max = lbabof;
+ }
+ abodvalues.putDouble(pA, lbabof);
+ candidates.add(DBIDUtil.newPair(lbabof, pA));
+ }
+ minmaxabod.put(max); // Put maximum from approximate values.
+
+ // refine Candidates
+ int refinements = 0;
+ DoubleMinHeap topscores = new DoubleMinHeap(l);
+ MeanVariance s = new MeanVariance();
+ while(!candidates.isEmpty()) {
+ // Stop refining
+ if(topscores.size() >= k && candidates.peek().doubleValue() > topscores.peek()) {
+ break;
+ }
+ DoubleDBIDPair pA = candidates.poll();
+ final double abof = computeABOF(relation, kernelMatrix, pA, s);
+ // Store refined score:
+ abodvalues.putDouble(pA, abof);
+ minmaxabod.put(abof);
+ // Update the heap tracking the top scores.
+ if(topscores.size() < k) {
+ topscores.add(abof);
+ }
+ else {
+ if(topscores.peek() > abof) {
+ topscores.replaceTopElement(abof);
+ }
+ }
+ refinements += 1;
+ }
+ if(LOG.isStatistics()) {
+ LoggingConfiguration.setVerbose(Level.VERYVERBOSE);
+ LOG.statistics(new LongStatistic("lb-abod.refinements", refinements));
+ }
+ // Build result representation.
+ Relation<Double> scoreResult = new MaterializedRelation<>("Angle-based Outlier Detection", "abod-outlier", TypeUtil.DOUBLE, abodvalues, ids);
+ OutlierScoreMeta scoreMeta = new InvertedOutlierScoreMeta(minmaxabod.getMin(), minmaxabod.getMax(), 0.0, Double.POSITIVE_INFINITY);
+ return new OutlierResult(scoreMeta, scoreResult);
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer<V extends NumberVector<?>> extends FastABOD.Parameterizer<V> {
+ /**
+ * Parameter to specify the number of outliers to compute exactly.
+ */
+ public static final OptionID L_ID = new OptionID("abod.l", "Number of top outliers to compute.");
+
+ /**
+ * Number of outliers to find.
+ */
+ protected int l = 0;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ final IntParameter lP = new IntParameter(L_ID);
+ lP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(lP)) {
+ l = lP.getValue();
+ }
+ }
+
+ @Override
+ protected LBABOD<V> makeInstance() {
+ return new LBABOD<>(kernelFunction, k, l);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ODIN.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ODIN.java
index f22cdeb7..a5b39146 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ODIN.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ODIN.java
@@ -45,7 +45,7 @@ import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -110,19 +110,19 @@ public class ODIN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorit
double inc = 1. / (k - 1);
double min = Double.POSITIVE_INFINITY, max = 0.0;
// Process all objects
- for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
// Find the nearest neighbors (using an index, if available!)
KNNList<D> neighbors = knnq.getKNNForDBID(iter, k);
// For each neighbor, except ourselves, increase the in-degree:
- for (DBIDIter nei = neighbors.iter(); nei.valid(); nei.advance()) {
- if (DBIDUtil.equal(iter, nei)) {
+ for(DBIDIter nei = neighbors.iter(); nei.valid(); nei.advance()) {
+ if(DBIDUtil.equal(iter, nei)) {
continue;
}
final double value = scores.doubleValue(nei) + inc;
- if (value < min) {
+ if(value < min) {
min = value;
}
- if (value > max) {
+ if(value > max) {
max = value;
}
scores.put(nei, value);
@@ -178,8 +178,8 @@ public class ODIN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorit
// Since in a database context, the 1 nearest neighbor
// will usually be the query object itself, we require
// this value to be at least 2.
- param.addConstraint(new GreaterConstraint(1));
- if (config.grab(param)) {
+ param.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
+ if(config.grab(param)) {
k = param.intValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java
index f6d46f57..b1ffae63 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java
@@ -1,26 +1,27 @@
package de.lmu.ifi.dbs.elki.algorithm.outlier;
-/*
-This file is part of ELKI:
-Environment for Developing KDD-Applications Supported by Index-Structures
-
-Copyright (C) 2013
-Ludwig-Maximilians-Universität München
-Lehr- und Forschungseinheit für Datenbanksysteme
-ELKI Development Team
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
import java.util.ArrayList;
import java.util.List;
@@ -54,7 +55,7 @@ import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -122,7 +123,7 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc
// Pass 1
// N_minpts(id) and core-distance(id)
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
KNNList<D> minptsNeighbours = knnQuery.getKNNForDBID(iditer, minpts);
D d = minptsNeighbours.getKNNDistance();
nMinPts.put(iditer, minptsNeighbours);
@@ -133,11 +134,11 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc
// Pass 2
WritableDataStore<List<Double>> reachDistance = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, List.class);
WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
List<Double> core = new ArrayList<>();
double lrd = 0;
// TODO: optimize for double distances
- for (DistanceDBIDListIter<D> neighbor = nMinPts.get(iditer).iter(); neighbor.valid(); neighbor.advance()) {
+ for(DistanceDBIDListIter<D> neighbor = nMinPts.get(iditer).iter(); neighbor.valid(); neighbor.advance()) {
double coreDist = coreDistance.doubleValue(neighbor);
double dist = distQuery.distance(iditer, neighbor).doubleValue();
double rd = Math.max(coreDist, dist);
@@ -152,9 +153,9 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc
// Pass 3
DoubleMinMax ofminmax = new DoubleMinMax();
WritableDoubleDataStore ofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double of = 0;
- for (DBIDIter neighbor = nMinPts.get(iditer).iter(); neighbor.valid(); neighbor.advance()) {
+ for(DBIDIter neighbor = nMinPts.get(iditer).iter(); neighbor.valid(); neighbor.advance()) {
double lrd = lrds.doubleValue(iditer);
double lrdN = lrds.doubleValue(neighbor);
of = of + lrdN / lrd;
@@ -169,7 +170,7 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(ofminmax.getMin(), ofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
return new OutlierResult(scoreMeta, scoreResult);
}
-
+
@Override
public TypeInformation[] getInputTypeRestriction() {
return TypeUtil.array(getDistanceFunction().getInputTypeRestriction());
@@ -181,11 +182,11 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc
}
/**
- * Parameterization class.
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
*/
public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
protected int minpts = 0;
@@ -194,7 +195,7 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
final IntParameter param = new IntParameter(OPTICS.MINPTS_ID);
- param.addConstraint(new GreaterConstraint(1));
+ param.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
if(config.grab(param)) {
minpts = param.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java
index 092bbc45..d254c9a1 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java
@@ -56,7 +56,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
@@ -182,14 +182,14 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?>, D extends
}
// compute maximum density
double maxDensity = 0.0;
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double dens = rbod_score.doubleValue(iditer);
if(dens > maxDensity) {
maxDensity = dens;
}
}
// compute ROS
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double score = 1 - (rbod_score.doubleValue(iditer) / maxDensity);
rbod_score.putDouble(iditer, score);
}
@@ -218,7 +218,7 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?>, D extends
protected DistanceDBIDList<D> computeDistanceVector(V refPoint, Relation<V> database, DistanceQuery<V, D> distFunc) {
// TODO: optimize for double distances?
GenericDistanceDBIDList<D> referenceDists = new GenericDistanceDBIDList<>(database.size());
- for(DBIDIter iditer = database.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = database.iterDBIDs(); iditer.valid(); iditer.advance()) {
referenceDists.add(distFunc.distance(iditer, refPoint), iditer);
}
referenceDists.sort();
@@ -319,7 +319,7 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?>, D extends
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
final IntParameter pK = new IntParameter(K_ID);
- pK.addConstraint(new GreaterConstraint(1));
+ pK.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
if(config.grab(pK)) {
k = pK.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleCOP.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleCOP.java
index 38820ab7..72a727a5 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleCOP.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleCOP.java
@@ -60,7 +60,7 @@ import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
@@ -123,7 +123,7 @@ public class SimpleCOP<V extends NumberVector<?>, D extends NumberDistance<D, ?>
{// compute neighbors of each db object
FiniteProgress progressLocalPCA = LOG.isVerbose() ? new FiniteProgress("Correlation Outlier Probabilities", data.size(), LOG) : null;
double sqrt2 = Math.sqrt(2.0);
- for (DBIDIter id = data.iterDBIDs(); id.valid(); id.advance()) {
+ for(DBIDIter id = data.iterDBIDs(); id.valid(); id.advance()) {
KNNList<D> neighbors = knnQuery.getKNNForDBID(id, k + 1);
ModifiableDBIDs nids = DBIDUtil.newArray(neighbors);
nids.remove(id);
@@ -147,11 +147,11 @@ public class SimpleCOP<V extends NumberVector<?>, D extends NumberDistance<D, ?>
cop_sol.put(id, depsol);
- if (progressLocalPCA != null) {
+ if(progressLocalPCA != null) {
progressLocalPCA.incrementProcessed(LOG);
}
}
- if (progressLocalPCA != null) {
+ if(progressLocalPCA != null) {
progressLocalPCA.ensureCompleted(LOG);
}
}
@@ -218,12 +218,12 @@ public class SimpleCOP<V extends NumberVector<?>, D extends NumberDistance<D, ?>
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
IntParameter kP = new IntParameter(K_ID);
- kP.addConstraint(new GreaterConstraint(0));
- if (config.grab(kP)) {
+ kP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(kP)) {
k = kP.intValue();
}
ObjectParameter<PCAFilteredRunner<V>> pcaP = new ObjectParameter<>(PCARUNNER_ID, PCAFilteredRunner.class, PCAFilteredRunner.class);
- if (config.grab(pcaP)) {
+ if(config.grab(pcaP)) {
pca = pcaP.instantiateClass(config);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/ALOCI.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/ALOCI.java
index d48679a9..f978365e 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/ALOCI.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/ALOCI.java
@@ -141,7 +141,7 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex
public OutlierResult run(Database database, Relation<O> relation) {
final int dim = RelationUtil.dimensionality(relation);
- final Random random = rnd.getRandom();
+ final Random random = rnd.getSingleThreadedRandom();
FiniteProgress progressPreproc = LOG.isVerbose() ? new FiniteProgress("Build aLOCI quadtress", g, LOG) : null;
// Compute extend of dataset.
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/FlexibleLOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/FlexibleLOF.java
index 80f60e8b..2508b6b0 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/FlexibleLOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/FlexibleLOF.java
@@ -64,7 +64,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
@@ -581,14 +581,14 @@ public class FlexibleLOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgo
super.makeOptions(config);
final IntParameter pK = new IntParameter(KREF_ID);
- pK.addConstraint(new GreaterConstraint(1));
+ pK.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
if (config.grab(pK)) {
krefer = pK.intValue();
}
final IntParameter pK2 = new IntParameter(KREACH_ID);
pK2.setOptional(true);
- pK2.addConstraint(new GreaterConstraint(1));
+ pK2.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
if (config.grab(pK2)) {
kreach = pK2.intValue();
} else {
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/INFLO.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/INFLO.java
index ae297a3c..28fcf01b 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/INFLO.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/INFLO.java
@@ -53,7 +53,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -142,7 +142,7 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
// density
WritableDoubleDataStore density = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
// init knns and rnns
- for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
knns.put(iditer, DBIDUtil.newArray());
rnns.put(iditer, DBIDUtil.newArray());
}
@@ -150,10 +150,10 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
// TODO: use kNN preprocessor?
KNNQuery<O, D> knnQuery = database.getKNNQuery(distFunc, k, DatabaseQuery.HINT_HEAVY_USE);
- for (DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) {
+ for(DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) {
// if not visited count=0
int count = rnns.get(id).size();
- if (!processedIDs.contains(id)) {
+ if(!processedIDs.contains(id)) {
// TODO: use exactly k neighbors?
KNNList<D> list = knnQuery.getKNNForDBID(id, k);
knns.get(id).addDBIDs(list);
@@ -162,8 +162,8 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
}
ModifiableDBIDs s = knns.get(id);
- for (DBIDIter q = knns.get(id).iter(); q.valid(); q.advance()) {
- if (!processedIDs.contains(q)) {
+ for(DBIDIter q = knns.get(id).iter(); q.valid(); q.advance()) {
+ if(!processedIDs.contains(q)) {
// TODO: use exactly k neighbors?
KNNList<D> listQ = knnQuery.getKNNForDBID(q, k);
knns.get(q).addDBIDs(listQ);
@@ -171,13 +171,13 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
processedIDs.add(q);
}
- if (knns.get(q).contains(id)) {
+ if(knns.get(q).contains(id)) {
rnns.get(q).add(id);
rnns.get(id).add(q);
count++;
}
}
- if (count >= s.size() * m) {
+ if(count >= s.size() * m) {
pruned.add(id);
}
}
@@ -186,15 +186,15 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
// IF Object is pruned INFLO=1.0
DoubleMinMax inflominmax = new DoubleMinMax();
WritableDoubleDataStore inflos = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
- for (DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) {
- if (!pruned.contains(id)) {
+ for(DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) {
+ if(!pruned.contains(id)) {
ModifiableDBIDs knn = knns.get(id);
ModifiableDBIDs rnn = rnns.get(id);
double denP = density.doubleValue(id);
knn.addDBIDs(rnn);
Mean mean = new Mean();
- for (DBIDIter iter = knn.iter(); iter.valid(); iter.advance()) {
+ for(DBIDIter iter = knn.iter(); iter.valid(); iter.advance()) {
mean.put(density.doubleValue(iter));
}
double den = mean.getMean() / denP;
@@ -203,7 +203,7 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
inflominmax.put(den);
}
- if (pruned.contains(id)) {
+ if(pruned.contains(id)) {
inflos.putDouble(id, 1.0);
inflominmax.put(1.0);
}
@@ -241,14 +241,14 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
final DoubleParameter mP = new DoubleParameter(M_ID, 1.0);
- mP.addConstraint(new GreaterConstraint(0.0));
- if (config.grab(mP)) {
+ mP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+ if(config.grab(mP)) {
m = mP.doubleValue();
}
final IntParameter kP = new IntParameter(K_ID);
- kP.addConstraint(new GreaterConstraint(1));
- if (config.grab(kP)) {
+ kP.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
+ if(config.grab(kP)) {
k = kP.intValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDF.java
index 4a86e93d..e5049877 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDF.java
@@ -55,6 +55,7 @@ import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.math.MathUtil;
import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.GaussianKernelDensityFunction;
import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.KernelDensityFunction;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
@@ -62,7 +63,7 @@ import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -149,8 +150,8 @@ public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
// "HEAVY" flag for KNN Query since it is used more than once
KNNQuery<O, D> knnq = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
// No optimized kNN query - use a preprocessor!
- if (!(knnq instanceof PreprocessorKNNQuery)) {
- if (stepprog != null) {
+ if(!(knnq instanceof PreprocessorKNNQuery)) {
+ if(stepprog != null) {
stepprog.beginStep(1, "Materializing neighborhoods w.r.t. distance function.", LOG);
}
MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<>(relation, getDistanceFunction(), k);
@@ -160,43 +161,46 @@ public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
}
// Compute LDEs
- if (stepprog != null) {
+ if(stepprog != null) {
stepprog.beginStep(2, "Computing LDEs.", LOG);
}
WritableDoubleDataStore ldes = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
FiniteProgress densProgress = LOG.isVerbose() ? new FiniteProgress("Densities", ids.size(), LOG) : null;
- for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
+ for(DBIDIter it = ids.iter(); it.valid(); it.advance()) {
final KNNList<D> neighbors = knnq.getKNNForDBID(it, k);
double sum = 0.0;
int count = 0;
- if (neighbors instanceof DoubleDistanceKNNList) {
+ if(neighbors instanceof DoubleDistanceKNNList) {
// Fast version for double distances
- for (DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
- if (DBIDUtil.equal(neighbor, it)) {
+ for(DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
+ if(DBIDUtil.equal(neighbor, it)) {
continue;
}
final double nkdist = ((DoubleDistanceKNNList) knnq.getKNNForDBID(neighbor, k)).doubleKNNDistance();
- if (nkdist > 0.) {
+ if(nkdist > 0.) {
final double v = Math.max(nkdist, neighbor.doubleDistance()) / (h * nkdist);
- sum += kernel.density(v) / Math.pow(h * nkdist, dim);
+ sum += kernel.density(v) / MathUtil.powi(h * nkdist, dim);
count++;
- } else {
+ }
+ else {
sum = Double.POSITIVE_INFINITY;
count++;
break;
}
}
- } else {
- for (DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
- if (DBIDUtil.equal(neighbor, it)) {
+ }
+ else {
+ for(DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ if(DBIDUtil.equal(neighbor, it)) {
continue;
}
final double nkdist = knnq.getKNNForDBID(neighbor, k).getKNNDistance().doubleValue();
- if (nkdist > 0.) {
+ if(nkdist > 0.) {
final double v = Math.max(nkdist, neighbor.getDistance().doubleValue()) / (h * nkdist);
- sum += kernel.density(v) / Math.pow(h * nkdist, dim);
+ sum += kernel.density(v) / MathUtil.powi(h * nkdist, dim);
count++;
- } else {
+ }
+ else {
sum = Double.POSITIVE_INFINITY;
count++;
break;
@@ -204,16 +208,16 @@ public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
}
}
ldes.putDouble(it, sum / count);
- if (densProgress != null) {
+ if(densProgress != null) {
densProgress.incrementProcessed(LOG);
}
}
- if (densProgress != null) {
+ if(densProgress != null) {
densProgress.ensureCompleted(LOG);
}
// Compute local density factors.
- if (stepprog != null) {
+ if(stepprog != null) {
stepprog.beginStep(3, "Computing LDFs.", LOG);
}
WritableDoubleDataStore ldfs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
@@ -221,14 +225,14 @@ public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
DoubleMinMax lofminmax = new DoubleMinMax();
FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("Local Density Factors", ids.size(), LOG) : null;
- for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
+ for(DBIDIter it = ids.iter(); it.valid(); it.advance()) {
final double lrdp = ldes.doubleValue(it);
final KNNList<D> neighbors = knnq.getKNNForDBID(it, k);
double sum = 0.0;
int count = 0;
- for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ for(DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
// skip the point itself
- if (DBIDUtil.equal(neighbor, it)) {
+ if(DBIDUtil.equal(neighbor, it)) {
continue;
}
sum += ldes.doubleValue(neighbor);
@@ -241,15 +245,15 @@ public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
// update minimum and maximum
lofminmax.put(ldf);
- if (progressLOFs != null) {
+ if(progressLOFs != null) {
progressLOFs.incrementProcessed(LOG);
}
}
- if (progressLOFs != null) {
+ if(progressLOFs != null) {
progressLOFs.ensureCompleted(LOG);
}
- if (stepprog != null) {
+ if(stepprog != null) {
stepprog.setCompleted(LOG);
}
@@ -327,23 +331,23 @@ public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
super.makeOptions(config);
final IntParameter pK = new IntParameter(K_ID);
- pK.addConstraint(new GreaterConstraint(1));
- if (config.grab(pK)) {
+ pK.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
+ if(config.grab(pK)) {
k = pK.getValue();
}
ObjectParameter<KernelDensityFunction> kernelP = new ObjectParameter<>(KERNEL_ID, KernelDensityFunction.class, GaussianKernelDensityFunction.class);
- if (config.grab(kernelP)) {
+ if(config.grab(kernelP)) {
kernel = kernelP.instantiateClass(config);
}
DoubleParameter hP = new DoubleParameter(H_ID);
- if (config.grab(hP)) {
+ if(config.grab(hP)) {
h = hP.doubleValue();
}
DoubleParameter cP = new DoubleParameter(C_ID, 0.1);
- if (config.grab(cP)) {
+ if(config.grab(cP)) {
c = cP.doubleValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDOF.java
index 80ed3f68..36c70b48 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDOF.java
@@ -53,7 +53,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -80,7 +80,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@Title("LDOF: Local Distance-Based Outlier Factor")
@Description("Local outlier detection appraoch suitable for scattered data by averaging the kNN distance over all k nearest neighbors")
@Reference(authors = "K. Zhang, M. Hutter, H. Jin", title = "A New Local Distance-Based Outlier Detection Approach for Scattered Real-World Data", booktitle = "Proc. 13th Pacific-Asia Conference on Advances in Knowledge Discovery and Data Mining (PAKDD 2009), Bangkok, Thailand, 2009", url = "http://dx.doi.org/10.1007/978-3-642-01307-2_84")
-@Alias({"de.lmu.ifi.dbs.elki.algorithm.outlier.LDOF"})
+@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.LDOF" })
public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
@@ -138,15 +138,16 @@ public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
FiniteProgress progressLDOFs = LOG.isVerbose() ? new FiniteProgress("LDOF_SCORE for objects", relation.size(), LOG) : null;
Mean dxp = new Mean(), Dxp = new Mean();
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
KNNList<D> neighbors = knnQuery.getKNNForDBID(iditer, k);
// skip the point itself
- dxp.reset(); Dxp.reset();
+ dxp.reset();
+ Dxp.reset();
// TODO: optimize for double distances
- for (DistanceDBIDListIter<D> neighbor1 = neighbors.iter(); neighbor1.valid(); neighbor1.advance()) {
+ for(DistanceDBIDListIter<D> neighbor1 = neighbors.iter(); neighbor1.valid(); neighbor1.advance()) {
if(!DBIDUtil.equal(neighbor1, iditer)) {
dxp.put(neighbor1.getDistance().doubleValue());
- for (DistanceDBIDListIter<D> neighbor2 = neighbors.iter(); neighbor2.valid(); neighbor2.advance()) {
+ for(DistanceDBIDListIter<D> neighbor2 = neighbors.iter(); neighbor2.valid(); neighbor2.advance()) {
if(!DBIDUtil.equal(neighbor1, neighbor2) && !DBIDUtil.equal(neighbor2, iditer)) {
Dxp.put(distFunc.distance(neighbor1, neighbor2).doubleValue());
}
@@ -199,7 +200,7 @@ public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
final IntParameter kP = new IntParameter(K_ID);
- kP.addConstraint(new GreaterConstraint(1));
+ kP.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
if(config.grab(kP)) {
k = kP.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOF.java
index 302dafe6..28166c75 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOF.java
@@ -30,6 +30,7 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.DoubleDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
@@ -59,7 +60,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -129,8 +130,8 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBase
// "HEAVY" flag for knn query since it is used more than once
KNNQuery<O, D> knnq = database.getKNNQuery(dq, k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
// No optimized kNN query - use a preprocessor!
- if (!(knnq instanceof PreprocessorKNNQuery)) {
- if (stepprog != null) {
+ if(!(knnq instanceof PreprocessorKNNQuery)) {
+ if(stepprog != null) {
stepprog.beginStep(1, "Materializing LOF neighborhoods.", LOG);
}
MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<>(relation, getDistanceFunction(), k);
@@ -139,109 +140,131 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBase
DBIDs ids = relation.getDBIDs();
// Compute LRDs
- if (stepprog != null) {
+ if(stepprog != null) {
stepprog.beginStep(2, "Computing LRDs.", LOG);
}
WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
- {
- FiniteProgress lrdsProgress = LOG.isVerbose() ? new FiniteProgress("LRD", ids.size(), LOG) : null;
- for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
- final KNNList<D> neighbors = knnq.getKNNForDBID(iter, k);
- double sum = 0.0;
- int count = 0;
- if (neighbors instanceof DoubleDistanceKNNList) {
- // Fast version for double distances
- for (DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
- if (DBIDUtil.equal(neighbor, iter)) {
- continue;
- }
- KNNList<D> neighborsNeighbors = knnq.getKNNForDBID(neighbor, k);
- final double nkdist;
- if (neighborsNeighbors instanceof DoubleDistanceKNNList) {
- nkdist = ((DoubleDistanceKNNList) neighborsNeighbors).doubleKNNDistance();
- } else {
- nkdist = neighborsNeighbors.getKNNDistance().doubleValue();
- }
- sum += Math.max(neighbor.doubleDistance(), nkdist);
- count++;
- }
- } else {
- for (DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
- if (DBIDUtil.equal(neighbor, iter)) {
- continue;
- }
- KNNList<D> neighborsNeighbors = knnq.getKNNForDBID(neighbor, k);
- sum += Math.max(neighbor.getDistance().doubleValue(), neighborsNeighbors.getKNNDistance().doubleValue());
- count++;
- }
- }
- // Avoid division by 0
- final double lrd = (sum > 0) ? (count / sum) : Double.POSITIVE_INFINITY;
- lrds.putDouble(iter, lrd);
- if (lrdsProgress != null) {
- lrdsProgress.incrementProcessed(LOG);
- }
- }
- if (lrdsProgress != null) {
- lrdsProgress.ensureCompleted(LOG);
- }
- }
+ computeLRDs(knnq, ids, lrds);
// compute LOF_SCORE of each db object
- if (stepprog != null) {
+ if(stepprog != null) {
stepprog.beginStep(3, "Computing LOFs.", LOG);
}
- WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
+ DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
+ WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_DB);
// track the maximum value for normalization.
DoubleMinMax lofminmax = new DoubleMinMax();
- {
- FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("LOF_SCORE for objects", ids.size(), LOG) : null;
- for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
- final double lof;
- final double lrdp = lrds.doubleValue(iter);
- final KNNList<D> neighbors = knnq.getKNNForDBID(iter, k);
- if (!Double.isInfinite(lrdp)) {
- double sum = 0.0;
- int count = 0;
- for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
- // skip the point itself
- if (DBIDUtil.equal(neighbor, iter)) {
- continue;
- }
- final double val = lrds.doubleValue(neighbor);
- sum += val;
- count++;
- if (Double.isInfinite(val)) {
- break;
- }
+ computeLOFScores(knnq, ids, lrds, lofs, lofminmax);
+
+ if(stepprog != null) {
+ stepprog.setCompleted(LOG);
+ }
+
+ // Build result representation.
+ Relation<Double> scoreResult = new MaterializedRelation<>("Local Outlier Factor", "lof-outlier", TypeUtil.DOUBLE, lofs, ids);
+ OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
+ return new OutlierResult(scoreMeta, scoreResult);
+ }
+
+ /**
+ * Compute local reachability distances.
+ *
+ * @param knnq KNN query
+ * @param ids IDs to process
+ * @param lrds Reachability storage
+ */
+ private void computeLRDs(KNNQuery<O, D> knnq, DBIDs ids, WritableDoubleDataStore lrds) {
+ FiniteProgress lrdsProgress = LOG.isVerbose() ? new FiniteProgress("LRD", ids.size(), LOG) : null;
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ final KNNList<D> neighbors = knnq.getKNNForDBID(iter, k);
+ double sum = 0.0;
+ int count = 0;
+ if(neighbors instanceof DoubleDistanceKNNList) {
+ // Fast version for double distances
+ for(DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
+ if(DBIDUtil.equal(neighbor, iter)) {
+ continue;
+ }
+ KNNList<D> neighborsNeighbors = knnq.getKNNForDBID(neighbor, k);
+ final double nkdist;
+ if(neighborsNeighbors instanceof DoubleDistanceKNNList) {
+ nkdist = ((DoubleDistanceKNNList) neighborsNeighbors).doubleKNNDistance();
+ }
+ else {
+ nkdist = neighborsNeighbors.getKNNDistance().doubleValue();
}
- lof = sum / (lrdp * count);
- } else {
- lof = 1.0;
+ sum += Math.max(neighbor.doubleDistance(), nkdist);
+ count++;
}
- lofs.putDouble(iter, lof);
- // update minimum and maximum
- lofminmax.put(lof);
-
- if (progressLOFs != null) {
- progressLOFs.incrementProcessed(LOG);
+ }
+ else {
+ for(DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ if(DBIDUtil.equal(neighbor, iter)) {
+ continue;
+ }
+ KNNList<D> neighborsNeighbors = knnq.getKNNForDBID(neighbor, k);
+ sum += Math.max(neighbor.getDistance().doubleValue(), neighborsNeighbors.getKNNDistance().doubleValue());
+ count++;
}
}
- if (progressLOFs != null) {
- progressLOFs.ensureCompleted(LOG);
+ // Avoid division by 0
+ final double lrd = (sum > 0) ? (count / sum) : Double.POSITIVE_INFINITY;
+ lrds.putDouble(iter, lrd);
+ if(lrdsProgress != null) {
+ lrdsProgress.incrementProcessed(LOG);
}
}
-
- if (stepprog != null) {
- stepprog.setCompleted(LOG);
+ if(lrdsProgress != null) {
+ lrdsProgress.ensureCompleted(LOG);
}
+ }
- // Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<>("Local Outlier Factor", "lof-outlier", TypeUtil.DOUBLE, lofs, ids);
- OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
- OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
+ /**
+ * Compute local outlier factors.
+ *
+ * @param knnq KNN query
+ * @param ids IDs to process
+ * @param lrds Local reachability distances
+ * @param lofs Local outlier factor storage
+ * @param lofminmax Score minimum/maximum tracker
+ */
+ private void computeLOFScores(KNNQuery<O, D> knnq, DBIDs ids, DoubleDataStore lrds, WritableDoubleDataStore lofs, DoubleMinMax lofminmax) {
+ FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("LOF_SCORE for objects", ids.size(), LOG) : null;
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ final double lof;
+ final double lrdp = lrds.doubleValue(iter);
+ final KNNList<D> neighbors = knnq.getKNNForDBID(iter, k);
+ if(!Double.isInfinite(lrdp)) {
+ double sum = 0.0;
+ int count = 0;
+ for(DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ // skip the point itself
+ if(DBIDUtil.equal(neighbor, iter)) {
+ continue;
+ }
+ final double val = lrds.doubleValue(neighbor);
+ sum += val;
+ count++;
+ if(Double.isInfinite(val)) {
+ break;
+ }
+ }
+ lof = sum / (lrdp * count);
+ }
+ else {
+ lof = 1.0;
+ }
+ lofs.putDouble(iter, lof);
+ // update minimum and maximum
+ lofminmax.put(lof);
- return result;
+ if(progressLOFs != null) {
+ progressLOFs.incrementProcessed(LOG);
+ }
+ }
+ if(progressLOFs != null) {
+ progressLOFs.ensureCompleted(LOG);
+ }
}
@Override
@@ -279,8 +302,8 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBase
super.makeOptions(config);
final IntParameter pK = new IntParameter(K_ID);
- pK.addConstraint(new GreaterConstraint(1));
- if (config.grab(pK)) {
+ pK.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
+ if(config.grab(pK)) {
k = pK.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LoOP.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LoOP.java
index 15ff690a..525d45f2 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LoOP.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LoOP.java
@@ -64,7 +64,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -183,26 +183,28 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
protected Pair<KNNQuery<O, D>, KNNQuery<O, D>> getKNNQueries(Database database, Relation<O> relation, StepProgress stepprog) {
KNNQuery<O, D> knnComp;
KNNQuery<O, D> knnReach;
- if (comparisonDistanceFunction == reachabilityDistanceFunction || comparisonDistanceFunction.equals(reachabilityDistanceFunction)) {
+ if(comparisonDistanceFunction == reachabilityDistanceFunction || comparisonDistanceFunction.equals(reachabilityDistanceFunction)) {
// We need each neighborhood twice - use "HEAVY" flag.
knnComp = QueryUtil.getKNNQuery(relation, comparisonDistanceFunction, Math.max(kreach, kcomp), DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
// No optimized kNN query - use a preprocessor!
- if (knnComp == null) {
- if (stepprog != null) {
+ if(knnComp == null) {
+ if(stepprog != null) {
stepprog.beginStep(1, "Materializing neighborhoods with respect to reference neighborhood distance function.", LOG);
}
MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<>(relation, comparisonDistanceFunction, kcomp);
database.addIndex(preproc);
DistanceQuery<O, D> cdq = database.getDistanceQuery(relation, comparisonDistanceFunction);
knnComp = preproc.getKNNQuery(cdq, kreach, DatabaseQuery.HINT_HEAVY_USE);
- } else {
- if (stepprog != null) {
+ }
+ else {
+ if(stepprog != null) {
stepprog.beginStep(1, "Optimized neighborhoods provided by database.", LOG);
}
}
knnReach = knnComp;
- } else {
- if (stepprog != null) {
+ }
+ else {
+ if(stepprog != null) {
stepprog.beginStep(1, "Not materializing distance functions, since we request each DBID once only.", LOG);
}
knnComp = QueryUtil.getKNNQuery(relation, comparisonDistanceFunction, kreach);
@@ -228,10 +230,10 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
KNNQuery<O, D> knnReach = pair.getSecond();
// Assert we got something
- if (knnComp == null) {
+ if(knnComp == null) {
throw new AbortException("No kNN queries supported by database for comparison distance function.");
}
- if (knnReach == null) {
+ if(knnReach == null) {
throw new AbortException("No kNN queries supported by database for density estimation distance function.");
}
@@ -239,34 +241,35 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
WritableDoubleDataStore pdists = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
Mean mean = new Mean();
{// computing PRDs
- if (stepprog != null) {
+ if(stepprog != null) {
stepprog.beginStep(3, "Computing pdists", LOG);
}
FiniteProgress prdsProgress = LOG.isVerbose() ? new FiniteProgress("pdists", relation.size(), LOG) : null;
- for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
final KNNList<D> neighbors = knnReach.getKNNForDBID(iditer, kreach);
mean.reset();
// use first kref neighbors as reference set
int ks = 0;
// TODO: optimize for double distances
- if (neighbors instanceof DoubleDistanceKNNList) {
- for (DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
- if (objectIsInKNN || !DBIDUtil.equal(neighbor, iditer)) {
+ if(neighbors instanceof DoubleDistanceKNNList) {
+ for(DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
+ if(objectIsInKNN || !DBIDUtil.equal(neighbor, iditer)) {
final double d = neighbor.doubleDistance();
mean.put(d * d);
ks++;
- if (ks >= kreach) {
+ if(ks >= kreach) {
break;
}
}
}
- } else {
- for (DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
- if (objectIsInKNN || !DBIDUtil.equal(neighbor, iditer)) {
+ }
+ else {
+ for(DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ if(objectIsInKNN || !DBIDUtil.equal(neighbor, iditer)) {
double d = neighbor.getDistance().doubleValue();
mean.put(d * d);
ks++;
- if (ks >= kreach) {
+ if(ks >= kreach) {
break;
}
}
@@ -274,7 +277,7 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
}
double pdist = lambda * Math.sqrt(mean.getMean());
pdists.putDouble(iditer, pdist);
- if (prdsProgress != null) {
+ if(prdsProgress != null) {
prdsProgress.incrementProcessed(LOG);
}
}
@@ -283,62 +286,62 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
WritableDoubleDataStore plofs = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
MeanVariance mvplof = new MeanVariance();
{// compute LOOP_SCORE of each db object
- if (stepprog != null) {
+ if(stepprog != null) {
stepprog.beginStep(4, "Computing PLOF", LOG);
}
FiniteProgress progressPLOFs = LOG.isVerbose() ? new FiniteProgress("PLOFs for objects", relation.size(), LOG) : null;
MeanVariance mv = new MeanVariance();
- for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
final KNNList<D> neighbors = knnComp.getKNNForDBID(iditer, kcomp);
mv.reset();
// use first kref neighbors as comparison set.
int ks = 0;
- for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
- if (objectIsInKNN || !DBIDUtil.equal(neighbor, iditer)) {
+ for(DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ if(objectIsInKNN || !DBIDUtil.equal(neighbor, iditer)) {
mv.put(pdists.doubleValue(neighbor));
ks++;
- if (ks >= kcomp) {
+ if(ks >= kcomp) {
break;
}
}
}
double plof = Math.max(pdists.doubleValue(iditer) / mv.getMean(), 1.0);
- if (Double.isNaN(plof) || Double.isInfinite(plof)) {
+ if(Double.isNaN(plof) || Double.isInfinite(plof)) {
plof = 1.0;
}
plofs.putDouble(iditer, plof);
mvplof.put((plof - 1.0) * (plof - 1.0));
- if (progressPLOFs != null) {
+ if(progressPLOFs != null) {
progressPLOFs.incrementProcessed(LOG);
}
}
}
double nplof = lambda * Math.sqrt(mvplof.getMean());
- if (LOG.isDebugging()) {
+ if(LOG.isDebugging()) {
LOG.verbose("nplof normalization factor is " + nplof + " " + mvplof.getMean() + " " + mvplof.getSampleStddev());
}
// Compute final LoOP values.
WritableDoubleDataStore loops = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
{// compute LOOP_SCORE of each db object
- if (stepprog != null) {
+ if(stepprog != null) {
stepprog.beginStep(5, "Computing LoOP scores", LOG);
}
FiniteProgress progressLOOPs = LOG.isVerbose() ? new FiniteProgress("LoOP for objects", relation.size(), LOG) : null;
- for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
loops.putDouble(iditer, NormalDistribution.erf((plofs.doubleValue(iditer) - 1) / (nplof * sqrt2)));
- if (progressLOOPs != null) {
+ if(progressLOOPs != null) {
progressLOOPs.incrementProcessed(LOG);
}
}
}
- if (stepprog != null) {
+ if(stepprog != null) {
stepprog.setCompleted(LOG);
}
@@ -351,9 +354,10 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
@Override
public TypeInformation[] getInputTypeRestriction() {
final TypeInformation type;
- if (reachabilityDistanceFunction.equals(comparisonDistanceFunction)) {
+ if(reachabilityDistanceFunction.equals(comparisonDistanceFunction)) {
type = reachabilityDistanceFunction.getInputTypeRestriction();
- } else {
+ }
+ else {
type = new CombinedTypeInformation(reachabilityDistanceFunction.getInputTypeRestriction(), comparisonDistanceFunction.getInputTypeRestriction());
}
return TypeUtil.array(type);
@@ -401,34 +405,35 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
final IntParameter kcompP = new IntParameter(KCOMP_ID);
- kcompP.addConstraint(new GreaterConstraint(1));
- if (config.grab(kcompP)) {
+ kcompP.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
+ if(config.grab(kcompP)) {
kcomp = kcompP.intValue();
}
final ObjectParameter<DistanceFunction<O, D>> compDistP = new ObjectParameter<>(COMPARISON_DISTANCE_FUNCTION_ID, DistanceFunction.class, EuclideanDistanceFunction.class);
- if (config.grab(compDistP)) {
+ if(config.grab(compDistP)) {
comparisonDistanceFunction = compDistP.instantiateClass(config);
}
final IntParameter kreachP = new IntParameter(KREACH_ID);
- kreachP.addConstraint(new GreaterConstraint(1));
+ kreachP.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
kreachP.setOptional(true);
- if (config.grab(kreachP)) {
+ if(config.grab(kreachP)) {
kreach = kreachP.intValue();
- } else {
+ }
+ else {
kreach = kcomp;
}
final ObjectParameter<DistanceFunction<O, D>> reachDistP = new ObjectParameter<>(REACHABILITY_DISTANCE_FUNCTION_ID, DistanceFunction.class, true);
- if (config.grab(reachDistP)) {
+ if(config.grab(reachDistP)) {
reachabilityDistanceFunction = reachDistP.instantiateClass(config);
}
// TODO: make default 1.0?
final DoubleParameter lambdaP = new DoubleParameter(LAMBDA_ID, 2.0);
- lambdaP.addConstraint(new GreaterConstraint(0.0));
- if (config.grab(lambdaP)) {
+ lambdaP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+ if(config.grab(lambdaP)) {
lambda = lambdaP.doubleValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimpleKernelDensityLOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimpleKernelDensityLOF.java
index 2ff7534a..b990ef35 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimpleKernelDensityLOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimpleKernelDensityLOF.java
@@ -55,13 +55,14 @@ import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.math.MathUtil;
import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.EpanechnikovKernelDensityFunction;
import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.KernelDensityFunction;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
@@ -151,7 +152,7 @@ public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberD
}
double max = ((DoubleDistanceKNNList)knnq.getKNNForDBID(neighbor, k)).doubleKNNDistance();
final double v = neighbor.doubleDistance() / max;
- sum += kernel.density(v) / Math.pow(max, dim);
+ sum += kernel.density(v) / MathUtil.powi(max, dim);
count++;
}
} else {
@@ -161,7 +162,7 @@ public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberD
}
double max = knnq.getKNNForDBID(neighbor, k).getKNNDistance().doubleValue();
final double v = neighbor.getDistance().doubleValue() / max;
- sum += kernel.density(v) / Math.pow(max, dim);
+ sum += kernel.density(v) / MathUtil.powi(max, dim);
count++;
}
}
@@ -268,7 +269,7 @@ public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberD
super.makeOptions(config);
final IntParameter pK = new IntParameter(LOF.Parameterizer.K_ID);
- pK.addConstraint(new GreaterConstraint(1));
+ pK.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
if (config.grab(pK)) {
k = pK.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimplifiedLOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimplifiedLOF.java
index 413eaca1..d54b053f 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimplifiedLOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimplifiedLOF.java
@@ -57,7 +57,7 @@ import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.utilities.Alias;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -118,8 +118,8 @@ public class SimplifiedLOF<O, D extends NumberDistance<D, ?>> extends AbstractDi
// "HEAVY" flag for KNN Query since it is used more than once
KNNQuery<O, D> knnq = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
// No optimized kNN query - use a preprocessor!
- if (!(knnq instanceof PreprocessorKNNQuery)) {
- if (stepprog != null) {
+ if(!(knnq instanceof PreprocessorKNNQuery)) {
+ if(stepprog != null) {
stepprog.beginStep(1, "Materializing neighborhoods w.r.t. distance function.", LOG);
}
MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<>(relation, getDistanceFunction(), k);
@@ -129,27 +129,28 @@ public class SimplifiedLOF<O, D extends NumberDistance<D, ?>> extends AbstractDi
}
// Compute LRDs
- if (stepprog != null) {
+ if(stepprog != null) {
stepprog.beginStep(2, "Computing densities.", LOG);
}
WritableDoubleDataStore dens = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
FiniteProgress densProgress = LOG.isVerbose() ? new FiniteProgress("Densities", ids.size(), LOG) : null;
- for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
+ for(DBIDIter it = ids.iter(); it.valid(); it.advance()) {
final KNNList<D> neighbors = knnq.getKNNForDBID(it, k);
double sum = 0.0;
int count = 0;
- if (neighbors instanceof DoubleDistanceKNNList) {
+ if(neighbors instanceof DoubleDistanceKNNList) {
// Fast version for double distances
- for (DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
- if (DBIDUtil.equal(neighbor, it)) {
+ for(DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
+ if(DBIDUtil.equal(neighbor, it)) {
continue;
}
sum += neighbor.doubleDistance();
count++;
}
- } else {
- for (DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
- if (DBIDUtil.equal(neighbor, it)) {
+ }
+ else {
+ for(DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ if(DBIDUtil.equal(neighbor, it)) {
continue;
}
sum += neighbor.getDistance().doubleValue();
@@ -159,16 +160,16 @@ public class SimplifiedLOF<O, D extends NumberDistance<D, ?>> extends AbstractDi
// Avoid division by 0
final double lrd = (sum > 0) ? (count / sum) : 0;
dens.putDouble(it, lrd);
- if (densProgress != null) {
+ if(densProgress != null) {
densProgress.incrementProcessed(LOG);
}
}
- if (densProgress != null) {
+ if(densProgress != null) {
densProgress.ensureCompleted(LOG);
}
// compute LOF_SCORE of each db object
- if (stepprog != null) {
+ if(stepprog != null) {
stepprog.beginStep(3, "Computing SLOFs.", LOG);
}
WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
@@ -176,38 +177,39 @@ public class SimplifiedLOF<O, D extends NumberDistance<D, ?>> extends AbstractDi
DoubleMinMax lofminmax = new DoubleMinMax();
FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("Simple LOF scores.", ids.size(), LOG) : null;
- for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
+ for(DBIDIter it = ids.iter(); it.valid(); it.advance()) {
final double lrdp = dens.doubleValue(it);
final double lof;
- if (lrdp > 0) {
+ if(lrdp > 0) {
final KNNList<D> neighbors = knnq.getKNNForDBID(it, k);
double sum = 0.0;
int count = 0;
- for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ for(DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
// skip the point itself
- if (DBIDUtil.equal(neighbor, it)) {
+ if(DBIDUtil.equal(neighbor, it)) {
continue;
}
sum += dens.doubleValue(neighbor);
count++;
}
lof = sum / (count * lrdp);
- } else {
+ }
+ else {
lof = 1.0;
}
lofs.putDouble(it, lof);
// update minimum and maximum
lofminmax.put(lof);
- if (progressLOFs != null) {
+ if(progressLOFs != null) {
progressLOFs.incrementProcessed(LOG);
}
}
- if (progressLOFs != null) {
+ if(progressLOFs != null) {
progressLOFs.ensureCompleted(LOG);
}
- if (stepprog != null) {
+ if(stepprog != null) {
stepprog.setCompleted(LOG);
}
@@ -250,8 +252,8 @@ public class SimplifiedLOF<O, D extends NumberDistance<D, ?>> extends AbstractDi
super.makeOptions(config);
final IntParameter pK = new IntParameter(LOF.Parameterizer.K_ID);
- pK.addConstraint(new GreaterConstraint(1));
- if (config.grab(pK)) {
+ pK.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
+ if(config.grab(pK)) {
k = pK.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java
index 0d0f7303..757b80ad 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java
@@ -52,6 +52,7 @@ import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.utilities.FileUtil;
+import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
@@ -175,7 +176,7 @@ public class ExternalDoubleOutlierScore extends AbstractAlgorithm<OutlierResult>
if(!Double.isNaN(score)) {
throw new AbortException("Score pattern matched twice: previous value " + score + " second value: " + str);
}
- score = Double.parseDouble(str.substring(ms.end()));
+ score = FormatUtil.parseDouble(str.substring(ms.end()));
}
}
if(id != null && !Double.isNaN(score)) {
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java
index 22c20fc3..5b681106 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java
@@ -54,8 +54,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -136,12 +135,12 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements
final int dbdim = RelationUtil.dimensionality(relation);
final int mindim = dbdim >> 1;
final int maxdim = dbdim - 1;
- final Random rand = rnd.getRandom();
+ final Random rand = rnd.getSingleThreadedRandom();
ArrayList<OutlierResult> results = new ArrayList<>(num);
{
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("LOF iterations", num, LOG) : null;
- for (int i = 0; i < num; i++) {
+ for(int i = 0; i < num; i++) {
BitSet dimset = randomSubspace(dbdim, mindim, maxdim, rand);
SubspaceEuclideanDistanceFunction df = new SubspaceEuclideanDistanceFunction(dimset);
LOF<NumberVector<?>, DoubleDistance> lof = new LOF<>(k, df);
@@ -149,18 +148,18 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements
// run LOF and collect the result
OutlierResult result = lof.run(database, relation);
results.add(result);
- if (prog != null) {
+ if(prog != null) {
prog.incrementProcessed(LOG);
}
}
- if (prog != null) {
+ if(prog != null) {
prog.ensureCompleted(LOG);
}
}
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
DoubleMinMax minmax = new DoubleMinMax();
- if (breadth) {
+ if(breadth) {
FiniteProgress cprog = LOG.isVerbose() ? new FiniteProgress("Combining results", relation.size(), LOG) : null;
Pair<DBIDIter, Relation<Double>>[] IDVectorOntoScoreVector = Pair.newPairArray(results.size());
@@ -168,55 +167,57 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements
// We need to initialize them now be able to iterate them "in parallel".
{
int i = 0;
- for (OutlierResult r : results) {
+ for(OutlierResult r : results) {
IDVectorOntoScoreVector[i] = new Pair<DBIDIter, Relation<Double>>(r.getOrdering().iter(relation.getDBIDs()).iter(), r.getScores());
i++;
}
}
// Iterating over the *lines* of the AS_t(i)-matrix.
- for (int i = 0; i < relation.size(); i++) {
+ for(int i = 0; i < relation.size(); i++) {
// Iterating over the elements of a line (breadth-first).
- for (Pair<DBIDIter, Relation<Double>> pair : IDVectorOntoScoreVector) {
+ for(Pair<DBIDIter, Relation<Double>> pair : IDVectorOntoScoreVector) {
DBIDIter iter = pair.first;
// Always true if every algorithm returns a complete result (one score
// for every DBID).
- if (iter.valid()) {
+ if(iter.valid()) {
double score = pair.second.get(iter);
- if (Double.isNaN(scores.doubleValue(iter))) {
+ if(Double.isNaN(scores.doubleValue(iter))) {
scores.putDouble(iter, score);
minmax.put(score);
}
iter.advance();
- } else {
+ }
+ else {
LOG.warning("Incomplete result: Iterator does not contain |DB| DBIDs");
}
}
// Progress does not take the initial mapping into account.
- if (cprog != null) {
+ if(cprog != null) {
cprog.incrementProcessed(LOG);
}
}
- if (cprog != null) {
+ if(cprog != null) {
cprog.ensureCompleted(LOG);
}
- } else {
+ }
+ else {
FiniteProgress cprog = LOG.isVerbose() ? new FiniteProgress("Combining results", relation.size(), LOG) : null;
- for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
+ for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
double sum = 0.0;
- for (OutlierResult r : results) {
+ for(OutlierResult r : results) {
final Double s = r.getScores().get(iter);
- if (s != null && !Double.isNaN(s)) {
+ if(s != null && !Double.isNaN(s)) {
sum += s;
}
}
scores.putDouble(iter, sum);
minmax.put(sum);
- if (cprog != null) {
+ if(cprog != null) {
cprog.incrementProcessed(LOG);
}
}
- if (cprog != null) {
+ if(cprog != null) {
cprog.ensureCompleted(LOG);
}
}
@@ -237,13 +238,13 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements
BitSet dimset = new BitSet();
// Fill with all dimensions
int[] dims = new int[alldim];
- for (int d = 0; d < alldim; d++) {
+ for(int d = 0; d < alldim; d++) {
dims[d] = d;
}
// Target dimensionality:
int subdim = mindim + rand.nextInt(maxdim - mindim);
// Shrink the subspace to the destination size
- for (int d = 0; d < alldim - subdim; d++) {
+ for(int d = 0; d < alldim - subdim; d++) {
int s = rand.nextInt(alldim - d);
dimset.set(dims[s]);
dims[s] = dims[alldim - d - 1];
@@ -317,21 +318,21 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
final IntParameter pK = new IntParameter(LOF.Parameterizer.K_ID);
- pK.addConstraint(new GreaterConstraint(1));
- if (config.grab(pK)) {
+ pK.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
+ if(config.grab(pK)) {
k = pK.getValue();
}
IntParameter numP = new IntParameter(NUM_ID);
- numP.addConstraint(new GreaterEqualConstraint(1));
- if (config.grab(numP)) {
+ numP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(numP)) {
num = numP.getValue();
}
Flag breadthF = new Flag(BREADTH_ID);
- if (config.grab(breadthF)) {
+ if(config.grab(breadthF)) {
breadth = breadthF.getValue();
}
RandomParameter rndP = new RandomParameter(SEED_ID);
- if (config.grab(rndP)) {
+ if(config.grab(rndP)) {
rnd = rndP.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java
index 69608293..f92a8b80 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java
@@ -72,7 +72,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -174,9 +174,9 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe
final DBIDs ids = relation.getDBIDs();
ArrayList<ArrayDBIDs> subspaceIndex = buildOneDimIndexes(relation);
- Set<HiCSSubspace> subspaces = calculateSubspaces(relation, subspaceIndex, rnd.getRandom());
+ Set<HiCSSubspace> subspaces = calculateSubspaces(relation, subspaceIndex, rnd.getSingleThreadedRandom());
- if (LOG.isVerbose()) {
+ if(LOG.isVerbose()) {
LOG.verbose("Number of high-contrast subspaces: " + subspaces.size());
}
List<Relation<Double>> results = new ArrayList<>();
@@ -185,8 +185,8 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe
// run outlier detection and collect the result
// TODO extend so that any outlierAlgorithm can be used (use materialized
// relation instead of SubspaceEuclideanDistanceFunction?)
- for (HiCSSubspace dimset : subspaces) {
- if (LOG.isVerbose()) {
+ for(HiCSSubspace dimset : subspaces) {
+ if(LOG.isVerbose()) {
LOG.verbose("Performing outlier detection in subspace " + dimset);
}
@@ -196,22 +196,22 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe
// run LOF and collect the result
OutlierResult result = outlierAlgorithm.run(pdb);
results.add(result.getScores());
- if (prog != null) {
+ if(prog != null) {
prog.incrementProcessed(LOG);
}
}
- if (prog != null) {
+ if(prog != null) {
prog.ensureCompleted(LOG);
}
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
DoubleMinMax minmax = new DoubleMinMax();
- for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double sum = 0.0;
- for (Relation<Double> r : results) {
+ for(Relation<Double> r : results) {
final Double s = r.get(iditer);
- if (s != null && !Double.isNaN(s)) {
+ if(s != null && !Double.isNaN(s)) {
sum += s;
}
}
@@ -237,7 +237,7 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe
ArrayList<ArrayDBIDs> subspaceIndex = new ArrayList<>(dim + 1);
SortDBIDsBySingleDimension comp = new VectorUtil.SortDBIDsBySingleDimension(relation);
- for (int i = 0; i < dim; i++) {
+ for(int i = 0; i < dim; i++) {
ArrayModifiableDBIDs amDBIDs = DBIDUtil.newArray(relation.getDBIDs());
comp.setDimension(i);
amDBIDs.sort(comp);
@@ -258,7 +258,7 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe
final int dbdim = RelationUtil.dimensionality(relation);
FiniteProgress dprog = LOG.isVerbose() ? new FiniteProgress("Subspace dimensionality", dbdim, LOG) : null;
- if (dprog != null) {
+ if(dprog != null) {
dprog.setProcessed(2, LOG);
}
@@ -266,31 +266,31 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe
TopBoundedHeap<HiCSSubspace> dDimensionalList = new TopBoundedHeap<>(cutoff, HiCSSubspace.SORT_BY_CONTRAST_ASC);
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Generating two-element subsets", (dbdim * (dbdim - 1)) >> 1, LOG) : null;
// compute two-element sets of subspaces
- for (int i = 0; i < dbdim; i++) {
- for (int j = i + 1; j < dbdim; j++) {
+ for(int i = 0; i < dbdim; i++) {
+ for(int j = i + 1; j < dbdim; j++) {
HiCSSubspace ts = new HiCSSubspace();
ts.set(i);
ts.set(j);
calculateContrast(relation, ts, subspaceIndex, random);
dDimensionalList.add(ts);
- if (prog != null) {
+ if(prog != null) {
prog.incrementProcessed(LOG);
}
}
}
- if (prog != null) {
+ if(prog != null) {
prog.ensureCompleted(LOG);
}
IndefiniteProgress qprog = LOG.isVerbose() ? new IndefiniteProgress("Testing subspace candidates", LOG) : null;
- for (int d = 3; !dDimensionalList.isEmpty(); d++) {
- if (dprog != null) {
+ for(int d = 3; !dDimensionalList.isEmpty(); d++) {
+ if(dprog != null) {
dprog.setProcessed(d, LOG);
}
// result now contains all d-dimensional sets of subspaces
ArrayList<HiCSSubspace> candidateList = new ArrayList<>(dDimensionalList.size());
- for (Heap<HiCSSubspace>.UnorderedIter it = dDimensionalList.unorderedIter(); it.valid(); it.advance()) {
+ for(Heap<HiCSSubspace>.UnorderedIter it = dDimensionalList.unorderedIter(); it.valid(); it.advance()) {
subspaceList.add(it.get());
candidateList.add(it.get());
}
@@ -299,39 +299,39 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe
Collections.sort(candidateList, HiCSSubspace.SORT_BY_SUBSPACE);
// TODO: optimize APRIORI style, by not even computing the bit set or?
- for (int i = 0; i < candidateList.size() - 1; i++) {
- for (int j = i + 1; j < candidateList.size(); j++) {
+ for(int i = 0; i < candidateList.size() - 1; i++) {
+ for(int j = i + 1; j < candidateList.size(); j++) {
HiCSSubspace set1 = candidateList.get(i);
HiCSSubspace set2 = candidateList.get(j);
HiCSSubspace joinedSet = new HiCSSubspace();
joinedSet.or(set1);
joinedSet.or(set2);
- if (joinedSet.cardinality() != d) {
+ if(joinedSet.cardinality() != d) {
continue;
}
calculateContrast(relation, joinedSet, subspaceIndex, random);
dDimensionalList.add(joinedSet);
- if (qprog != null) {
+ if(qprog != null) {
qprog.incrementProcessed(LOG);
}
}
}
// Prune
- for (HiCSSubspace cand : candidateList) {
- for (Heap<HiCSSubspace>.UnorderedIter it = dDimensionalList.unorderedIter(); it.valid(); it.advance()) {
- if (it.get().contrast > cand.contrast) {
+ for(HiCSSubspace cand : candidateList) {
+ for(Heap<HiCSSubspace>.UnorderedIter it = dDimensionalList.unorderedIter(); it.valid(); it.advance()) {
+ if(it.get().contrast > cand.contrast) {
subspaceList.remove(cand);
break;
}
}
}
}
- if (qprog != null) {
+ if(qprog != null) {
qprog.setCompleted(LOG);
}
- if (dprog != null) {
+ if(dprog != null) {
dprog.setProcessed(dbdim, LOG);
dprog.ensureCompleted(LOG);
}
@@ -353,17 +353,17 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe
int retries = 0;
double deviationSum = 0.0;
- for (int i = 0; i < m; i++) {
+ for(int i = 0; i < m; i++) {
// Choose a random set bit.
int chosen = -1;
- for (int tmp = random.nextInt(card); tmp >= 0; tmp--) {
+ for(int tmp = random.nextInt(card); tmp >= 0; tmp--) {
chosen = subspace.nextSetBit(chosen + 1);
}
// initialize sample
DBIDs conditionalSample = relation.getDBIDs();
- for (int j = subspace.nextSetBit(0); j >= 0; j = subspace.nextSetBit(j + 1)) {
- if (j == chosen) {
+ for(int j = subspace.nextSetBit(0); j >= 0; j = subspace.nextSetBit(j + 1)) {
+ if(j == chosen) {
continue;
}
ArrayDBIDs sortedIndices = subspaceIndex.get(j);
@@ -371,20 +371,21 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe
// initialize index block
DBIDArrayIter iter = sortedIndices.iter();
iter.seek(random.nextInt(relation.size() - windowsize));
- for (int k = 0; k < windowsize; k++, iter.advance()) {
+ for(int k = 0; k < windowsize; k++, iter.advance()) {
indexBlock.add(iter); // select index block
}
conditionalSample = DBIDUtil.intersection(conditionalSample, indexBlock);
}
- if (conditionalSample.size() < 10) {
+ if(conditionalSample.size() < 10) {
retries++;
- if (LOG.isDebugging()) {
+ if(LOG.isDebugging()) {
LOG.debug("Sample size very small. Retry no. " + retries);
}
- if (retries >= MAX_RETRIES) {
+ if(retries >= MAX_RETRIES) {
LOG.warning("Too many retries, for small samples: " + retries);
- } else {
+ }
+ else {
i--;
continue;
}
@@ -393,7 +394,7 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe
double[] sampleValues = new double[conditionalSample.size()];
{
int l = 0;
- for (DBIDIter iter = conditionalSample.iter(); iter.valid(); iter.advance()) {
+ for(DBIDIter iter = conditionalSample.iter(); iter.valid(); iter.advance()) {
sampleValues[l] = relation.get(iter).doubleValue(chosen);
l++;
}
@@ -402,23 +403,23 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe
double[] fullValues = new double[relation.size()];
{
int l = 0;
- for (DBIDIter iter = subspaceIndex.get(chosen).iter(); iter.valid(); iter.advance()) {
+ for(DBIDIter iter = subspaceIndex.get(chosen).iter(); iter.valid(); iter.advance()) {
fullValues[l] = relation.get(iter).doubleValue(chosen);
l++;
}
}
double contrast = statTest.deviation(fullValues, sampleValues);
- if (Double.isNaN(contrast)) {
+ if(Double.isNaN(contrast)) {
i--;
LOG.warning("Contrast was NaN");
continue;
}
deviationSum += contrast;
- if (prog != null) {
+ if(prog != null) {
prog.incrementProcessed(LOG);
}
}
- if (prog != null) {
+ if(prog != null) {
prog.ensureCompleted(LOG);
}
subspace.contrast = deviationSum / m;
@@ -464,7 +465,7 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe
public String toString() {
StringBuilder buf = new StringBuilder();
buf.append("[contrast=").append(contrast);
- for (int i = nextSetBit(0); i >= 0; i = nextSetBit(i + 1)) {
+ for(int i = nextSetBit(0); i >= 0; i = nextSetBit(i + 1)) {
buf.append(' ').append(i + 1);
}
buf.append(']');
@@ -477,7 +478,7 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe
public static final Comparator<HiCSSubspace> SORT_BY_CONTRAST_ASC = new Comparator<HiCSSubspace>() {
@Override
public int compare(HiCSSubspace o1, HiCSSubspace o2) {
- if (o1.contrast == o2.contrast) {
+ if(o1.contrast == o2.contrast) {
return 0;
}
return o1.contrast > o2.contrast ? 1 : -1;
@@ -490,7 +491,7 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe
public static final Comparator<HiCSSubspace> SORT_BY_CONTRAST_DESC = new Comparator<HiCSSubspace>() {
@Override
public int compare(HiCSSubspace o1, HiCSSubspace o2) {
- if (o1.contrast == o2.contrast) {
+ if(o1.contrast == o2.contrast) {
return 0;
}
return o1.contrast < o2.contrast ? 1 : -1;
@@ -505,10 +506,11 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe
public int compare(HiCSSubspace o1, HiCSSubspace o2) {
int dim1 = o1.nextSetBit(0);
int dim2 = o2.nextSetBit(0);
- while (dim1 >= 0 && dim2 >= 0) {
- if (dim1 < dim2) {
+ while(dim1 >= 0 && dim2 >= 0) {
+ if(dim1 < dim2) {
return -1;
- } else if (dim1 > dim2) {
+ }
+ else if(dim1 > dim2) {
return 1;
}
dim1 = o1.nextSetBit(dim1 + 1);
@@ -597,35 +599,35 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
final IntParameter mP = new IntParameter(M_ID, 50);
- mP.addConstraint(new GreaterConstraint(1));
- if (config.grab(mP)) {
+ mP.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
+ if(config.grab(mP)) {
m = mP.intValue();
}
final DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, 0.1);
- alphaP.addConstraint(new GreaterConstraint(0));
- if (config.grab(alphaP)) {
+ alphaP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+ if(config.grab(alphaP)) {
alpha = alphaP.doubleValue();
}
final ObjectParameter<OutlierAlgorithm> algoP = new ObjectParameter<>(ALGO_ID, OutlierAlgorithm.class, LOF.class);
- if (config.grab(algoP)) {
+ if(config.grab(algoP)) {
outlierAlgorithm = algoP.instantiateClass(config);
}
final ObjectParameter<GoodnessOfFitTest> testP = new ObjectParameter<>(TEST_ID, GoodnessOfFitTest.class, KolmogorovSmirnovTest.class);
- if (config.grab(testP)) {
+ if(config.grab(testP)) {
statTest = testP.instantiateClass(config);
}
final IntParameter cutoffP = new IntParameter(LIMIT_ID, 100);
- cutoffP.addConstraint(new GreaterConstraint(1));
- if (config.grab(cutoffP)) {
+ cutoffP.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
+ if(config.grab(cutoffP)) {
cutoff = cutoffP.intValue();
}
final RandomParameter rndP = new RandomParameter(SEED_ID);
- if (config.grab(rndP)) {
+ if(config.grab(rndP)) {
rnd = rndP.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java
index c8efe4da..85524b4e 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java
@@ -56,7 +56,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -149,7 +149,7 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac
}
final double e;
final D distance = distFunc.distance(id, n);
- heap.add(distance, n);
+ heap.insert(distance, n);
double dist = distance.doubleValue();
if(dist == 0) {
LOG.warning("Zero distances are not supported - skipping: " + DBIDUtil.toString(id) + " " + DBIDUtil.toString(n));
@@ -296,7 +296,7 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac
*/
protected void configK(Parameterization config) {
final IntParameter param = new IntParameter(K_ID);
- param.addConstraint(new GreaterEqualConstraint(1));
+ param.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
if(config.grab(param)) {
k = param.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java
index e07ce480..1a1f9a82 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java
@@ -1,26 +1,27 @@
package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
-/*
-This file is part of ELKI:
-Environment for Developing KDD-Applications Supported by Index-Structures
-
-Copyright (C) 2013
-Ludwig-Maximilians-Universität München
-Lehr- und Forschungseinheit für Datenbanksysteme
-ELKI Development Team
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
import java.util.Arrays;
@@ -50,15 +51,15 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
/**
* A Trimmed Mean Approach to Finding Spatial Outliers.
*
- * Outliers are defined by their value deviation from a trimmed mean of the neighbors.
+ * Outliers are defined by their value deviation from a trimmed mean of the
+ * neighbors.
*
* <p>
* Reference: <br>
@@ -116,7 +117,7 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> {
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Computing trimmed means", relation.size(), LOG) : null;
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
DBIDs neighbors = npred.getNeighborDBIDs(iditer);
int num = 0;
double[] values = new double[neighbors.size()];
@@ -161,7 +162,7 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> {
double[] ei = new double[relation.size()];
{
int i = 0;
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
ei[i] = errors.doubleValue(iditer);
i++;
}
@@ -180,7 +181,7 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> {
}
// calculate score
DoubleMinMax minmax = new DoubleMinMax();
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double score = Math.abs(errors.doubleValue(iditer)) * 0.6745 / median_dev_from_median;
scores.putDouble(iditer, score);
minmax.put(score);
@@ -228,8 +229,8 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> {
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
DoubleParameter pP = new DoubleParameter(P_ID);
- pP.addConstraint(new GreaterConstraint(0.0));
- pP.addConstraint(new LessConstraint(0.5));
+ pP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+ pP.addConstraint(CommonConstraints.LESS_THAN_HALF_DOUBLE);
if(config.grab(pP)) {
p = pP.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java
index c4fc4407..c93b10cb 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java
@@ -38,7 +38,7 @@ import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
@@ -224,7 +224,7 @@ public class ExtendedNeighborhood extends AbstractPrecomputedNeighborhood {
*/
public static int getParameterSteps(Parameterization config) {
final IntParameter param = new IntParameter(STEPS_ID);
- param.addConstraint(new GreaterEqualConstraint(1));
+ param.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
if(config.grab(param)) {
return param.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java
index 96896bd8..33b5010a 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java
@@ -161,8 +161,8 @@ public class ExternalNeighborhood extends AbstractPrecomputedNeighborhood {
if(olq != null) {
LabelList label = olq.get(iditer);
if(label != null) {
- for(String lbl : label) {
- lblmap.put(lbl, DBIDUtil.deref(iditer));
+ for(int i = 0; i < label.size(); i++) {
+ lblmap.put(label.get(i), DBIDUtil.deref(iditer));
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java
index 05bf2f18..4d6ec635 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java
@@ -38,7 +38,7 @@ import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
@@ -216,7 +216,7 @@ public class LinearWeightedExtendedNeighborhood implements WeightedNeighborSetPr
*/
public static int getParameterSteps(Parameterization config) {
final IntParameter param = new IntParameter(STEPS_ID);
- param.addConstraint(new GreaterEqualConstraint(1));
+ param.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
if(config.grab(param)) {
return param.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java
index ae04fef4..c21542da 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java
@@ -55,6 +55,7 @@ import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.math.MathUtil;
import de.lmu.ifi.dbs.elki.math.MeanVariance;
import de.lmu.ifi.dbs.elki.math.statistics.distribution.GammaDistribution;
import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.EpanechnikovKernelDensityFunction;
@@ -368,7 +369,7 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier
*/
protected double optimalBandwidth(int dim) {
// Pi in the publication is redundant and cancels out!
- double hopt = 8 * GammaDistribution.gamma(dim / 2.0 + 1) * (dim + 4) * Math.pow(2, dim);
+ double hopt = 8 * GammaDistribution.gamma(dim / 2.0 + 1) * (dim + 4) * MathUtil.powi(2, dim);
return hopt * Math.pow(relation.size(), (-1. / (dim + 4)));
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java
index 96c8875f..3e248bfa 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java
@@ -49,7 +49,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
@@ -186,7 +186,7 @@ public class OutRankS1 extends AbstractAlgorithm<OutlierResult> implements Outli
algorithm = algP.instantiateClass(config);
}
DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, 0.25);
- alphaP.addConstraint(new GreaterConstraint(0));
+ alphaP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
if (config.grab(alphaP)) {
alpha = alphaP.doubleValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java
index b2255e67..489f811b 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java
@@ -31,10 +31,10 @@ import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
-import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
@@ -44,7 +44,6 @@ import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair;
import de.lmu.ifi.dbs.elki.database.query.similarity.SimilarityQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
-import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.distance.similarityfunction.SharedNearestNeighborSimilarityFunction;
@@ -52,7 +51,9 @@ import de.lmu.ifi.dbs.elki.distance.similarityfunction.SimilarityFunction;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
-import de.lmu.ifi.dbs.elki.result.ResultHierarchy;
+import de.lmu.ifi.dbs.elki.math.Mean;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
@@ -65,9 +66,10 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
@@ -91,7 +93,6 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
* @param <V> the type of NumberVector handled by this Algorithm
* @param <D> distance type
*/
-// todo arthur comment
@Title("SOD: Subspace outlier degree")
@Description("Outlier Detection in Axis-Parallel Subspaces of High Dimensional Data")
@Reference(authors = "H.-P. Kriegel, P. Kröger, E. Schubert, A. Zimek", title = "Outlier Detection in Axis-Parallel Subspaces of High Dimensional Data", booktitle = "Proceedings of the 13th Pacific-Asia Conference on Knowledge Discovery and Data Mining (PAKDD), Bangkok, Thailand, 2009", url = "http://dx.doi.org/10.1007/978-3-642-01307-2")
@@ -102,50 +103,39 @@ public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
private static final Logging LOG = Logging.getLogger(SOD.class);
/**
- * Parameter to specify the number of shared nearest neighbors to be
- * considered for learning the subspace properties., must be an integer
- * greater than 0.
- */
- public static final OptionID KNN_ID = new OptionID("sod.knn", "The number of most snn-similar objects to use as reference set for learning the subspace properties.");
-
- /**
- * Parameter to indicate the multiplier for the discriminance value for
- * discerning small from large variances.
- */
- public static final OptionID ALPHA_ID = new OptionID("sod.alpha", "The multiplier for the discriminance value for discerning small from large variances.");
-
- /**
- * Parameter for the similarity function.
- */
- public static final OptionID SIM_ID = new OptionID("sod.similarity", "The similarity function used for the neighborhood set.");
-
- /**
- * Holds the value of {@link #KNN_ID}.
+ * Neighborhood size.
*/
private int knn;
/**
- * Holds the value of {@link #ALPHA_ID}.
+ * Alpha (discriminance value).
*/
private double alpha;
/**
- * The similarity function {@link #SIM_ID}.
+ * Similarity function to use.
*/
private SimilarityFunction<V, D> similarityFunction;
/**
+ * Report models.
+ */
+ private boolean models;
+
+ /**
* Constructor with parameters.
*
* @param knn knn value
* @param alpha Alpha parameter
* @param similarityFunction Shared nearest neighbor similarity function
+ * @param models Report generated models
*/
- public SOD(int knn, double alpha, SimilarityFunction<V, D> similarityFunction) {
+ public SOD(int knn, double alpha, SimilarityFunction<V, D> similarityFunction, boolean models) {
super();
this.knn = knn;
this.alpha = alpha;
this.similarityFunction = similarityFunction;
+ this.models = models;
}
/**
@@ -157,26 +147,55 @@ public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
public OutlierResult run(Relation<V> relation) {
SimilarityQuery<V, D> snnInstance = similarityFunction.instantiate(relation);
FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Assigning Subspace Outlier Degree", relation.size(), LOG) : null;
- WritableDataStore<SODModel<?>> sod_models = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, SODModel.class);
+ final WritableDoubleDataStore sod_scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
+ WritableDataStore<SODModel> sod_models = null;
+ if (models) { // Models requested
+ sod_models = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, SODModel.class);
+ }
DoubleMinMax minmax = new DoubleMinMax();
for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
if (progress != null) {
progress.incrementProcessed(LOG);
}
- DBIDs knnList = getNearestNeighbors(relation, snnInstance, iter);
- SODModel<V> model = new SODModel<>(relation, knnList, alpha, relation.get(iter));
- sod_models.put(iter, model);
- minmax.put(model.getSod());
+ DBIDs neighborhood = getNearestNeighbors(relation, snnInstance, iter);
+
+ Vector center;
+ BitSet weightVector;
+ double sod;
+ if (neighborhood.size() > 0) {
+ center = Centroid.make(relation, neighborhood);
+ // Note: per-dimension variances; no covariances.
+ double[] variances = computePerDimensionVariances(relation, center, neighborhood);
+ double expectationOfVariance = Mean.of(variances);
+ weightVector = new BitSet(variances.length);
+ for (int d = 0; d < variances.length; d++) {
+ if (variances[d] < alpha * expectationOfVariance) {
+ weightVector.set(d, true);
+ }
+ }
+ sod = subspaceOutlierDegree(relation.get(iter), center, weightVector);
+ } else {
+ center = relation.get(iter).getColumnVector();
+ weightVector = null;
+ sod = 0.;
+ }
+
+ if (sod_models != null) {
+ sod_models.put(iter, new SODModel(center, weightVector));
+ }
+ sod_scores.putDouble(iter, sod);
+ minmax.put(sod);
}
if (progress != null) {
progress.ensureCompleted(LOG);
}
// combine results.
- Relation<SODModel<?>> models = new MaterializedRelation<>("Subspace Outlier Model", "sod-outlier", new SimpleTypeInformation<SODModel<?>>(SODModel.class), sod_models, relation.getDBIDs());
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
- OutlierResult sodResult = new OutlierResult(meta, new SODProxyScoreResult(models, relation.getDBIDs()));
- // also add the models.
- sodResult.addChildResult(models);
+ OutlierResult sodResult = new OutlierResult(meta, new MaterializedRelation<>("Subspace Outlier Degree", "sod-outlier", TypeUtil.DOUBLE, sod_scores, relation.getDBIDs()));
+ if (sod_models != null) {
+ Relation<SODModel> models = new MaterializedRelation<>("Subspace Outlier Model", "sod-outlier", new SimpleTypeInformation<>(SODModel.class), sod_models, relation.getDBIDs());
+ sodResult.addChildResult(models);
+ }
return sodResult;
}
@@ -186,6 +205,8 @@ public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
* <p/>
* The query object is excluded from the knn list.
*
+ * FIXME: move this to the database layer.
+ *
* @param relation the database holding the objects
* @param simQ similarity function
* @param queryObject the query object for which the kNNs should be determined
@@ -193,14 +214,14 @@ public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
* distance without the query object
*/
private DBIDs getNearestNeighbors(Relation<V> relation, SimilarityQuery<V, D> simQ, DBIDRef queryObject) {
- // similarityFunction.getPreprocessor().getParameters();
Heap<DoubleDBIDPair> nearestNeighbors = new TiedTopBoundedHeap<>(knn);
for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
- if (!DBIDUtil.equal(iter, queryObject)) {
- double sim = simQ.similarity(queryObject, iter).doubleValue();
- if (sim > 0) {
- nearestNeighbors.add(DBIDUtil.newPair(sim, iter));
- }
+ if (DBIDUtil.equal(iter, queryObject)) {
+ continue;
+ }
+ double sim = simQ.similarity(queryObject, iter).doubleValue();
+ if (sim > 0.) {
+ nearestNeighbors.add(DBIDUtil.newPair(sim, iter));
}
}
// Collect DBIDs
@@ -211,6 +232,50 @@ public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
return dbids;
}
+ /**
+ * Compute the per-dimension variances for the given neighborhood and center.
+ *
+ * @param relation Data relation
+ * @param center Center vector
+ * @param neighborhood Neighbors
+ * @return Per-dimension variances.
+ */
+ private static double[] computePerDimensionVariances(Relation<? extends NumberVector<?>> relation, Vector center, DBIDs neighborhood) {
+ double[] c = center.getArrayRef();
+ double[] variances = new double[c.length];
+ for (DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) {
+ NumberVector<?> databaseObject = relation.get(iter);
+ for (int d = 0; d < c.length; d++) {
+ final double deviation = databaseObject.doubleValue(d) - c[d];
+ variances[d] += deviation * deviation;
+ }
+ }
+ for (int d = 0; d < variances.length; d++) {
+ variances[d] /= neighborhood.size();
+ }
+ return variances;
+ }
+
+ /**
+ * Compute SOD score.
+ *
+ * @param queryObject Query object
+ * @param center Center vector
+ * @param weightVector Weight vector
+ * @return sod score
+ */
+ private double subspaceOutlierDegree(V queryObject, Vector center, BitSet weightVector) {
+ final int card = weightVector.cardinality();
+ if (card == 0) {
+ return 0;
+ }
+ final SubspaceEuclideanDistanceFunction df = new SubspaceEuclideanDistanceFunction(weightVector);
+ double distance = df.distance(queryObject, center).doubleValue();
+ distance /= card; // FIXME: defined as card, should be sqrt(card),
+ // unfortunately
+ return distance;
+ }
+
@Override
public TypeInformation[] getInputTypeRestriction() {
return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
@@ -225,232 +290,89 @@ public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
* SOD Model class
*
* @author Arthur Zimek
- * @param <V> the type of DatabaseObjects handled by this Result
*/
- // TODO: arthur comment
- public static class SODModel<V extends NumberVector<?>> implements TextWriteable, Comparable<SODModel<?>> {
- private double[] centerValues;
-
- private V center;
-
- private double[] variances;
-
- private double expectationOfVariance;
-
- private BitSet weightVector;
-
- private double sod;
-
+ public static class SODModel implements TextWriteable {
/**
- * Initialize SOD Model
- *
- * @param relation Database
- * @param neighborhood Neighborhood
- * @param alpha Alpha value
- * @param queryObject Query object
+ * Center vector
*/
- public SODModel(Relation<V> relation, DBIDs neighborhood, double alpha, V queryObject) {
- if (neighborhood.size() > 0) {
- // TODO: store database link?
- centerValues = new double[RelationUtil.dimensionality(relation)];
- variances = new double[centerValues.length];
- for (DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) {
- V databaseObject = relation.get(iter);
- for (int d = 0; d < centerValues.length; d++) {
- centerValues[d] += databaseObject.doubleValue(d);
- }
- }
- for (int d = 0; d < centerValues.length; d++) {
- centerValues[d] /= neighborhood.size();
- }
- for (DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) {
- V databaseObject = relation.get(iter);
- for (int d = 0; d < centerValues.length; d++) {
- // distance
- double distance = centerValues[d] - databaseObject.doubleValue(d);
- // variance
- variances[d] += distance * distance;
- }
- }
- expectationOfVariance = 0;
- for (int d = 0; d < variances.length; d++) {
- variances[d] /= neighborhood.size();
- expectationOfVariance += variances[d];
- }
- expectationOfVariance /= variances.length;
- weightVector = new BitSet(variances.length);
- for (int d = 0; d < variances.length; d++) {
- if (variances[d] < alpha * expectationOfVariance) {
- weightVector.set(d, true);
- }
- }
- center = RelationUtil.getNumberVectorFactory(relation).newNumberVector(centerValues);
- sod = subspaceOutlierDegree(queryObject, center, weightVector);
- } else {
- center = queryObject;
- sod = 0.0;
- }
- }
+ private Vector center;
/**
- * Compute SOD score.
- *
- * @param queryObject Query object
- * @param center Center vector
- * @param weightVector Weight vector
- * @return sod score
+ * Relevant dimensions.
*/
- private double subspaceOutlierDegree(V queryObject, V center, BitSet weightVector) {
- final SubspaceEuclideanDistanceFunction df = new SubspaceEuclideanDistanceFunction(weightVector);
- final int card = weightVector.cardinality();
- if (card == 0) {
- return 0;
- }
- double distance = df.distance(queryObject, center).doubleValue();
- distance /= card;
- return distance;
- }
+ private BitSet weightVector;
/**
- * Return the SOD of the point.
+ * Initialize SOD Model
*
- * @return sod value
+ * @param center Center vector
+ * @param weightVector Selected dimensions
*/
- public double getSod() {
- return this.sod;
+ public SODModel(Vector center, BitSet weightVector) {
+ this.center = center;
+ this.weightVector = weightVector;
}
@Override
public void writeToText(TextWriterStream out, String label) {
- out.inlinePrint(label + "=" + this.sod);
out.commentPrintLn(this.getClass().getSimpleName() + ":");
out.commentPrintLn("relevant attributes (counting starts with 0): " + this.weightVector.toString());
out.commentPrintLn("center of neighborhood: " + out.normalizationRestore(center).toString());
- out.commentPrintLn("subspace outlier degree: " + this.sod);
out.commentPrintSeparator();
}
-
- @Override
- public int compareTo(SODModel<?> o) {
- return Double.compare(this.getSod(), o.getSod());
- }
-
}
/**
- * Proxy class that converts a model result to an actual SOD score result.
+ * Parameterization class.
*
* @author Erich Schubert
*
* @apiviz.exclude
*/
- protected static class SODProxyScoreResult implements Relation<Double> {
+ public static class Parameterizer<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractParameterizer {
/**
- * Model result this is a proxy for.
+ * Parameter to specify the number of shared nearest neighbors to be
+ * considered for learning the subspace properties., must be an integer
+ * greater than 0.
*/
- Relation<SODModel<?>> models;
+ public static final OptionID KNN_ID = new OptionID("sod.knn", "The number of most snn-similar objects to use as reference set for learning the subspace properties.");
/**
- * The IDs we are defined for.
+ * Parameter to indicate the multiplier for the discriminance value for
+ * discerning small from large variances.
*/
- DBIDs dbids;
+ public static final OptionID ALPHA_ID = new OptionID("sod.alpha", "The multiplier for the discriminance value for discerning small from large variances.");
/**
- * Constructor.
- *
- * @param models Models result
- * @param dbids IDs we are defined for
+ * Parameter for the similarity function.
*/
- public SODProxyScoreResult(Relation<SODModel<?>> models, DBIDs dbids) {
- super();
- this.models = models;
- this.dbids = dbids;
- }
-
- @Override
- public Double get(DBIDRef objID) {
- return models.get(objID).getSod();
- }
-
- @Override
- public String getLongName() {
- return "Subspace Outlier Degree";
- }
-
- @Override
- public String getShortName() {
- return "sod-outlier";
- }
-
- @Override
- public DBIDs getDBIDs() {
- return dbids;
- }
-
- @Override
- public DBIDIter iterDBIDs() {
- return dbids.iter();
- }
-
- @Override
- public Database getDatabase() {
- return null; // FIXME
- }
+ public static final OptionID SIM_ID = new OptionID("sod.similarity", "The similarity function used for the neighborhood set.");
- @Override
- public void set(DBIDRef id, Double val) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public void delete(DBIDRef id) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public SimpleTypeInformation<Double> getDataTypeInformation() {
- return TypeUtil.DOUBLE;
- }
-
- @Override
- public int size() {
- return dbids.size();
- }
-
- @Override
- public ResultHierarchy getHierarchy() {
- return models.getHierarchy();
- }
-
- @Override
- public void setHierarchy(ResultHierarchy hierarchy) {
- models.setHierarchy(hierarchy);
- }
- }
+ /**
+ * Parameter for keeping the models.
+ */
+ public static final OptionID MODELS_ID = new OptionID("sod.models", "Report the models computed by SOD (default: report only scores).");
- /**
- * Parameterization class.
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
- */
- public static class Parameterizer<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractParameterizer {
/**
- * Holds the value of {@link #KNN_ID}.
+ * Neighborhood size
*/
private int knn = 1;
/**
- * Holds the value of {@link #ALPHA_ID}.
+ * Alpha (discriminance value).
*/
private double alpha = 1.1;
/**
- * The similarity function - {@link #SIM_ID}.
+ * The similarity function.
*/
private SimilarityFunction<V, D> similarityFunction;
+ /**
+ * Track models.
+ */
+ private boolean models = false;
+
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
@@ -460,21 +382,26 @@ public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
}
final IntParameter knnP = new IntParameter(KNN_ID);
- knnP.addConstraint(new GreaterConstraint(0));
+ knnP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
if (config.grab(knnP)) {
knn = knnP.getValue();
}
final DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, 1.1);
- alphaP.addConstraint(new GreaterConstraint(0));
+ alphaP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
if (config.grab(alphaP)) {
alpha = alphaP.doubleValue();
}
+
+ final Flag modelsF = new Flag(MODELS_ID);
+ if (config.grab(modelsF)) {
+ models = modelsF.isTrue();
+ }
}
@Override
protected SOD<V, D> makeInstance() {
- return new SOD<>(knn, alpha, similarityFunction);
+ return new SOD<>(knn, alpha, similarityFunction, models);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAverageCoordinateOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAverageCoordinateOutlier.java
index 84e3ad41..6f2f2f38 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAverageCoordinateOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAverageCoordinateOutlier.java
@@ -80,13 +80,13 @@ public class TrivialAverageCoordinateOutlier extends AbstractAlgorithm<OutlierRe
m.reset();
NumberVector<?> nv = relation.get(iditer);
for (int i = 0; i < nv.getDimensionality(); i++) {
- m.put(nv.doubleValue(i + 1));
+ m.put(nv.doubleValue(i));
}
final double score = m.getMean();
scores.putDouble(iditer, score);
minmax.put(score);
}
- Relation<Double> scoreres = new MaterializedRelation<Double>("Trivial mean score", "mean-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
+ Relation<Double> scoreres = new MaterializedRelation<>("Trivial mean score", "mean-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
return new OutlierResult(meta, scoreres);
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java
index 285b00df..2e952b5f 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java
@@ -52,8 +52,7 @@ import de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
@@ -106,7 +105,8 @@ public class TrivialGeneratedOutlier extends AbstractAlgorithm<OutlierResult> im
try {
Relation<?> relation = database.getRelation(TypeUtil.CLASSLABEL);
return run(models, vecs, relation);
- } catch (NoSupportedDataTypeException e) {
+ }
+ catch(NoSupportedDataTypeException e) {
// Otherwise, try any labellike.
return run(models, vecs, database.getRelation(TypeUtil.GUESSED_LABEL));
}
@@ -124,56 +124,58 @@ public class TrivialGeneratedOutlier extends AbstractAlgorithm<OutlierResult> im
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(models.getDBIDs(), DataStoreFactory.HINT_HOT);
HashSet<GeneratorSingleCluster> generators = new HashSet<>();
- for (DBIDIter iditer = models.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = models.iterDBIDs(); iditer.valid(); iditer.advance()) {
Model model = models.get(iditer);
- if (model instanceof GeneratorSingleCluster) {
+ if(model instanceof GeneratorSingleCluster) {
generators.add((GeneratorSingleCluster) model);
}
}
- if (generators.size() == 0) {
+ if(generators.size() == 0) {
LOG.warning("No generator models found for dataset - all points will be considered outliers.");
}
- for (GeneratorSingleCluster gen : generators) {
- for (int i = 0; i < gen.getDim(); i++) {
+ for(GeneratorSingleCluster gen : generators) {
+ for(int i = 0; i < gen.getDim(); i++) {
Distribution dist = gen.getDistribution(i);
- if (!(dist instanceof NormalDistribution)) {
+ if(!(dist instanceof NormalDistribution)) {
throw new AbortException("TrivialGeneratedOutlier currently only supports normal distributions, got: " + dist);
}
}
}
- for (DBIDIter iditer = models.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = models.iterDBIDs(); iditer.valid(); iditer.advance()) {
double score = 1.;
// Convert to a math vector
Vector v = vecs.get(iditer).getColumnVector();
- for (GeneratorSingleCluster gen : generators) {
+ for(GeneratorSingleCluster gen : generators) {
Vector tv = v;
// Transform backwards
- if (gen.getTransformation() != null) {
+ if(gen.getTransformation() != null) {
tv = gen.getTransformation().applyInverse(v);
}
final int dim = tv.getDimensionality();
double lensq = 0.0;
int norm = 0;
- for (int i = 0; i < dim; i++) {
+ for(int i = 0; i < dim; i++) {
Distribution dist = gen.getDistribution(i);
- if (dist instanceof NormalDistribution) {
+ if(dist instanceof NormalDistribution) {
NormalDistribution d = (NormalDistribution) dist;
double delta = (tv.get(i) - d.getMean()) / d.getStddev();
lensq += delta * delta;
norm += 1;
- } else {
+ }
+ else {
throw new AbortException("TrivialGeneratedOutlier currently only supports normal distributions, got: " + dist);
}
}
- if (norm > 0.) {
+ if(norm > 0.) {
// The squared distances are ChiSquared distributed
score = Math.min(score, ChiSquaredDistribution.cdf(lensq, norm));
- } else {
+ }
+ else {
score = 0.;
}
}
- if (expect < 1) {
+ if(expect < 1) {
score = expect * score / (1 - score + expect);
}
scores.putDouble(iditer, score);
@@ -210,9 +212,9 @@ public class TrivialGeneratedOutlier extends AbstractAlgorithm<OutlierResult> im
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
DoubleParameter expectP = new DoubleParameter(EXPECT_ID, 0.01);
- expectP.addConstraint(new GreaterConstraint(0.0));
- expectP.addConstraint(new LessEqualConstraint(1.0));
- if (config.grab(expectP)) {
+ expectP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+ expectP.addConstraint(CommonConstraints.LESS_EQUAL_ONE_DOUBLE);
+ if(config.grab(expectP)) {
expect = expectP.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/statistics/AddSingleScale.java b/src/de/lmu/ifi/dbs/elki/algorithm/statistics/AddSingleScale.java
index cbae17ca..8bd5f057 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/statistics/AddSingleScale.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/statistics/AddSingleScale.java
@@ -94,7 +94,11 @@ public class AddSingleScale implements Algorithm {
for(DBIDIter iditer = rel.iterDBIDs(); iditer.valid(); iditer.advance()) {
NumberVector<?> vec = rel.get(iditer);
for(int d = 0; d < dim; d++) {
- mm.put(vec.doubleValue(d));
+ final double val = vec.doubleValue(d);
+ if(val != val) {
+ continue; // NaN
+ }
+ mm.put(val);
}
}
LinearScale scale = new LinearScale(mm.getMin(), mm.getMax());
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/statistics/AveragePrecisionAtK.java b/src/de/lmu/ifi/dbs/elki/algorithm/statistics/AveragePrecisionAtK.java
index 1b87a015..490f8ba6 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/statistics/AveragePrecisionAtK.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/statistics/AveragePrecisionAtK.java
@@ -47,9 +47,7 @@ import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.MeanVariance;
import de.lmu.ifi.dbs.elki.result.CollectionResult;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
@@ -124,34 +122,36 @@ public class AveragePrecisionAtK<V extends Object, D extends NumberDistance<D, ?
MeanVariance[] mvs = MeanVariance.newArray(k);
final DBIDs ids;
- if (sampling < 1.0) {
+ if(sampling < 1.0) {
int size = Math.max(1, (int) (sampling * relation.size()));
ids = DBIDUtil.randomSample(relation.getDBIDs(), size, seed);
- } else {
+ }
+ else {
ids = relation.getDBIDs();
}
- if (LOG.isVerbose()) {
+ if(LOG.isVerbose()) {
LOG.verbose("Processing points...");
}
FiniteProgress objloop = LOG.isVerbose() ? new FiniteProgress("Computing nearest neighbors", ids.size(), LOG) : null;
// sort neighbors
- for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
KNNList<D> knn = knnQuery.getKNNForDBID(iter, qk);
Object label = lrelation.get(iter);
int positive = 0, i = 0;
- for (DBIDIter ri = knn.iter(); i < k && ri.valid(); ri.advance()) {
- if (!includeSelf && DBIDUtil.equal(iter, ri)) {
+ for(DBIDIter ri = knn.iter(); i < k && ri.valid(); ri.advance()) {
+ if(!includeSelf && DBIDUtil.equal(iter, ri)) {
continue;
}
Object olabel = lrelation.get(ri);
- if (label == null) {
- if (olabel == null) {
+ if(label == null) {
+ if(olabel == null) {
positive += 1;
}
- } else {
- if (label.equals(olabel)) {
+ }
+ else {
+ if(label.equals(olabel)) {
positive += 1;
}
}
@@ -159,18 +159,18 @@ public class AveragePrecisionAtK<V extends Object, D extends NumberDistance<D, ?
mvs[i].put(precision);
i++;
}
- if (objloop != null) {
+ if(objloop != null) {
objloop.incrementProcessed(LOG);
}
}
- if (objloop != null) {
+ if(objloop != null) {
objloop.ensureCompleted(LOG);
}
// Collections.sort(results);
// Transform Histogram into a Double Vector array.
Collection<DoubleVector> res = new ArrayList<>(k);
- for (int i = 0; i < k; i++) {
+ for(int i = 0; i < k; i++) {
DoubleVector row = new DoubleVector(new double[] { mvs[i].getMean(), mvs[i].getSampleStddev() });
res.add(row);
}
@@ -239,24 +239,24 @@ public class AveragePrecisionAtK<V extends Object, D extends NumberDistance<D, ?
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
final IntParameter kP = new IntParameter(K_ID);
- kP.addConstraint(new GreaterEqualConstraint(2));
- if (config.grab(kP)) {
+ kP.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
+ if(config.grab(kP)) {
k = kP.getValue();
}
final DoubleParameter samplingP = new DoubleParameter(SAMPLING_ID);
- samplingP.addConstraint(new GreaterConstraint(0.0));
- samplingP.addConstraint(new LessEqualConstraint(1.0));
+ samplingP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+ samplingP.addConstraint(CommonConstraints.LESS_EQUAL_ONE_DOUBLE);
samplingP.setOptional(true);
- if (config.grab(samplingP)) {
+ if(config.grab(samplingP)) {
sampling = samplingP.getValue();
}
final LongParameter rndP = new LongParameter(SEED_ID);
rndP.setOptional(true);
- if (config.grab(rndP)) {
+ if(config.grab(rndP)) {
seed = rndP.getValue();
}
final Flag includeP = new Flag(INCLUDESELF_ID);
- if (config.grab(includeP)) {
+ if(config.grab(includeP)) {
includeSelf = includeP.isTrue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/statistics/DistanceStatisticsWithClasses.java b/src/de/lmu/ifi/dbs/elki/algorithm/statistics/DistanceStatisticsWithClasses.java
index 3c8e1635..244af0ca 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/statistics/DistanceStatisticsWithClasses.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/statistics/DistanceStatisticsWithClasses.java
@@ -62,7 +62,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.OnlyOneIsAllowedToBeSetGlobalConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
@@ -159,24 +159,26 @@ public class DistanceStatisticsWithClasses<O, D extends NumberDistance<D, ?>> ex
MeanVariance modif = new MeanVariance();
// Histogram
final ObjHistogram<long[]> histogram;
- if (stepprog != null) {
+ if(stepprog != null) {
stepprog.beginStep(1, "Prepare histogram.", LOG);
}
- if (exact) {
+ if(exact) {
gminmax = exactMinMax(relation, distFunc);
histogram = new LongArrayStaticHistogram(numbin, gminmax.getMin(), gminmax.getMax(), 2);
- } else if (sampling) {
+ }
+ else if(sampling) {
gminmax = sampleMinMax(relation, distFunc);
histogram = new LongArrayStaticHistogram(numbin, gminmax.getMin(), gminmax.getMax(), 2);
- } else {
+ }
+ else {
histogram = new AbstractObjDynamicHistogram<long[]>(numbin) {
@Override
protected long[] downsample(Object[] data, int start, int end, int size) {
long[] ret = new long[2];
- for (int i = start; i < end; i++) {
+ for(int i = start; i < end; i++) {
long[] existing = (long[]) data[i];
- if (existing != null) {
- for (int c = 0; c < 2; c++) {
+ if(existing != null) {
+ for(int c = 0; c < 2; c++) {
ret[c] += existing[c];
}
}
@@ -186,7 +188,7 @@ public class DistanceStatisticsWithClasses<O, D extends NumberDistance<D, ?>> ex
@Override
protected long[] aggregate(long[] first, long[] second) {
- for (int c = 0; c < 2; c++) {
+ for(int c = 0; c < 2; c++) {
first[c] += second[c];
}
return first;
@@ -204,20 +206,20 @@ public class DistanceStatisticsWithClasses<O, D extends NumberDistance<D, ?>> ex
};
}
- if (stepprog != null) {
+ if(stepprog != null) {
stepprog.beginStep(2, "Build histogram.", LOG);
}
final FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Distance computations", relation.size(), LOG) : null;
// iterate per cluster
final long[] incFirst = new long[] { 1L, 0L };
final long[] incSecond = new long[] { 0L, 1L };
- for (Cluster<?> c1 : split) {
- for (DBIDIter id1 = c1.getIDs().iter(); id1.valid(); id1.advance()) {
+ for(Cluster<?> c1 : split) {
+ for(DBIDIter id1 = c1.getIDs().iter(); id1.valid(); id1.advance()) {
// in-cluster distances
DoubleMinMax iminmax = new DoubleMinMax();
- for (DBIDIter iter2 = c1.getIDs().iter(); iter2.valid(); iter2.advance()) {
+ for(DBIDIter iter2 = c1.getIDs().iter(); iter2.valid(); iter2.advance()) {
// skip the point itself.
- if (DBIDUtil.equal(id1, iter2)) {
+ if(DBIDUtil.equal(id1, iter2)) {
continue;
}
double d = distFunc.distance(id1, iter2).doubleValue();
@@ -236,13 +238,13 @@ public class DistanceStatisticsWithClasses<O, D extends NumberDistance<D, ?>> ex
// other-cluster distances
DoubleMinMax ominmax = new DoubleMinMax();
- for (Cluster<?> c2 : split) {
- if (c2 == c1) {
+ for(Cluster<?> c2 : split) {
+ if(c2 == c1) {
continue;
}
- for (DBIDIter iter2 = c2.getIDs().iter(); iter2.valid(); iter2.advance()) {
+ for(DBIDIter iter2 = c2.getIDs().iter(); iter2.valid(); iter2.advance()) {
// skip the point itself (shouldn't happen though)
- if (DBIDUtil.equal(id1, iter2)) {
+ if(DBIDUtil.equal(id1, iter2)) {
continue;
}
double d = distFunc.distance(id1, iter2).doubleValue();
@@ -259,33 +261,33 @@ public class DistanceStatisticsWithClasses<O, D extends NumberDistance<D, ?>> ex
// min/max
gominmax.put(ominmax.getMin());
gominmax.put(ominmax.getMax());
- if (progress != null) {
+ if(progress != null) {
progress.incrementProcessed(LOG);
}
}
}
- if (progress != null) {
+ if(progress != null) {
progress.ensureCompleted(LOG);
}
// Update values (only needed for sampling case).
gminmax.setFirst(Math.min(giminmax.getMin(), gominmax.getMin()));
gminmax.setSecond(Math.max(giminmax.getMax(), gominmax.getMax()));
- if (stepprog != null) {
+ if(stepprog != null) {
stepprog.setCompleted(LOG);
}
// count the number of samples we have in the data
long inum = 0;
long onum = 0;
- for (ObjHistogram.Iter<long[]> iter = histogram.iter(); iter.valid(); iter.advance()) {
+ for(ObjHistogram.Iter<long[]> iter = histogram.iter(); iter.valid(); iter.advance()) {
inum += iter.getValue()[0];
onum += iter.getValue()[1];
}
long bnum = inum + onum;
Collection<DoubleVector> binstat = new ArrayList<>(numbin);
- for (ObjHistogram.Iter<long[]> iter = histogram.iter(); iter.valid(); iter.advance()) {
+ for(ObjHistogram.Iter<long[]> iter = histogram.iter(); iter.valid(); iter.advance()) {
final long[] value = iter.getValue();
final double icof = (inum == 0) ? 0 : ((double) value[0]) / inum / histogram.getBinsize();
final double icaf = ((double) value[0]) / bnum / histogram.getBinsize();
@@ -327,26 +329,26 @@ public class DistanceStatisticsWithClasses<O, D extends NumberDistance<D, ?>> ex
ArrayModifiableDBIDs randomset = DBIDUtil.newArray(randomsize);
DBIDIter iter = relation.iterDBIDs();
- if (!iter.valid()) {
+ if(!iter.valid()) {
throw new IllegalStateException(ExceptionMessages.DATABASE_EMPTY);
}
DBID firstid = DBIDUtil.deref(iter);
iter.advance();
minhotset.add(DBIDUtil.newPair(Double.MAX_VALUE, firstid));
maxhotset.add(DBIDUtil.newPair(Double.MIN_VALUE, firstid));
- for (; iter.valid(); iter.advance()) {
+ for(; iter.valid(); iter.advance()) {
// generate candidates for min distance.
ArrayList<DoubleDBIDPair> np = new ArrayList<>(k * 2 + randomsize * 2);
- for (DoubleDBIDPair pair : minhotset) {
+ for(DoubleDBIDPair pair : minhotset) {
// skip the object itself
- if (DBIDUtil.equal(iter, pair)) {
+ if(DBIDUtil.equal(iter, pair)) {
continue;
}
double d = distFunc.distance(iter, pair).doubleValue();
np.add(DBIDUtil.newPair(d, iter));
np.add(DBIDUtil.newPair(d, pair));
}
- for (DBIDIter iter2 = randomset.iter(); iter2.valid(); iter2.advance()) {
+ for(DBIDIter iter2 = randomset.iter(); iter2.valid(); iter2.advance()) {
double d = distFunc.distance(iter, iter2).doubleValue();
np.add(DBIDUtil.newPair(d, iter));
np.add(DBIDUtil.newPair(d, iter2));
@@ -356,16 +358,16 @@ public class DistanceStatisticsWithClasses<O, D extends NumberDistance<D, ?>> ex
// generate candidates for max distance.
ArrayList<DoubleDBIDPair> np2 = new ArrayList<>(k * 2 + randomsize * 2);
- for (DoubleDBIDPair pair : minhotset) {
+ for(DoubleDBIDPair pair : minhotset) {
// skip the object itself
- if (DBIDUtil.equal(iter, pair)) {
+ if(DBIDUtil.equal(iter, pair)) {
continue;
}
double d = distFunc.distance(iter, pair).doubleValue();
np2.add(DBIDUtil.newPair(d, iter));
np2.add(DBIDUtil.newPair(d, pair));
}
- for (DBIDIter iter2 = randomset.iter(); iter2.valid(); iter2.advance()) {
+ for(DBIDIter iter2 = randomset.iter(); iter2.valid(); iter2.advance()) {
double d = distFunc.distance(iter, iter2).doubleValue();
np.add(DBIDUtil.newPair(d, iter));
np.add(DBIDUtil.newPair(d, iter2));
@@ -374,9 +376,10 @@ public class DistanceStatisticsWithClasses<O, D extends NumberDistance<D, ?>> ex
shrinkHeap(maxhotset, k);
// update random set
- if (randomset.size() < randomsize) {
+ if(randomset.size() < randomsize) {
randomset.add(iter);
- } else if (rnd.nextDouble() < rprob) {
+ }
+ else if(rnd.nextDouble() < rprob) {
randomset.set((int) Math.floor(rnd.nextDouble() * randomsize), iter);
}
}
@@ -393,10 +396,10 @@ public class DistanceStatisticsWithClasses<O, D extends NumberDistance<D, ?>> ex
private DoubleMinMax exactMinMax(Relation<O> relation, DistanceQuery<O, D> distFunc) {
DoubleMinMax minmax = new DoubleMinMax();
// find exact minimum and maximum first.
- for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- for (DBIDIter iditer2 = relation.iterDBIDs(); iditer2.valid(); iditer2.advance()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer2 = relation.iterDBIDs(); iditer2.valid(); iditer2.advance()) {
// skip the point itself.
- if (DBIDUtil.equal(iditer, iditer2)) {
+ if(DBIDUtil.equal(iditer, iditer2)) {
continue;
}
double d = distFunc.distance(iditer, iditer2).doubleValue();
@@ -416,11 +419,12 @@ public class DistanceStatisticsWithClasses<O, D extends NumberDistance<D, ?>> ex
// drop duplicates
ModifiableDBIDs seenids = DBIDUtil.newHashSet(2 * k);
int cnt = 0;
- for (Iterator<DoubleDBIDPair> i = hotset.iterator(); i.hasNext();) {
+ for(Iterator<DoubleDBIDPair> i = hotset.iterator(); i.hasNext();) {
DoubleDBIDPair p = i.next();
- if (cnt > k || seenids.contains(p)) {
+ if(cnt > k || seenids.contains(p)) {
i.remove();
- } else {
+ }
+ else {
seenids.add(p);
cnt++;
}
@@ -464,18 +468,18 @@ public class DistanceStatisticsWithClasses<O, D extends NumberDistance<D, ?>> ex
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
final IntParameter numbinP = new IntParameter(HISTOGRAM_BINS_ID, 20);
- numbinP.addConstraint(new GreaterEqualConstraint(2));
- if (config.grab(numbinP)) {
+ numbinP.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
+ if(config.grab(numbinP)) {
numbin = numbinP.getValue();
}
final Flag exactF = new Flag(EXACT_ID);
- if (config.grab(exactF)) {
+ if(config.grab(exactF)) {
exact = exactF.getValue();
}
final Flag samplingF = new Flag(SAMPLING_ID);
- if (config.grab(samplingF)) {
+ if(config.grab(samplingF)) {
sampling = samplingF.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/statistics/EvaluateRankingQuality.java b/src/de/lmu/ifi/dbs/elki/algorithm/statistics/EvaluateRankingQuality.java
index 76e5ef66..d5d8e407 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/statistics/EvaluateRankingQuality.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/statistics/EvaluateRankingQuality.java
@@ -62,7 +62,7 @@ import de.lmu.ifi.dbs.elki.utilities.datastructures.histogram.ObjHistogram;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -121,7 +121,7 @@ public class EvaluateRankingQuality<V extends NumberVector<?>, D extends NumberD
final DistanceQuery<V, D> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
final KNNQuery<V, D> knnQuery = database.getKNNQuery(distQuery, relation.size());
- if (LOG.isVerbose()) {
+ if(LOG.isVerbose()) {
LOG.verbose("Preprocessing clusters...");
}
// Cluster by labels
@@ -130,7 +130,7 @@ public class EvaluateRankingQuality<V extends NumberVector<?>, D extends NumberD
// Compute cluster averages and covariance matrix
HashMap<Cluster<?>, Vector> averages = new HashMap<>(split.size());
HashMap<Cluster<?>, Matrix> covmats = new HashMap<>(split.size());
- for (Cluster<?> clus : split) {
+ for(Cluster<?> clus : split) {
CovarianceMatrix covmat = CovarianceMatrix.make(relation, clus.getIDs());
averages.put(clus, covmat.getMeanVector());
covmats.put(clus, covmat.destroyToNaiveMatrix());
@@ -138,42 +138,42 @@ public class EvaluateRankingQuality<V extends NumberVector<?>, D extends NumberD
MeanVarianceStaticHistogram hist = new MeanVarianceStaticHistogram(numbins, 0.0, 1.0);
- if (LOG.isVerbose()) {
+ if(LOG.isVerbose()) {
LOG.verbose("Processing points...");
}
FiniteProgress rocloop = LOG.isVerbose() ? new FiniteProgress("Computing ROC AUC values", relation.size(), LOG) : null;
// sort neighbors
- for (Cluster<?> clus : split) {
+ for(Cluster<?> clus : split) {
ArrayList<DoubleDBIDPair> cmem = new ArrayList<>(clus.size());
Vector av = averages.get(clus);
Matrix covm = covmats.get(clus);
- for (DBIDIter iter = clus.getIDs().iter(); iter.valid(); iter.advance()) {
+ for(DBIDIter iter = clus.getIDs().iter(); iter.valid(); iter.advance()) {
double d = MathUtil.mahalanobisDistance(covm, relation.get(iter).getColumnVector().minusEquals(av));
cmem.add(DBIDUtil.newPair(d, iter));
}
Collections.sort(cmem);
- for (int ind = 0; ind < cmem.size(); ind++) {
+ for(int ind = 0; ind < cmem.size(); ind++) {
KNNList<D> knn = knnQuery.getKNNForDBID(cmem.get(ind), relation.size());
double result = ROC.computeROCAUCDistanceResult(relation.size(), clus, knn);
hist.put(((double) ind) / clus.size(), result);
- if (rocloop != null) {
+ if(rocloop != null) {
rocloop.incrementProcessed(LOG);
}
}
}
- if (rocloop != null) {
+ if(rocloop != null) {
rocloop.ensureCompleted(LOG);
}
// Collections.sort(results);
// Transform Histogram into a Double Vector array.
Collection<DoubleVector> res = new ArrayList<>(relation.size());
- for (ObjHistogram.Iter<MeanVariance> iter = hist.iter(); iter.valid(); iter.advance()) {
+ for(ObjHistogram.Iter<MeanVariance> iter = hist.iter(); iter.valid(); iter.advance()) {
DoubleVector row = new DoubleVector(new double[] { iter.getCenter(), iter.getValue().getCount(), iter.getValue().getMean(), iter.getValue().getSampleVariance() });
res.add(row);
}
@@ -207,8 +207,8 @@ public class EvaluateRankingQuality<V extends NumberVector<?>, D extends NumberD
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
final IntParameter param = new IntParameter(HISTOGRAM_BINS_ID, 20);
- param.addConstraint(new GreaterEqualConstraint(2));
- if (config.grab(param)) {
+ param.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
+ if(config.grab(param)) {
numbins = param.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/statistics/RankingQualityHistogram.java b/src/de/lmu/ifi/dbs/elki/algorithm/statistics/RankingQualityHistogram.java
index 58018029..7d0f1bb2 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/statistics/RankingQualityHistogram.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/statistics/RankingQualityHistogram.java
@@ -51,7 +51,7 @@ import de.lmu.ifi.dbs.elki.utilities.datastructures.histogram.DoubleStaticHistog
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -109,7 +109,7 @@ public class RankingQualityHistogram<O, D extends NumberDistance<D, ?>> extends
final DistanceQuery<O, D> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
final KNNQuery<O, D> knnQuery = database.getKNNQuery(distanceQuery, relation.size());
- if (LOG.isVerbose()) {
+ if(LOG.isVerbose()) {
LOG.verbose("Preprocessing clusters...");
}
// Cluster by labels
@@ -117,33 +117,33 @@ public class RankingQualityHistogram<O, D extends NumberDistance<D, ?>> extends
DoubleStaticHistogram hist = new DoubleStaticHistogram(numbins, 0.0, 1.0);
- if (LOG.isVerbose()) {
+ if(LOG.isVerbose()) {
LOG.verbose("Processing points...");
}
FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Computing ROC AUC values", relation.size(), LOG) : null;
MeanVariance mv = new MeanVariance();
// sort neighbors
- for (Cluster<?> clus : split) {
- for (DBIDIter iter = clus.getIDs().iter(); iter.valid(); iter.advance()) {
+ for(Cluster<?> clus : split) {
+ for(DBIDIter iter = clus.getIDs().iter(); iter.valid(); iter.advance()) {
KNNList<D> knn = knnQuery.getKNNForDBID(iter, relation.size());
double result = ROC.computeROCAUCDistanceResult(relation.size(), clus, knn);
mv.put(result);
hist.increment(result, 1. / relation.size());
- if (progress != null) {
+ if(progress != null) {
progress.incrementProcessed(LOG);
}
}
}
- if (progress != null) {
+ if(progress != null) {
progress.ensureCompleted(LOG);
}
// Transform Histogram into a Double Vector array.
Collection<DoubleVector> res = new ArrayList<>(relation.size());
- for (DoubleStaticHistogram.Iter iter = hist.iter(); iter.valid(); iter.advance()) {
+ for(DoubleStaticHistogram.Iter iter = hist.iter(); iter.valid(); iter.advance()) {
DoubleVector row = new DoubleVector(new double[] { iter.getCenter(), iter.getValue() });
res.add(row);
}
@@ -179,8 +179,8 @@ public class RankingQualityHistogram<O, D extends NumberDistance<D, ?>> extends
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
final IntParameter param = new IntParameter(HISTOGRAM_BINS_ID, 100);
- param.addConstraint(new GreaterEqualConstraint(2));
- if (config.grab(param)) {
+ param.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
+ if(config.grab(param)) {
numbins = param.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/application/AbstractApplication.java b/src/de/lmu/ifi/dbs/elki/application/AbstractApplication.java
index db97b4bb..dabf5224 100644
--- a/src/de/lmu/ifi/dbs/elki/application/AbstractApplication.java
+++ b/src/de/lmu/ifi/dbs/elki/application/AbstractApplication.java
@@ -77,7 +77,12 @@ public abstract class AbstractApplication implements Parameterizable {
/**
* Information for citation and version.
*/
- public static final String INFORMATION = "ELKI Version 0.5.5 (2012, December)" + NEWLINE + NEWLINE + "published in:" + NEWLINE + "E. Achtert, S. Goldhofer, H.-P. Kriegel, E. Schubert, A. Zimek:" + NEWLINE + "Evaluation of Clusterings – Metrics and Visual Support." + NEWLINE + "In Proceedings of the 28th" + NEWLINE + "International Conference on Data Engineering (ICDE), Washington, DC, 2012." + NEWLINE;
+ public static final String INFORMATION = "ELKI Version 0.6.0 (2014, January)" + NEWLINE + NEWLINE //
+ + "published in:" + NEWLINE //
+ + "Elke Achtert, Hans-Peter Kriegel, Erich Schubert, Arthur Zimek:" + NEWLINE //
+ + "Interactive Data Mining with 3D-Parallel-Coordinate-Trees." + NEWLINE //
+ + "In Proceedings of the ACM International Conference on " + NEWLINE //
+ + "Management of Data (SIGMOD), New York City, NY, 2013." + NEWLINE;
/**
* Constructor.
@@ -108,55 +113,60 @@ public abstract class AbstractApplication implements Parameterizable {
params.grab(helpLongF);
params.grab(descriptionP);
params.grab(debugP);
- if (descriptionP.isDefined()) {
+ if(descriptionP.isDefined()) {
params.clearErrors();
printDescription(descriptionP.getValue());
return;
}
// Fail silently on errors.
- if (params.getErrors().size() > 0) {
+ if(params.getErrors().size() > 0) {
params.logAndClearReportedErrors();
return;
}
- if (debugP.isDefined()) {
+ if(debugP.isDefined()) {
LoggingUtil.parseDebugParameter(debugP);
}
- } catch (Exception e) {
+ }
+ catch(Exception e) {
printErrorMessage(e);
return;
}
try {
TrackParameters config = new TrackParameters(params);
- if (config.grab(verboseF) && verboseF.isTrue()) {
+ if(config.grab(verboseF) && verboseF.isTrue()) {
// Extra verbosity by repeating the flag:
final Flag verbose2F = new Flag(Parameterizer.VERBOSE_ID);
- if (config.grab(verbose2F) && verbose2F.isTrue()) {
+ if(config.grab(verbose2F) && verbose2F.isTrue()) {
LoggingConfiguration.setVerbose(Level.VERYVERBOSE);
- } else {
+ }
+ else {
LoggingConfiguration.setVerbose(Level.VERBOSE);
}
}
AbstractApplication task = ClassGenericsUtil.tryInstantiate(AbstractApplication.class, cls, config);
- if ((helpF.isDefined() && helpF.getValue()) || (helpLongF.isDefined() && helpLongF.getValue())) {
+ if((helpF.isDefined() && helpF.getValue()) || (helpLongF.isDefined() && helpLongF.getValue())) {
LoggingConfiguration.setVerbose(Level.VERBOSE);
LOG.verbose(usage(config.getAllParameters()));
- } else {
+ }
+ else {
params.logUnusedParameters();
- if (params.getErrors().size() > 0) {
+ if(params.getErrors().size() > 0) {
LoggingConfiguration.setVerbose(Level.VERBOSE);
LOG.verbose("The following configuration errors prevented execution:\n");
- for (ParameterException e : params.getErrors()) {
+ for(ParameterException e : params.getErrors()) {
LOG.verbose(e.getMessage());
}
LOG.verbose("\n");
LOG.verbose("Stopping execution because of configuration errors.");
System.exit(1);
- } else {
+ }
+ else {
task.run();
}
}
- } catch (Exception e) {
+ }
+ catch(Exception e) {
printErrorMessage(e);
}
}
@@ -185,15 +195,18 @@ public abstract class AbstractApplication implements Parameterizable {
* @param e Error Exception.
*/
protected static void printErrorMessage(Exception e) {
- if (e instanceof AbortException) {
+ if(e instanceof AbortException) {
// ensure we actually show the message:
LoggingConfiguration.setVerbose(Level.VERBOSE);
LOG.verbose(e.getMessage());
- } else if (e instanceof UnspecifiedParameterException) {
+ }
+ else if(e instanceof UnspecifiedParameterException) {
LOG.error(e.getMessage());
- } else if (e instanceof ParameterException) {
+ }
+ else if(e instanceof ParameterException) {
LOG.error(e.getMessage());
- } else {
+ }
+ else {
LOG.exception(e);
}
}
@@ -202,7 +215,7 @@ public abstract class AbstractApplication implements Parameterizable {
* Print the description for the given parameter
*/
private static void printDescription(Class<?> descriptionClass) {
- if (descriptionClass != null) {
+ if(descriptionClass != null) {
LoggingConfiguration.setVerbose(Level.VERBOSE);
LOG.verbose(OptionUtil.describeParameterizable(new StringBuilder(), descriptionClass, FormatUtil.getConsoleWidth(), " ").toString());
}
@@ -300,7 +313,7 @@ public abstract class AbstractApplication implements Parameterizable {
protected File getParameterOutputFile(Parameterization config, String description) {
final FileParameter outputP = new FileParameter(OUTPUT_ID, FileParameter.FileType.OUTPUT_FILE);
outputP.setShortDescription(description);
- if (config.grab(outputP)) {
+ if(config.grab(outputP)) {
return outputP.getValue();
}
return null;
@@ -326,7 +339,7 @@ public abstract class AbstractApplication implements Parameterizable {
protected File getParameterInputFile(Parameterization config, String description) {
final FileParameter inputP = new FileParameter(INPUT_ID, FileParameter.FileType.INPUT_FILE);
inputP.setShortDescription(description);
- if (config.grab(inputP)) {
+ if(config.grab(inputP)) {
return inputP.getValue();
}
return null;
diff --git a/src/de/lmu/ifi/dbs/elki/application/ConvertToBundleApplication.java b/src/de/lmu/ifi/dbs/elki/application/ConvertToBundleApplication.java
index 99320838..8b177691 100644
--- a/src/de/lmu/ifi/dbs/elki/application/ConvertToBundleApplication.java
+++ b/src/de/lmu/ifi/dbs/elki/application/ConvertToBundleApplication.java
@@ -28,7 +28,7 @@ import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.channels.FileChannel;
-import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.AbstractDatabase;
import de.lmu.ifi.dbs.elki.datasource.DatabaseConnection;
import de.lmu.ifi.dbs.elki.datasource.FileBasedDatabaseConnection;
import de.lmu.ifi.dbs.elki.datasource.bundle.BundleWriter;
@@ -74,11 +74,11 @@ public class ConvertToBundleApplication extends AbstractApplication {
@Override
public void run() throws UnableToComplyException {
- if (LOG.isVerbose()) {
+ if(LOG.isVerbose()) {
LOG.verbose("Loading data.");
}
MultipleObjectsBundle bundle = input.loadData();
- if (LOG.isVerbose()) {
+ if(LOG.isVerbose()) {
LOG.verbose("Serializing to output file: " + outfile.toString());
}
// TODO: make configurable?
@@ -89,7 +89,8 @@ public class ConvertToBundleApplication extends AbstractApplication {
writer.writeBundleStream(new StreamFromBundle(bundle), channel);
channel.close();
fos.close();
- } catch (IOException e) {
+ }
+ catch(IOException e) {
LOG.exception("IO Error", e);
}
}
@@ -115,8 +116,8 @@ public class ConvertToBundleApplication extends AbstractApplication {
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- ObjectParameter<DatabaseConnection> inputP = new ObjectParameter<>(Database.DATABASE_CONNECTION_ID, DatabaseConnection.class, FileBasedDatabaseConnection.class);
- if (config.grab(inputP)) {
+ ObjectParameter<DatabaseConnection> inputP = new ObjectParameter<>(AbstractDatabase.Parameterizer.DATABASE_CONNECTION_ID, DatabaseConnection.class, FileBasedDatabaseConnection.class);
+ if(config.grab(inputP)) {
input = inputP.instantiateClass(config);
}
outfile = super.getParameterOutputFile(config, "File name to serialize the bundle to.");
diff --git a/src/de/lmu/ifi/dbs/elki/application/ELKILauncher.java b/src/de/lmu/ifi/dbs/elki/application/ELKILauncher.java
index b8d000a3..5c969157 100644
--- a/src/de/lmu/ifi/dbs/elki/application/ELKILauncher.java
+++ b/src/de/lmu/ifi/dbs/elki/application/ELKILauncher.java
@@ -35,6 +35,8 @@ import de.lmu.ifi.dbs.elki.utilities.InspectionUtil;
* Class to launch ELKI.
*
* @author Erich Schubert
+ *
+ * @apiviz.uses AbstractApplication
*/
public class ELKILauncher {
/**
diff --git a/src/de/lmu/ifi/dbs/elki/application/cache/CacheDoubleDistanceKNNLists.java b/src/de/lmu/ifi/dbs/elki/application/cache/CacheDoubleDistanceKNNLists.java
index e9bd4480..d8ddbf17 100644
--- a/src/de/lmu/ifi/dbs/elki/application/cache/CacheDoubleDistanceKNNLists.java
+++ b/src/de/lmu/ifi/dbs/elki/application/cache/CacheDoubleDistanceKNNLists.java
@@ -48,7 +48,7 @@ import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.persistent.ByteArrayUtil;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.FileParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -135,13 +135,13 @@ public class CacheDoubleDistanceKNNLists<O, D extends NumberDistance<D, ?>> exte
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing kNN", relation.size(), LOG) : null;
- for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
+ for(DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
final KNNList<D> nn = knnQ.getKNNForDBID(it, k);
final int nnsize = nn.size();
// Grow the buffer when needed:
- if (nnsize * 12 + 10 > bufsize) {
- while (nnsize * 12 + 10 > bufsize) {
+ if(nnsize * 12 + 10 > bufsize) {
+ while(nnsize * 12 + 10 > bufsize) {
bufsize <<= 1;
}
buffer = ByteBuffer.allocateDirect(bufsize);
@@ -151,32 +151,34 @@ public class CacheDoubleDistanceKNNLists<O, D extends NumberDistance<D, ?>> exte
ByteArrayUtil.writeUnsignedVarint(buffer, it.internalGetIndex());
ByteArrayUtil.writeUnsignedVarint(buffer, nnsize);
int c = 0;
- if (nn instanceof DoubleDistanceDBIDList) {
- for (DoubleDistanceDBIDListIter ni = ((DoubleDistanceDBIDList) nn).iter(); ni.valid(); ni.advance(), c++) {
+ if(nn instanceof DoubleDistanceDBIDList) {
+ for(DoubleDistanceDBIDListIter ni = ((DoubleDistanceDBIDList) nn).iter(); ni.valid(); ni.advance(), c++) {
ByteArrayUtil.writeUnsignedVarint(buffer, ni.internalGetIndex());
buffer.putDouble(ni.doubleDistance());
}
- } else {
- for (DistanceDBIDListIter<D> ni = nn.iter(); ni.valid(); ni.advance(), c++) {
+ }
+ else {
+ for(DistanceDBIDListIter<D> ni = nn.iter(); ni.valid(); ni.advance(), c++) {
ByteArrayUtil.writeUnsignedVarint(buffer, ni.internalGetIndex());
buffer.putDouble(ni.getDistance().doubleValue());
}
}
- if (c != nn.size()) {
+ if(c != nn.size()) {
throw new AbortException("Sizes did not agree. Cache is invalid.");
}
buffer.flip();
channel.write(buffer);
- if (prog != null) {
+ if(prog != null) {
prog.incrementProcessed(LOG);
}
}
- if (prog != null) {
+ if(prog != null) {
prog.ensureCompleted(LOG);
}
lock.release();
- } catch (IOException e) {
+ }
+ catch(IOException e) {
LOG.exception(e);
}
// FIXME: close!
@@ -240,17 +242,17 @@ public class CacheDoubleDistanceKNNLists<O, D extends NumberDistance<D, ?>> exte
input = config.tryInstantiate(InputStep.class);
// Distance function parameter
final ObjectParameter<DistanceFunction<O, D>> dpar = new ObjectParameter<>(DISTANCE_ID, DistanceFunction.class);
- if (config.grab(dpar)) {
+ if(config.grab(dpar)) {
distance = dpar.instantiateClass(config);
}
final IntParameter kpar = new IntParameter(K_ID);
- kpar.addConstraint(new GreaterEqualConstraint(1));
- if (config.grab(kpar)) {
+ kpar.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(kpar)) {
k = kpar.intValue();
}
// Output file parameter
final FileParameter cpar = new FileParameter(CACHE_ID, FileParameter.FileType.OUTPUT_FILE);
- if (config.grab(cpar)) {
+ if(config.grab(cpar)) {
out = cpar.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/application/cache/CacheDoubleDistanceRangeQueries.java b/src/de/lmu/ifi/dbs/elki/application/cache/CacheDoubleDistanceRangeQueries.java
index f9434820..f61874dd 100644
--- a/src/de/lmu/ifi/dbs/elki/application/cache/CacheDoubleDistanceRangeQueries.java
+++ b/src/de/lmu/ifi/dbs/elki/application/cache/CacheDoubleDistanceRangeQueries.java
@@ -48,7 +48,7 @@ import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.persistent.ByteArrayUtil;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.FileParameter;
@@ -249,7 +249,7 @@ public class CacheDoubleDistanceRangeQueries<O> extends AbstractApplication {
distance = dpar.instantiateClass(config);
}
final DoubleParameter kpar = new DoubleParameter(RADIUS_ID);
- kpar.addConstraint(new GreaterEqualConstraint(0.));
+ kpar.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
if (config.grab(kpar)) {
radius = kpar.doubleValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/application/greedyensemble/ComputeKNNOutlierScores.java b/src/de/lmu/ifi/dbs/elki/application/greedyensemble/ComputeKNNOutlierScores.java
index 9dd9ddae..a890172c 100644
--- a/src/de/lmu/ifi/dbs/elki/application/greedyensemble/ComputeKNNOutlierScores.java
+++ b/src/de/lmu/ifi/dbs/elki/application/greedyensemble/ComputeKNNOutlierScores.java
@@ -30,7 +30,6 @@ import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
-import de.lmu.ifi.dbs.elki.algorithm.outlier.ABOD;
import de.lmu.ifi.dbs.elki.algorithm.outlier.KNNOutlier;
import de.lmu.ifi.dbs.elki.algorithm.outlier.KNNWeightOutlier;
import de.lmu.ifi.dbs.elki.algorithm.outlier.lof.LDF;
@@ -42,7 +41,6 @@ import de.lmu.ifi.dbs.elki.algorithm.outlier.trivial.ByLabelOutlier;
import de.lmu.ifi.dbs.elki.algorithm.outlier.trivial.TrivialAllOutlier;
import de.lmu.ifi.dbs.elki.algorithm.outlier.trivial.TrivialNoOutlier;
import de.lmu.ifi.dbs.elki.application.AbstractApplication;
-import de.lmu.ifi.dbs.elki.data.DoubleVector;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.QueryUtil;
@@ -54,9 +52,7 @@ import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
-import de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.PolynomialKernelFunction;
import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.GaussianKernelDensityFunction;
@@ -66,7 +62,7 @@ import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
@@ -133,11 +129,6 @@ public class ComputeKNNOutlierScores<O extends NumberVector<?>, D extends Number
ByLabelOutlier bylabel;
/**
- * Include ABOD in the experiments.
- */
- boolean runabod = false;
-
- /**
* Scaling function.
*/
ScalingFunction scaling;
@@ -173,7 +164,7 @@ public class ComputeKNNOutlierScores<O extends NumberVector<?>, D extends Number
// If there is no kNN preprocessor already, then precompute.
KNNQuery<O, D> knnq = QueryUtil.getKNNQuery(relation, distf, maxk + 2);
- if (!(knnq instanceof PreprocessorKNNQuery)) {
+ if(!(knnq instanceof PreprocessorKNNQuery)) {
LOG.verbose("Running preprocessor ...");
MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<>(relation, distf, maxk + 2);
database.addIndex(preproc);
@@ -181,7 +172,7 @@ public class ComputeKNNOutlierScores<O extends NumberVector<?>, D extends Number
// Test that we now get a proper index query
knnq = QueryUtil.getKNNQuery(relation, distf, maxk + 2);
- if (!(knnq instanceof PreprocessorKNNQuery)) {
+ if(!(knnq instanceof PreprocessorKNNQuery)) {
LOG.warning("Not using preprocessor knn query -- KNN queries using class: " + knnq.getClass());
}
@@ -190,21 +181,23 @@ public class ComputeKNNOutlierScores<O extends NumberVector<?>, D extends Number
final PrintStream fout;
try {
fout = new PrintStream(outfile);
- } catch (FileNotFoundException e) {
+ }
+ catch(FileNotFoundException e) {
throw new AbortException("Cannot create output file.", e);
}
// Control: print the DBIDs in case we are seeing an odd iteration
{
try {
MessageDigest md = MessageDigest.getInstance("MD5");
- for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
md.update((byte) ' ');
md.update(DBIDUtil.toString(iter).getBytes());
}
fout.append("# DBID-series MD5:");
fout.append(Base64.encodeBase64(md.digest()));
fout.append(FormatUtil.NEWLINE);
- } catch (NoSuchAlgorithmException e) {
+ }
+ catch(NoSuchAlgorithmException e) {
throw new AbortException("MD5 not found.");
}
}
@@ -216,7 +209,7 @@ public class ComputeKNNOutlierScores<O extends NumberVector<?>, D extends Number
}
// No/all outliers "results"
boolean withdummy = false;
- if (withdummy) {
+ if(withdummy) {
OutlierResult noresult = (new TrivialNoOutlier()).run(database);
writeResult(fout, ids, noresult, new IdentityScaling(), "no-outliers");
OutlierResult allresult = (new TrivialAllOutlier()).run(database);
@@ -280,17 +273,18 @@ public class ComputeKNNOutlierScores<O extends NumberVector<?>, D extends Number
});
// LDOF
boolean runldof = false;
- if (runldof) {
- LOG.verbose("Running LDOF");
- runForEachK(new AlgRunner() {
- @Override
- public void run(int k, String kstr) {
- LDOF<O, D> ldof = new LDOF<>(distf, k + 1);
- OutlierResult ldofresult = ldof.run(database, relation);
- writeResult(fout, ids, ldofresult, scaling, "LDOF-" + kstr);
- database.getHierarchy().removeSubtree(ldofresult);
- }
- });}
+ if(runldof) {
+ LOG.verbose("Running LDOF");
+ runForEachK(new AlgRunner() {
+ @Override
+ public void run(int k, String kstr) {
+ LDOF<O, D> ldof = new LDOF<>(distf, k + 1);
+ OutlierResult ldofresult = ldof.run(database, relation);
+ writeResult(fout, ids, ldofresult, scaling, "LDOF-" + kstr);
+ database.getHierarchy().removeSubtree(ldofresult);
+ }
+ });
+ }
// Run LDF
LOG.verbose("Running LDF");
runForEachK(new AlgRunner() {
@@ -302,27 +296,6 @@ public class ComputeKNNOutlierScores<O extends NumberVector<?>, D extends Number
database.getHierarchy().removeSubtree(ldfresult);
}
});
- // ABOD
- if (runabod && relation.size() < 10000) {
- try {
- final PolynomialKernelFunction poly = new PolynomialKernelFunction(PolynomialKernelFunction.DEFAULT_DEGREE);
- @SuppressWarnings("unchecked")
- final DistanceFunction<DoubleVector, DoubleDistance> df = DistanceFunction.class.cast(distf);
- LOG.verbose("Running ABOD");
- runForEachK(new AlgRunner() {
- @Override
- public void run(int k, String kstr) {
- ABOD<DoubleVector> abod = new ABOD<>(k, poly, df);
- OutlierResult abodresult = abod.run(database);
- writeResult(fout, ids, abodresult, scaling, "ABOD-" + kstr);
- database.getHierarchy().removeSubtree(abodresult);
- }
- });
- } catch (ClassCastException e) {
- // ABOD might just be not appropriate.
- LOG.warning("Running ABOD failed - probably not appropriate to this data type / distance?", e);
- }
- }
}
/**
@@ -335,14 +308,14 @@ public class ComputeKNNOutlierScores<O extends NumberVector<?>, D extends Number
* @param label Identification label
*/
void writeResult(PrintStream out, DBIDs ids, OutlierResult result, ScalingFunction scaling, String label) {
- if (scaling instanceof OutlierScalingFunction) {
+ if(scaling instanceof OutlierScalingFunction) {
((OutlierScalingFunction) scaling).prepare(result);
}
out.append(label);
Relation<Double> scores = result.getScores();
- for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
double value = scores.get(iter);
- if (scaling != null) {
+ if(scaling != null) {
value = scaling.getScaled(value);
}
out.append(' ').append(FormatUtil.NF.format(value));
@@ -358,7 +331,7 @@ public class ComputeKNNOutlierScores<O extends NumberVector<?>, D extends Number
private void runForEachK(AlgRunner runner) {
final int digits = (int) Math.ceil(Math.log10(maxk));
final int startk = (this.startk > 0) ? this.startk : this.stepk;
- for (int k = startk; k <= maxk; k += stepk) {
+ for(int k = startk; k <= maxk; k += stepk) {
String kstr = String.format("%0" + digits + "d", k);
runner.run(k, kstr);
}
@@ -450,25 +423,26 @@ public class ComputeKNNOutlierScores<O extends NumberVector<?>, D extends Number
inputstep = config.tryInstantiate(InputStep.class);
// Distance function
ObjectParameter<DistanceFunction<? super O, D>> distP = AbstractAlgorithm.makeParameterDistanceFunction(EuclideanDistanceFunction.class, DistanceFunction.class);
- if (config.grab(distP)) {
+ if(config.grab(distP)) {
distf = distP.instantiateClass(config);
}
// k parameters
IntParameter stepkP = new IntParameter(STEPK_ID);
- stepkP.addConstraint(new GreaterConstraint(0));
- if (config.grab(stepkP)) {
+ stepkP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(stepkP)) {
stepk = stepkP.getValue();
}
IntParameter startkP = new IntParameter(STARTK_ID);
startkP.setOptional(true);
- if (config.grab(startkP)) {
+ if(config.grab(startkP)) {
startk = startkP.getValue();
- } else {
+ }
+ else {
startk = stepk;
}
IntParameter maxkP = new IntParameter(MAXK_ID);
- maxkP.addConstraint(new GreaterConstraint(0));
- if (config.grab(maxkP)) {
+ maxkP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(maxkP)) {
maxk = maxkP.getValue();
}
bylabel = config.tryInstantiate(ByLabelOutlier.class);
@@ -477,7 +451,7 @@ public class ComputeKNNOutlierScores<O extends NumberVector<?>, D extends Number
ObjectParameter<ScalingFunction> scalingP = new ObjectParameter<>(SCALING_ID, ScalingFunction.class);
scalingP.setOptional(true);
- if (config.grab(scalingP)) {
+ if(config.grab(scalingP)) {
scaling = scalingP.instantiateClass(config);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/application/jsmap/JSONResultHandler.java b/src/de/lmu/ifi/dbs/elki/application/jsmap/JSONResultHandler.java
index 9b1e723d..16331b02 100644
--- a/src/de/lmu/ifi/dbs/elki/application/jsmap/JSONResultHandler.java
+++ b/src/de/lmu/ifi/dbs/elki/application/jsmap/JSONResultHandler.java
@@ -28,7 +28,7 @@ import de.lmu.ifi.dbs.elki.result.Result;
import de.lmu.ifi.dbs.elki.result.ResultHandler;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessEqualConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -92,7 +92,7 @@ public class JSONResultHandler implements ResultHandler {
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
IntParameter portP = new IntParameter(PORT_ID, port);
- portP.addConstraint(new GreaterEqualConstraint(1));
+ portP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
portP.addConstraint(new LessEqualConstraint(65535));
if(config.grab(portP)) {
this.port = portP.getValue();
diff --git a/src/de/lmu/ifi/dbs/elki/data/Bit.java b/src/de/lmu/ifi/dbs/elki/data/Bit.java
index 219ac2dd..e39d9f42 100644
--- a/src/de/lmu/ifi/dbs/elki/data/Bit.java
+++ b/src/de/lmu/ifi/dbs/elki/data/Bit.java
@@ -42,18 +42,39 @@ public class Bit extends Number {
public static final Pattern BIT_PATTERN = Pattern.compile("^[01]$");
/**
+ * True bit.
+ */
+ public static final Bit TRUE = new Bit(true);
+
+ /**
+ * False bit.
+ */
+ public static final Bit FALSE = new Bit(false);
+
+ /**
* Method to construct a Bit for a given String expression.
*
* @param bit a String expression defining a Bit
* @return a Bit as defined by the given String expression
* @throws NumberFormatException if the given String expression does not fit
- * to the Pattern {@link #BIT_PATTERN BIT_PATTERN}
+ * the defined pattern.
*/
public static Bit valueOf(String bit) throws NumberFormatException {
- if(!BIT_PATTERN.matcher(bit).matches()) {
- throw new NumberFormatException("Input \"" + bit + "\" does not fit required pattern: " + BIT_PATTERN.pattern());
+ final int i = Integer.parseInt(bit);
+ if(i != 0 && i != 1) {
+ throw new NumberFormatException("Input \"" + bit + "\" must be 0 or 1.");
}
- return new Bit(Integer.parseInt(bit));
+ return (i > 0) ? TRUE : FALSE;
+ }
+
+ /**
+ * Convert truth value to a bit.
+ *
+ * @param b Truth value
+ * @return Bit
+ */
+ public static Bit valueOf(boolean b) {
+ return b ? TRUE : FALSE;
}
/**
@@ -65,7 +86,10 @@ public class Bit extends Number {
* Provides a new bit according to the specified boolean value.
*
* @param bit the boolean value of this bit
+ *
+ * @deprecated Use {@link Bit#valueOf} to save memory.
*/
+ @Deprecated
public Bit(boolean bit) {
this.bit = bit;
}
@@ -76,12 +100,14 @@ public class Bit extends Number {
*
* @param bit 1 for true and 0 for false
* @throws IllegalArgumentException if the specified value is neither 0 nor 1.
+ * @deprecated Use {@link Bit#valueOf} to save memory.
*/
+ @Deprecated
public Bit(int bit) throws IllegalArgumentException {
if(bit != 0 && bit != 1) {
throw new IllegalArgumentException("Required: 0 or 1 - found: " + bit);
}
- this.bit = bit == 1;
+ this.bit = (bit == 1);
}
/**
diff --git a/src/de/lmu/ifi/dbs/elki/data/BitVector.java b/src/de/lmu/ifi/dbs/elki/data/BitVector.java
index 750f6f5b..de0edc2d 100644
--- a/src/de/lmu/ifi/dbs/elki/data/BitVector.java
+++ b/src/de/lmu/ifi/dbs/elki/data/BitVector.java
@@ -38,6 +38,8 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
* Provides a BitVector wrapping a BitSet.
*
* @author Arthur Zimek
+ *
+ * @apiviz.composedOf Bit
*/
public class BitVector extends AbstractNumberVector<Bit> {
/**
@@ -178,7 +180,7 @@ public class BitVector extends AbstractNumberVector<Bit> {
public String toString() {
Bit[] bitArray = new Bit[dimensionality];
for (int i = 0; i < dimensionality; i++) {
- bitArray[i] = new Bit(bits.get(i));
+ bitArray[i] = bits.get(i) ? Bit.TRUE : Bit.FALSE;
}
StringBuilder representation = new StringBuilder();
for (Bit bit : bitArray) {
diff --git a/src/de/lmu/ifi/dbs/elki/data/ByteVector.java b/src/de/lmu/ifi/dbs/elki/data/ByteVector.java
new file mode 100644
index 00000000..10442a3f
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/data/ByteVector.java
@@ -0,0 +1,273 @@
+package de.lmu.ifi.dbs.elki.data;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.persistent.ByteArrayUtil;
+import de.lmu.ifi.dbs.elki.persistent.ByteBufferSerializer;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayAdapter;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * A ByteVector stores the data using bytes. This is beneficial e.g. when using
+ * SIFT vectors based on byte values.
+ *
+ * @author Erich Schubert
+ */
+public class ByteVector extends AbstractNumberVector<Byte> {
+ /**
+ * Static instance (object factory).
+ */
+ public static final ByteVector.Factory STATIC = new ByteVector.Factory();
+
+ /**
+ * Serializer for up to 127 dimensions.
+ */
+ public static final ByteBufferSerializer<ByteVector> BYTE_SERIALIZER = new SmallSerializer();
+
+ /**
+ * Serializer for up to 2^15-1 dimensions.
+ */
+ public static final ByteBufferSerializer<ByteVector> SHORT_SERIALIZER = new ShortSerializer();
+
+ /**
+ * Keeps the values of the real vector.
+ */
+ private final byte[] values;
+
+ /**
+ * Private constructor. NOT for public use.
+ *
+ * @param values Value data
+ * @param nocopy Flag to use without copying.
+ */
+ private ByteVector(byte[] values, boolean nocopy) {
+ if (nocopy) {
+ this.values = values;
+ } else {
+ this.values = new byte[values.length];
+ System.arraycopy(values, 0, this.values, 0, values.length);
+ }
+ }
+
+ /**
+ * Provides an ByteVector consisting of the given Byte values.
+ *
+ * @param values the values to be set as values of the ByteVector
+ */
+ public ByteVector(byte[] values) {
+ this.values = new byte[values.length];
+ System.arraycopy(values, 0, this.values, 0, values.length);
+ }
+
+ @Override
+ public int getDimensionality() {
+ return values.length;
+ }
+
+ @Override
+ @Deprecated
+ public Byte getValue(int dimension) {
+ return Byte.valueOf(values[dimension]);
+ }
+
+ @Override
+ public double doubleValue(int dimension) {
+ return values[dimension];
+ }
+
+ @Override
+ public long longValue(int dimension) {
+ return values[dimension];
+ }
+
+ @Override
+ public byte byteValue(int dimension) {
+ return values[dimension];
+ }
+
+ /**
+ * Get a copy of the raw byte[] array.
+ *
+ * @return copy of values array.
+ */
+ public byte[] getValues() {
+ byte[] copy = new byte[values.length];
+ System.arraycopy(values, 0, copy, 0, values.length);
+ return copy;
+ }
+
+ @Override
+ public Vector getColumnVector() {
+ double[] data = new double[values.length];
+ for (int i = 0; i < values.length; i++) {
+ data[i] = values[i];
+ }
+ return new Vector(data);
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder featureLine = new StringBuilder();
+ for (int i = 0; i < values.length; i++) {
+ featureLine.append(values[i]);
+ if (i + 1 < values.length) {
+ featureLine.append(ATTRIBUTE_SEPARATOR);
+ }
+ }
+ return featureLine.toString();
+ }
+
+ /**
+ * Factory for Byte vectors.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has ByteVector
+ */
+ public static class Factory extends AbstractNumberVector.Factory<ByteVector, Byte> {
+ @Override
+ public <A> ByteVector newFeatureVector(A array, ArrayAdapter<Byte, A> adapter) {
+ int dim = adapter.size(array);
+ byte[] values = new byte[dim];
+ for (int i = 0; i < dim; i++) {
+ values[i] = adapter.get(array, i);
+ }
+ return new ByteVector(values, true);
+ }
+
+ @Override
+ public <A> ByteVector newNumberVector(A array, NumberArrayAdapter<?, ? super A> adapter) {
+ int dim = adapter.size(array);
+ byte[] values = new byte[dim];
+ for (int i = 0; i < dim; i++) {
+ values[i] = adapter.getByte(array, i);
+ }
+ return new ByteVector(values, true);
+ }
+
+ @Override
+ public ByteBufferSerializer<ByteVector> getDefaultSerializer() {
+ return SHORT_SERIALIZER;
+ }
+
+ @Override
+ public Class<? super ByteVector> getRestrictionClass() {
+ return ByteVector.class;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ @Override
+ protected ByteVector.Factory makeInstance() {
+ return STATIC;
+ }
+ }
+ }
+
+ /**
+ * Serialization class for dense Byte vectors with up to 127 dimensions, by
+ * using a byte for storing the dimensionality.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.uses ByteVector - - «serializes»
+ */
+ public static class SmallSerializer implements ByteBufferSerializer<ByteVector> {
+ @Override
+ public ByteVector fromByteBuffer(ByteBuffer buffer) throws IOException {
+ final byte dimensionality = buffer.get();
+ assert (buffer.remaining() >= ByteArrayUtil.SIZE_BYTE * dimensionality);
+ final byte[] values = new byte[dimensionality];
+ for (int i = 0; i < dimensionality; i++) {
+ values[i] = buffer.get();
+ }
+ return new ByteVector(values, true);
+ }
+
+ @Override
+ public void toByteBuffer(ByteBuffer buffer, ByteVector vec) throws IOException {
+ assert (vec.values.length < Byte.MAX_VALUE) : "This serializer only supports a maximum dimensionality of " + Byte.MAX_VALUE + "!";
+ assert (buffer.remaining() >= ByteArrayUtil.SIZE_BYTE * vec.values.length);
+ buffer.put((byte) vec.values.length);
+ for (int i = 0; i < vec.values.length; i++) {
+ buffer.put(vec.values[i]);
+ }
+ }
+
+ @Override
+ public int getByteSize(ByteVector vec) {
+ assert (vec.values.length < Byte.MAX_VALUE) : "This serializer only supports a maximum dimensionality of " + Byte.MAX_VALUE + "!";
+ return ByteArrayUtil.SIZE_BYTE + ByteArrayUtil.SIZE_BYTE * vec.getDimensionality();
+ }
+ }
+
+ /**
+ * Serialization class for dense Byte vectors with up to
+ * {@link Short#MAX_VALUE} dimensions, by using a short for storing the
+ * dimensionality.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.uses ByteVector - - «serializes»
+ */
+ public static class ShortSerializer implements ByteBufferSerializer<ByteVector> {
+ @Override
+ public ByteVector fromByteBuffer(ByteBuffer buffer) throws IOException {
+ final short dimensionality = buffer.getShort();
+ assert (buffer.remaining() >= ByteArrayUtil.SIZE_BYTE * dimensionality);
+ final byte[] values = new byte[dimensionality];
+ for (int i = 0; i < dimensionality; i++) {
+ values[i] = buffer.get();
+ }
+ return new ByteVector(values, true);
+ }
+
+ @Override
+ public void toByteBuffer(ByteBuffer buffer, ByteVector vec) throws IOException {
+ assert (vec.values.length < Short.MAX_VALUE) : "This serializer only supports a maximum dimensionality of " + Short.MAX_VALUE + "!";
+ assert (buffer.remaining() >= ByteArrayUtil.SIZE_BYTE * vec.values.length);
+ buffer.putShort((short) vec.values.length);
+ for (int i = 0; i < vec.values.length; i++) {
+ buffer.put(vec.values[i]);
+ }
+ }
+
+ @Override
+ public int getByteSize(ByteVector vec) {
+ assert (vec.values.length < Short.MAX_VALUE) : "This serializer only supports a maximum dimensionality of " + Short.MAX_VALUE + "!";
+ return ByteArrayUtil.SIZE_SHORT + ByteArrayUtil.SIZE_BYTE * vec.getDimensionality();
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/data/DoubleVector.java b/src/de/lmu/ifi/dbs/elki/data/DoubleVector.java
index 6c13c342..4f427d04 100644
--- a/src/de/lmu/ifi/dbs/elki/data/DoubleVector.java
+++ b/src/de/lmu/ifi/dbs/elki/data/DoubleVector.java
@@ -27,8 +27,6 @@ import gnu.trove.list.TDoubleList;
import java.io.IOException;
import java.nio.ByteBuffer;
-import java.util.Iterator;
-import java.util.List;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
import de.lmu.ifi.dbs.elki.persistent.ByteArrayUtil;
@@ -78,26 +76,7 @@ public class DoubleVector extends AbstractNumberVector<Double> {
* @param nocopy Flag to not copy the array
*/
private DoubleVector(double[] values, boolean nocopy) {
- if (nocopy) {
- this.values = values;
- } else {
- this.values = new double[values.length];
- System.arraycopy(values, 0, this.values, 0, values.length);
- }
- }
-
- /**
- * Provides a feature vector consisting of double values according to the
- * given Double values.
- *
- * @param values the values to be set as values of the real vector
- */
- public DoubleVector(List<Double> values) {
- int i = 0;
- this.values = new double[values.size()];
- for (Iterator<Double> iter = values.iterator(); iter.hasNext(); i++) {
- this.values[i] = (iter.next());
- }
+ this.values = nocopy ? values : values.clone();
}
/**
@@ -106,20 +85,7 @@ public class DoubleVector extends AbstractNumberVector<Double> {
* @param values the values to be set as values of the DoubleVector
*/
public DoubleVector(double[] values) {
- this.values = new double[values.length];
- System.arraycopy(values, 0, this.values, 0, values.length);
- }
-
- /**
- * Provides a DoubleVector consisting of the given double values.
- *
- * @param values the values to be set as values of the DoubleVector
- */
- public DoubleVector(Double[] values) {
- this.values = new double[values.length];
- for (int i = 0; i < values.length; i++) {
- this.values[i] = values[i];
- }
+ this.values = values.clone();
}
/**
@@ -128,10 +94,7 @@ public class DoubleVector extends AbstractNumberVector<Double> {
* @param columnMatrix a matrix of one column
*/
public DoubleVector(Vector columnMatrix) {
- values = new double[columnMatrix.getDimensionality()];
- for (int i = 0; i < values.length; i++) {
- values[i] = columnMatrix.get(i);
- }
+ this.values = columnMatrix.getArrayCopy();
}
@Override
@@ -161,9 +124,7 @@ public class DoubleVector extends AbstractNumberVector<Double> {
* @return copy of values array.
*/
public double[] getValues() {
- double[] copy = new double[values.length];
- System.arraycopy(values, 0, copy, 0, values.length);
- return copy;
+ return values.clone();
}
@Override
@@ -176,9 +137,9 @@ public class DoubleVector extends AbstractNumberVector<Double> {
@Override
public String toString() {
StringBuilder featureLine = new StringBuilder();
- for (int i = 0; i < values.length; i++) {
+ for(int i = 0; i < values.length; i++) {
featureLine.append(values[i]);
- if (i + 1 < values.length) {
+ if(i + 1 < values.length) {
featureLine.append(ATTRIBUTE_SEPARATOR);
}
}
@@ -202,7 +163,7 @@ public class DoubleVector extends AbstractNumberVector<Double> {
public <A> DoubleVector newFeatureVector(A array, ArrayAdapter<Double, A> adapter) {
int dim = adapter.size(array);
double[] values = new double[dim];
- for (int i = 0; i < dim; i++) {
+ for(int i = 0; i < dim; i++) {
values[i] = adapter.get(array, i);
}
return new DoubleVector(values, true);
@@ -210,12 +171,12 @@ public class DoubleVector extends AbstractNumberVector<Double> {
@Override
public <A> DoubleVector newNumberVector(A array, NumberArrayAdapter<?, ? super A> adapter) {
- if (adapter == ArrayLikeUtil.TDOUBLELISTADAPTER) {
+ if(adapter == ArrayLikeUtil.TDOUBLELISTADAPTER) {
return new DoubleVector(((TDoubleList) array).toArray(), true);
}
final int dim = adapter.size(array);
double[] values = new double[dim];
- for (int i = 0; i < dim; i++) {
+ for(int i = 0; i < dim; i++) {
values[i] = adapter.getDouble(array, i);
}
return new DoubleVector(values, true);
@@ -260,7 +221,7 @@ public class DoubleVector extends AbstractNumberVector<Double> {
final byte dimensionality = buffer.get();
assert (buffer.remaining() >= ByteArrayUtil.SIZE_DOUBLE * dimensionality);
final double[] values = new double[dimensionality];
- for (int i = 0; i < dimensionality; i++) {
+ for(int i = 0; i < dimensionality; i++) {
values[i] = buffer.getDouble();
}
return new DoubleVector(values, true);
@@ -271,7 +232,7 @@ public class DoubleVector extends AbstractNumberVector<Double> {
assert (vec.values.length < Byte.MAX_VALUE) : "This serializer only supports a maximum dimensionality of " + Byte.MAX_VALUE + "!";
assert (buffer.remaining() >= ByteArrayUtil.SIZE_DOUBLE * vec.values.length);
buffer.put((byte) vec.values.length);
- for (int i = 0; i < vec.values.length; i++) {
+ for(int i = 0; i < vec.values.length; i++) {
buffer.putDouble(vec.values[i]);
}
}
@@ -298,7 +259,7 @@ public class DoubleVector extends AbstractNumberVector<Double> {
final short dimensionality = buffer.getShort();
assert (buffer.remaining() >= ByteArrayUtil.SIZE_DOUBLE * dimensionality);
final double[] values = new double[dimensionality];
- for (int i = 0; i < dimensionality; i++) {
+ for(int i = 0; i < dimensionality; i++) {
values[i] = buffer.getDouble();
}
return new DoubleVector(values, true);
@@ -309,7 +270,7 @@ public class DoubleVector extends AbstractNumberVector<Double> {
assert (vec.values.length < Short.MAX_VALUE) : "This serializer only supports a maximum dimensionality of " + Short.MAX_VALUE + "!";
assert (buffer.remaining() >= ByteArrayUtil.SIZE_DOUBLE * vec.values.length);
buffer.putShort((short) vec.values.length);
- for (int i = 0; i < vec.values.length; i++) {
+ for(int i = 0; i < vec.values.length; i++) {
buffer.putDouble(vec.values[i]);
}
}
@@ -334,7 +295,7 @@ public class DoubleVector extends AbstractNumberVector<Double> {
final int dimensionality = ByteArrayUtil.readUnsignedVarint(buffer);
assert (buffer.remaining() >= ByteArrayUtil.SIZE_DOUBLE * dimensionality);
final double[] values = new double[dimensionality];
- for (int i = 0; i < dimensionality; i++) {
+ for(int i = 0; i < dimensionality; i++) {
values[i] = buffer.getDouble();
}
return new DoubleVector(values, true);
@@ -344,7 +305,7 @@ public class DoubleVector extends AbstractNumberVector<Double> {
public void toByteBuffer(ByteBuffer buffer, DoubleVector vec) throws IOException {
assert (buffer.remaining() >= ByteArrayUtil.SIZE_DOUBLE * vec.values.length);
ByteArrayUtil.writeUnsignedVarint(buffer, vec.values.length);
- for (int i = 0; i < vec.values.length; i++) {
+ for(int i = 0; i < vec.values.length; i++) {
buffer.putDouble(vec.values[i]);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/data/FloatVector.java b/src/de/lmu/ifi/dbs/elki/data/FloatVector.java
index d750af01..6db8cd24 100644
--- a/src/de/lmu/ifi/dbs/elki/data/FloatVector.java
+++ b/src/de/lmu/ifi/dbs/elki/data/FloatVector.java
@@ -25,8 +25,6 @@ package de.lmu.ifi.dbs.elki.data;
import java.io.IOException;
import java.nio.ByteBuffer;
-import java.util.Iterator;
-import java.util.List;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
import de.lmu.ifi.dbs.elki.persistent.ByteArrayUtil;
@@ -37,7 +35,8 @@ import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
/**
- * A FloatVector is to store real values with lower memory requirements by using float values.
+ * A FloatVector is to store real values with lower memory requirements by using
+ * float values.
*
* @author Elke Achtert
*/
@@ -74,26 +73,7 @@ public class FloatVector extends AbstractNumberVector<Float> {
* @param nocopy Flag to re-use the values array
*/
private FloatVector(float[] values, boolean nocopy) {
- if (nocopy) {
- this.values = values;
- } else {
- this.values = new float[values.length];
- System.arraycopy(values, 0, this.values, 0, values.length);
- }
- }
-
- /**
- * Provides a FloatVector consisting of float values according to the given
- * Float values.
- *
- * @param values the values to be set as values of the float vector
- */
- public FloatVector(List<Float> values) {
- int i = 0;
- this.values = new float[values.size()];
- for (Iterator<Float> iter = values.iterator(); iter.hasNext(); i++) {
- this.values[i] = (iter.next());
- }
+ this.values = nocopy ? values : values.clone();
}
/**
@@ -102,20 +82,7 @@ public class FloatVector extends AbstractNumberVector<Float> {
* @param values the values to be set as values of the float vector
*/
public FloatVector(float[] values) {
- this.values = new float[values.length];
- System.arraycopy(values, 0, this.values, 0, values.length);
- }
-
- /**
- * Provides a FloatVector consisting of the given float values.
- *
- * @param values the values to be set as values of the float vector
- */
- public FloatVector(Float[] values) {
- this.values = new float[values.length];
- for (int i = 0; i < values.length; i++) {
- this.values[i] = values[i];
- }
+ this.values = values.clone();
}
/**
@@ -124,9 +91,10 @@ public class FloatVector extends AbstractNumberVector<Float> {
* @param columnMatrix a matrix of one column
*/
public FloatVector(Vector columnMatrix) {
- values = new float[columnMatrix.getDimensionality()];
- for (int i = 0; i < values.length; i++) {
- values[i] = (float) columnMatrix.get(i);
+ final double[] src = columnMatrix.getArrayRef();
+ values = new float[src.length];
+ for(int i = 0; i < src.length; i++) {
+ values[i] = (float) src[i];
}
}
@@ -138,29 +106,17 @@ public class FloatVector extends AbstractNumberVector<Float> {
@Deprecated
@Override
public Float getValue(int dimension) {
- try {
- return values[dimension - 1];
- } catch (ArrayIndexOutOfBoundsException e) {
- throw new IllegalArgumentException("Dimension " + dimension + " out of range.");
- }
+ return values[dimension];
}
@Override
public double doubleValue(int dimension) {
- try {
- return values[dimension - 1];
- } catch (ArrayIndexOutOfBoundsException e) {
- throw new IllegalArgumentException("Dimension " + dimension + " out of range.");
- }
+ return values[dimension];
}
@Override
public long longValue(int dimension) {
- try {
- return (long) values[dimension - 1];
- } catch (ArrayIndexOutOfBoundsException e) {
- throw new IllegalArgumentException("Dimension " + dimension + " out of range.");
- }
+ return (long) values[dimension];
}
@Override
@@ -171,9 +127,9 @@ public class FloatVector extends AbstractNumberVector<Float> {
@Override
public String toString() {
StringBuilder featureLine = new StringBuilder();
- for (int i = 0; i < values.length; i++) {
+ for(int i = 0; i < values.length; i++) {
featureLine.append(values[i]);
- if (i + 1 < values.length) {
+ if(i + 1 < values.length) {
featureLine.append(ATTRIBUTE_SEPARATOR);
}
}
@@ -192,7 +148,7 @@ public class FloatVector extends AbstractNumberVector<Float> {
public <A> FloatVector newFeatureVector(A array, ArrayAdapter<Float, A> adapter) {
int dim = adapter.size(array);
float[] values = new float[dim];
- for (int i = 0; i < dim; i++) {
+ for(int i = 0; i < dim; i++) {
values[i] = adapter.get(array, i);
}
return new FloatVector(values, true);
@@ -202,7 +158,7 @@ public class FloatVector extends AbstractNumberVector<Float> {
public <A> FloatVector newNumberVector(A array, NumberArrayAdapter<?, ? super A> adapter) {
int dim = adapter.size(array);
float[] values = new float[dim];
- for (int i = 0; i < dim; i++) {
+ for(int i = 0; i < dim; i++) {
values[i] = adapter.getFloat(array, i);
}
return new FloatVector(values, true);
@@ -212,7 +168,7 @@ public class FloatVector extends AbstractNumberVector<Float> {
public ByteBufferSerializer<FloatVector> getDefaultSerializer() {
return VARIABLE_SERIALIZER;
}
-
+
@Override
public Class<? super FloatVector> getRestrictionClass() {
return FloatVector.class;
@@ -247,7 +203,7 @@ public class FloatVector extends AbstractNumberVector<Float> {
final byte dimensionality = buffer.get();
assert (buffer.remaining() >= ByteArrayUtil.SIZE_FLOAT * dimensionality);
final float[] values = new float[dimensionality];
- for (int i = 0; i < dimensionality; i++) {
+ for(int i = 0; i < dimensionality; i++) {
values[i] = buffer.getFloat();
}
return new FloatVector(values, true);
@@ -258,7 +214,7 @@ public class FloatVector extends AbstractNumberVector<Float> {
assert (vec.values.length < Byte.MAX_VALUE) : "This serializer only supports a maximum dimensionality of " + Byte.MAX_VALUE + "!";
assert (buffer.remaining() >= ByteArrayUtil.SIZE_FLOAT * vec.values.length);
buffer.put((byte) vec.values.length);
- for (int i = 0; i < vec.values.length; i++) {
+ for(int i = 0; i < vec.values.length; i++) {
buffer.putFloat(vec.values[i]);
}
}
@@ -285,7 +241,7 @@ public class FloatVector extends AbstractNumberVector<Float> {
final short dimensionality = buffer.getShort();
assert (buffer.remaining() >= ByteArrayUtil.SIZE_FLOAT * dimensionality);
final float[] values = new float[dimensionality];
- for (int i = 0; i < dimensionality; i++) {
+ for(int i = 0; i < dimensionality; i++) {
values[i] = buffer.getFloat();
}
return new FloatVector(values, true);
@@ -296,7 +252,7 @@ public class FloatVector extends AbstractNumberVector<Float> {
assert (vec.values.length < Short.MAX_VALUE) : "This serializer only supports a maximum dimensionality of " + Short.MAX_VALUE + "!";
assert (buffer.remaining() >= ByteArrayUtil.SIZE_FLOAT * vec.values.length);
buffer.putShort((short) vec.values.length);
- for (int i = 0; i < vec.values.length; i++) {
+ for(int i = 0; i < vec.values.length; i++) {
buffer.putFloat(vec.values[i]);
}
}
@@ -321,7 +277,7 @@ public class FloatVector extends AbstractNumberVector<Float> {
final int dimensionality = ByteArrayUtil.readUnsignedVarint(buffer);
assert (buffer.remaining() >= ByteArrayUtil.SIZE_FLOAT * dimensionality);
final float[] values = new float[dimensionality];
- for (int i = 0; i < dimensionality; i++) {
+ for(int i = 0; i < dimensionality; i++) {
values[i] = buffer.getFloat();
}
return new FloatVector(values, true);
@@ -332,7 +288,7 @@ public class FloatVector extends AbstractNumberVector<Float> {
assert (vec.values.length < Short.MAX_VALUE) : "This serializer only supports a maximum dimensionality of " + Short.MAX_VALUE + "!";
assert (buffer.remaining() >= ByteArrayUtil.SIZE_FLOAT * vec.values.length);
ByteArrayUtil.writeUnsignedVarint(buffer, vec.values.length);
- for (int i = 0; i < vec.values.length; i++) {
+ for(int i = 0; i < vec.values.length; i++) {
buffer.putFloat(vec.values[i]);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/data/IntegerVector.java b/src/de/lmu/ifi/dbs/elki/data/IntegerVector.java
index a5f95650..aed931ef 100644
--- a/src/de/lmu/ifi/dbs/elki/data/IntegerVector.java
+++ b/src/de/lmu/ifi/dbs/elki/data/IntegerVector.java
@@ -108,51 +108,22 @@ public class IntegerVector extends AbstractNumberVector<Integer> {
@Override
@Deprecated
public Integer getValue(int dimension) {
- try {
- return Integer.valueOf(values[dimension]);
- } catch (IndexOutOfBoundsException e) {
- throw new IllegalArgumentException("Dimension " + dimension + " out of range.");
- }
+ return Integer.valueOf(values[dimension]);
}
- /**
- * Returns the value of the specified attribute.
- *
- * @param dimension the selected attribute. Attributes are counted starting
- * with 0.
- *
- * @throws IllegalArgumentException if the specified dimension is out of range
- * of the possible attributes
- *
- * {@inheritDoc}
- */
@Override
public double doubleValue(int dimension) {
- try {
- return values[dimension];
- } catch (IndexOutOfBoundsException e) {
- throw new IllegalArgumentException("Dimension " + dimension + " out of range.");
- }
+ return values[dimension];
}
- /**
- * Returns the value of the specified attribute as long.
- *
- * @param dimension the selected attribute. Attributes are counted starting
- * with 0.
- *
- * @throws IllegalArgumentException if the specified dimension is out of range
- * of the possible attributes
- *
- * {@inheritDoc}
- */
@Override
public long longValue(int dimension) {
- try {
- return values[dimension];
- } catch (IndexOutOfBoundsException e) {
- throw new IllegalArgumentException("Dimension " + dimension + " out of range.");
- }
+ return values[dimension];
+ }
+
+ @Override
+ public int intValue(int dimension) {
+ return values[dimension];
}
/**
@@ -219,7 +190,7 @@ public class IntegerVector extends AbstractNumberVector<Integer> {
public ByteBufferSerializer<IntegerVector> getDefaultSerializer() {
return VARIABLE_SERIALIZER;
}
-
+
@Override
public Class<? super IntegerVector> getRestrictionClass() {
return IntegerVector.class;
diff --git a/src/de/lmu/ifi/dbs/elki/data/LabelList.java b/src/de/lmu/ifi/dbs/elki/data/LabelList.java
index 74685710..dc84439a 100644
--- a/src/de/lmu/ifi/dbs/elki/data/LabelList.java
+++ b/src/de/lmu/ifi/dbs/elki/data/LabelList.java
@@ -25,7 +25,6 @@ package de.lmu.ifi.dbs.elki.data;
import java.io.IOException;
import java.nio.ByteBuffer;
-import java.util.ArrayList;
import java.util.Collection;
import de.lmu.ifi.dbs.elki.persistent.ByteArrayUtil;
@@ -39,45 +38,80 @@ import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
*
* @apiviz.composedOf String
*/
-public class LabelList extends ArrayList<String> {
+public class LabelList {
/**
* Serializer.
*/
public static final ByteBufferSerializer<LabelList> SERIALIZER = new Serializer();
/**
- * Serial number.
+ * Labels.
*/
- private static final long serialVersionUID = 1L;
+ private String[] labels;
+
+ /**
+ * Empty label list.
+ */
+ public static final LabelList EMPTY_LABELS = new LabelList(0);
/**
* Constructor.
+ *
+ * @param initialCapacity initial size
*/
- public LabelList() {
+ private LabelList(int initialCapacity) {
super();
+ labels = new String[initialCapacity];
}
/**
- * Constructor.
+ * Private constructor. Use {@link #make}.
*
- * @param c existing collection
+ * @param array Label list
*/
- public LabelList(Collection<? extends String> c) {
- super(c);
+ protected LabelList(String[] array) {
+ super();
+ this.labels = array;
}
/**
- * Constructor.
+ * Constructor replacement.
*
- * @param initialCapacity initial size
+ * When the label list is empty, it will produce the same instance!
+ *
+ * @param labels Existing labels
+ * @return Label list instance.
+ */
+ public static LabelList make(Collection<String> labels) {
+ int size = labels.size();
+ if(size == 0) {
+ return EMPTY_LABELS;
+ }
+ return new LabelList(labels.toArray(new String[size]));
+ }
+
+ /**
+ * Size of label list.
+ *
+ * @return Size
+ */
+ public int size() {
+ return labels.length;
+ }
+
+ /**
+ * Get the label at position i.
+ *
+ * @param i Position
+ * @return Label
*/
- public LabelList(int initialCapacity) {
- super(initialCapacity);
+ public String get(int i) {
+ return labels[i];
}
@Override
public String toString() {
- return FormatUtil.format(this, " ");
+ return FormatUtil.format(labels, " ");
}
/**
@@ -92,27 +126,27 @@ public class LabelList extends ArrayList<String> {
public LabelList fromByteBuffer(ByteBuffer buffer) throws IOException {
final int cnt = ByteArrayUtil.readUnsignedVarint(buffer);
LabelList ret = new LabelList(cnt);
- for (int i = 0; i < cnt; i++) {
- ret.add(ByteArrayUtil.STRING_SERIALIZER.fromByteBuffer(buffer));
+ for(int i = 0; i < cnt; i++) {
+ ret.labels[i] = ByteArrayUtil.STRING_SERIALIZER.fromByteBuffer(buffer);
}
return ret;
}
@Override
public void toByteBuffer(ByteBuffer buffer, LabelList object) throws IOException {
- final int cnt = object.size();
+ final int cnt = object.labels.length;
ByteArrayUtil.writeUnsignedVarint(buffer, cnt);
- for (int i = 0; i < cnt; i++) {
- ByteArrayUtil.STRING_SERIALIZER.toByteBuffer(buffer, object.get(i));
+ for(int i = 0; i < cnt; i++) {
+ ByteArrayUtil.STRING_SERIALIZER.toByteBuffer(buffer, object.labels[i]);
}
}
@Override
public int getByteSize(LabelList object) throws IOException {
- final int cnt = object.size();
+ final int cnt = object.labels.length;
int size = ByteArrayUtil.getUnsignedVarintSize(cnt);
- for (int i = 0; i < cnt; i++) {
- size += ByteArrayUtil.STRING_SERIALIZER.getByteSize(object.get(i));
+ for(int i = 0; i < cnt; i++) {
+ size += ByteArrayUtil.STRING_SERIALIZER.getByteSize(object.labels[i]);
}
return size;
}
diff --git a/src/de/lmu/ifi/dbs/elki/data/RationalNumber.java b/src/de/lmu/ifi/dbs/elki/data/RationalNumber.java
index 6d9cbe8f..1b7c46f6 100644
--- a/src/de/lmu/ifi/dbs/elki/data/RationalNumber.java
+++ b/src/de/lmu/ifi/dbs/elki/data/RationalNumber.java
@@ -25,6 +25,8 @@ package de.lmu.ifi.dbs.elki.data;
import java.math.BigInteger;
+import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
+
/**
* RationalNumber represents rational numbers in arbitrary precision. Note that
* the best possible precision is the primary objective of this class. Since
@@ -128,15 +130,15 @@ public class RationalNumber extends Number implements Arithmetic<RationalNumber>
*/
public RationalNumber(final String doubleString) throws IllegalArgumentException {
try {
- Double number = Double.parseDouble(doubleString);
- if(number.isInfinite()) {
+ double number = FormatUtil.parseDouble(doubleString);
+ if(Double.isInfinite(number)) {
throw new IllegalArgumentException("given number is infinite");
}
- if(number.isNaN()) {
+ if(Double.isNaN(number)) {
throw new IllegalArgumentException("given number is NotANumber");
}
// ensure standard encoding of the double argument
- String standardDoubleString = number.toString();
+ String standardDoubleString = Double.toString(number);
// index of decimal point '.'
int pointIndex = standardDoubleString.indexOf('\u002E');
// read integer part
diff --git a/src/de/lmu/ifi/dbs/elki/data/ShortVector.java b/src/de/lmu/ifi/dbs/elki/data/ShortVector.java
new file mode 100644
index 00000000..fe82abe7
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/data/ShortVector.java
@@ -0,0 +1,290 @@
+package de.lmu.ifi.dbs.elki.data;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.persistent.ByteArrayUtil;
+import de.lmu.ifi.dbs.elki.persistent.ByteBufferSerializer;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayAdapter;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * A ShortVector stores short integer values.
+ *
+ * @author Erich Schubert
+ */
+public class ShortVector extends AbstractNumberVector<Short> {
+ /**
+ * Static instance (object factory).
+ */
+ public static final ShortVector.Factory STATIC = new ShortVector.Factory();
+
+ /**
+ * Serializer for up to 2^15-1 dimensions.
+ */
+ public static final ByteBufferSerializer<ShortVector> SHORT_SERIALIZER = new ShortSerializer();
+
+ /**
+ * Serializer using varint encoding.
+ */
+ public static final ByteBufferSerializer<ShortVector> VARIABLE_SERIALIZER = new VariableSerializer();
+
+ /**
+ * Keeps the values of the real vector.
+ */
+ private final short[] values;
+
+ /**
+ * Private constructor. NOT for public use.
+ *
+ * @param values Value data
+ * @param nocopy Flag to use without copying.
+ */
+ private ShortVector(short[] values, boolean nocopy) {
+ if (nocopy) {
+ this.values = values;
+ } else {
+ this.values = new short[values.length];
+ System.arraycopy(values, 0, this.values, 0, values.length);
+ }
+ }
+
+ /**
+ * Provides a ShortVector consisting of the given short values.
+ *
+ * @param values the values to be set as values of the ShortVector
+ */
+ public ShortVector(short[] values) {
+ this.values = new short[values.length];
+ System.arraycopy(values, 0, this.values, 0, values.length);
+ }
+
+ @Override
+ public int getDimensionality() {
+ return values.length;
+ }
+
+ /**
+ * Returns the value of the specified attribute.
+ *
+ * @param dimension the selected attribute. Attributes are counted starting
+ * with 0.
+ *
+ * @throws IllegalArgumentException if the specified dimension is out of range
+ * of the possible attributes
+ *
+ * {@inheritDoc}
+ */
+ @Override
+ @Deprecated
+ public Short getValue(int dimension) {
+ return Short.valueOf(values[dimension]);
+ }
+
+ @Override
+ public double doubleValue(int dimension) {
+ return values[dimension];
+ }
+
+ @Override
+ public long longValue(int dimension) {
+ return values[dimension];
+ }
+
+ @Override
+ public int intValue(int dimension) {
+ return values[dimension];
+ }
+
+ @Override
+ public short shortValue(int dimension) {
+ return values[dimension];
+ }
+
+ /**
+ * Get a copy of the raw short[] array.
+ *
+ * @return copy of values array.
+ */
+ public short[] getValues() {
+ short[] copy = new short[values.length];
+ System.arraycopy(values, 0, copy, 0, values.length);
+ return copy;
+ }
+
+ @Override
+ public Vector getColumnVector() {
+ double[] data = new double[values.length];
+ for (int i = 0; i < values.length; i++) {
+ data[i] = values[i];
+ }
+ return new Vector(data);
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder featureLine = new StringBuilder();
+ for (int i = 0; i < values.length; i++) {
+ featureLine.append(values[i]);
+ if (i + 1 < values.length) {
+ featureLine.append(ATTRIBUTE_SEPARATOR);
+ }
+ }
+ return featureLine.toString();
+ }
+
+ /**
+ * Factory for Short vectors.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has ShortVector
+ */
+ public static class Factory extends AbstractNumberVector.Factory<ShortVector, Short> {
+ @Override
+ public <A> ShortVector newFeatureVector(A array, ArrayAdapter<Short, A> adapter) {
+ int dim = adapter.size(array);
+ short[] values = new short[dim];
+ for (int i = 0; i < dim; i++) {
+ values[i] = adapter.get(array, i);
+ }
+ return new ShortVector(values, true);
+ }
+
+ @Override
+ public <A> ShortVector newNumberVector(A array, NumberArrayAdapter<?, ? super A> adapter) {
+ int dim = adapter.size(array);
+ short[] values = new short[dim];
+ for (int i = 0; i < dim; i++) {
+ values[i] = adapter.getShort(array, i);
+ }
+ return new ShortVector(values, true);
+ }
+
+ @Override
+ public ByteBufferSerializer<ShortVector> getDefaultSerializer() {
+ return VARIABLE_SERIALIZER;
+ }
+
+ @Override
+ public Class<? super ShortVector> getRestrictionClass() {
+ return ShortVector.class;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ @Override
+ protected ShortVector.Factory makeInstance() {
+ return STATIC;
+ }
+ }
+ }
+
+ /**
+ * Serialization class for dense Short vectors with up to
+ * {@link Short#MAX_VALUE} dimensions, by using a short for storing the
+ * dimensionality.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.uses ShortVector - - «serializes»
+ */
+ public static class ShortSerializer implements ByteBufferSerializer<ShortVector> {
+ @Override
+ public ShortVector fromByteBuffer(ByteBuffer buffer) throws IOException {
+ final short dimensionality = buffer.getShort();
+ assert (buffer.remaining() >= ByteArrayUtil.SIZE_SHORT * dimensionality);
+ final short[] values = new short[dimensionality];
+ for (int i = 0; i < dimensionality; i++) {
+ values[i] = buffer.getShort();
+ }
+ return new ShortVector(values, true);
+ }
+
+ @Override
+ public void toByteBuffer(ByteBuffer buffer, ShortVector vec) throws IOException {
+ assert (vec.values.length < Short.MAX_VALUE) : "This serializer only supports a maximum dimensionality of " + Short.MAX_VALUE + "!";
+ assert (buffer.remaining() >= ByteArrayUtil.SIZE_SHORT * vec.values.length);
+ buffer.putShort((short) vec.values.length);
+ for (int i = 0; i < vec.values.length; i++) {
+ buffer.putShort(vec.values[i]);
+ }
+ }
+
+ @Override
+ public int getByteSize(ShortVector vec) {
+ assert (vec.values.length < Short.MAX_VALUE) : "This serializer only supports a maximum dimensionality of " + Short.MAX_VALUE + "!";
+ return ByteArrayUtil.SIZE_SHORT + ByteArrayUtil.SIZE_SHORT * vec.getDimensionality();
+ }
+ }
+
+ /**
+ * Serialization class for variable dimensionality by using VarInt encoding.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.uses ShortVector - - «serializes»
+ */
+ public static class VariableSerializer implements ByteBufferSerializer<ShortVector> {
+ @Override
+ public ShortVector fromByteBuffer(ByteBuffer buffer) throws IOException {
+ final int dimensionality = ByteArrayUtil.readUnsignedVarint(buffer);
+ assert (buffer.remaining() >= ByteArrayUtil.SIZE_INT * dimensionality);
+ final short[] values = new short[dimensionality];
+ for (int i = 0; i < dimensionality; i++) {
+ values[i] = (short) ByteArrayUtil.readSignedVarint(buffer);
+ }
+ return new ShortVector(values, true);
+ }
+
+ @Override
+ public void toByteBuffer(ByteBuffer buffer, ShortVector vec) throws IOException {
+ assert (vec.values.length < Short.MAX_VALUE) : "This serializer only supports a maximum dimensionality of " + Short.MAX_VALUE + "!";
+ ByteArrayUtil.writeUnsignedVarint(buffer, vec.values.length);
+ for (int i = 0; i < vec.values.length; i++) {
+ ByteArrayUtil.writeSignedVarint(buffer, vec.values[i]);
+ }
+ }
+
+ @Override
+ public int getByteSize(ShortVector vec) {
+ assert (vec.values.length < Short.MAX_VALUE) : "This serializer only supports a maximum dimensionality of " + Short.MAX_VALUE + "!";
+ int len = ByteArrayUtil.getUnsignedVarintSize(vec.values.length);
+ for (int i = 0; i < vec.values.length; i++) {
+ len += ByteArrayUtil.getSignedVarintSize(vec.values[i]);
+ }
+ return len;
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/data/SparseByteVector.java b/src/de/lmu/ifi/dbs/elki/data/SparseByteVector.java
new file mode 100644
index 00000000..06649bb0
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/data/SparseByteVector.java
@@ -0,0 +1,459 @@
+package de.lmu.ifi.dbs.elki.data;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import gnu.trove.iterator.TIntDoubleIterator;
+import gnu.trove.map.TIntDoubleMap;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.persistent.ByteArrayUtil;
+import de.lmu.ifi.dbs.elki.persistent.ByteBufferSerializer;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayAdapter;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * <p>
+ * A SparseByteVector stores byte values in a sparse representation.
+ * </p>
+ *
+ * A SparseByteVector only requires storage for those attribute values that
+ * are non-zero.
+ *
+ * @author Arthur Zimek
+ */
+public class SparseByteVector extends AbstractNumberVector<Byte> implements SparseNumberVector<Byte> {
+ /**
+ * Static instance.
+ */
+ public static final SparseByteVector.Factory FACTORY = new SparseByteVector.Factory();
+
+ /**
+ * Serializer using varint encoding.
+ */
+ public static final ByteBufferSerializer<SparseByteVector> VARIABLE_SERIALIZER = new VariableSerializer();
+
+ /**
+ * Indexes of values.
+ */
+ private final int[] indexes;
+
+ /**
+ * Stored values.
+ */
+ private final byte[] values;
+
+ /**
+ * The dimensionality of this feature vector.
+ */
+ private int dimensionality;
+
+ /**
+ * Direct constructor.
+ *
+ * @param indexes Indexes Must be sorted!
+ * @param values Associated value.
+ * @param dimensionality "true" dimensionality
+ */
+ public SparseByteVector(int[] indexes, byte[] values, int dimensionality) {
+ super();
+ this.indexes = indexes;
+ this.values = values;
+ this.dimensionality = dimensionality;
+ }
+
+ /**
+ * Provides a SparseByteVector consisting of byte values according to the
+ * specified mapping of indices and values.
+ *
+ * @param values the values to be set as values of the real vector
+ * @param dimensionality the dimensionality of this feature vector
+ * @throws IllegalArgumentException if the given dimensionality is too small
+ * to cover the given values (i.e., the maximum index of any value not
+ * zero is bigger than the given dimensionality)
+ */
+ public SparseByteVector(TIntDoubleMap values, int dimensionality) throws IllegalArgumentException {
+ if(values.size() > dimensionality) {
+ throw new IllegalArgumentException("values.size() > dimensionality!");
+ }
+
+ this.indexes = new int[values.size()];
+ this.values = new byte[values.size()];
+ // Import and sort the indexes
+ {
+ TIntDoubleIterator iter = values.iterator();
+ for(int i = 0; iter.hasNext(); i++) {
+ iter.advance();
+ this.indexes[i] = iter.key();
+ }
+ Arrays.sort(this.indexes);
+ }
+ // Import the values accordingly
+ {
+ for(int i = 0; i < values.size(); i++) {
+ this.values[i] = (byte) values.get(this.indexes[i]);
+ }
+ }
+ this.dimensionality = dimensionality;
+ final int maxdim = getMaxDim();
+ if(maxdim > dimensionality) {
+ throw new IllegalArgumentException("Given dimensionality " + dimensionality + " is too small w.r.t. the given values (occurring maximum: " + maxdim + ").");
+ }
+ }
+
+ /**
+ * Get the maximum dimensionality.
+ *
+ * @return the maximum dimensionality seen
+ */
+ private int getMaxDim() {
+ if(this.indexes.length == 0) {
+ return 0;
+ }
+ else {
+ return this.indexes[this.indexes.length - 1];
+ }
+ }
+
+ /**
+ * Provides a SparseByteVector consisting of byte values according to the
+ * specified mapping of indices and values.
+ *
+ * @param values the values to be set as values of the real vector
+ * @throws IllegalArgumentException if the given dimensionality is too small
+ * to cover the given values (i.e., the maximum index of any value not
+ * zero is bigger than the given dimensionality)
+ */
+ public SparseByteVector(byte[] values) throws IllegalArgumentException {
+ this.dimensionality = values.length;
+
+ // Count the number of non-zero entries
+ int size = 0;
+ {
+ for(int i = 0; i < values.length; i++) {
+ if(values[i] != 0) {
+ size++;
+ }
+ }
+ }
+ this.indexes = new int[size];
+ this.values = new byte[size];
+
+ // Copy the values
+ {
+ int pos = 0;
+ for(int i = 0; i < values.length; i++) {
+ byte value = values[i];
+ if(value != 0) {
+ this.indexes[pos] = i;
+ this.values[pos] = value;
+ pos++;
+ }
+ }
+ }
+ }
+
+ @Override
+ public int getDimensionality() {
+ return dimensionality;
+ }
+
+ /**
+ * Sets the dimensionality to the new value.
+ *
+ *
+ * @param dimensionality the new dimensionality
+ * @throws IllegalArgumentException if the given dimensionality is too small
+ * to cover the given values (i.e., the maximum index of any value not
+ * zero is bigger than the given dimensionality)
+ */
+ @Override
+ public void setDimensionality(int dimensionality) throws IllegalArgumentException {
+ final int maxdim = getMaxDim();
+ if(maxdim > dimensionality) {
+ throw new IllegalArgumentException("Given dimensionality " + dimensionality + " is too small w.r.t. the given values (occurring maximum: " + maxdim + ").");
+ }
+ this.dimensionality = dimensionality;
+ }
+
+ @Override
+ @Deprecated
+ public Byte getValue(int dimension) {
+ int pos = Arrays.binarySearch(this.indexes, dimension);
+ if(pos >= 0) {
+ return values[pos];
+ }
+ else {
+ return 0;
+ }
+ }
+
+ @Override
+ @Deprecated
+ public double doubleValue(int dimension) {
+ int pos = Arrays.binarySearch(this.indexes, dimension);
+ if(pos >= 0) {
+ return values[pos];
+ }
+ else {
+ return 0.;
+ }
+ }
+
+ @Override
+ @Deprecated
+ public long longValue(int dimension) {
+ int pos = Arrays.binarySearch(this.indexes, dimension);
+ if(pos >= 0) {
+ return (long) values[pos];
+ }
+ else {
+ return 0;
+ }
+ }
+
+ @Override
+ public byte byteValue(int dimension) {
+ int pos = Arrays.binarySearch(this.indexes, dimension);
+ if(pos >= 0) {
+ return values[pos];
+ }
+ else {
+ return 0;
+ }
+ }
+
+ @Override
+ public Vector getColumnVector() {
+ return new Vector(getValues());
+ }
+
+ /**
+ * <p>
+ * Provides a String representation of this SparseByteVector as suitable
+ * for
+ * {@link de.lmu.ifi.dbs.elki.datasource.parser.SparseNumberVectorLabelParser}
+ * .
+ * </p>
+ *
+ * <p>
+ * The returned String is a single line with entries separated by
+ * {@link AbstractNumberVector#ATTRIBUTE_SEPARATOR}. The first entry gives the
+ * number of values actually not zero. Following entries are pairs of Integer
+ * and Byte where the Integer gives the index of the dimension and the
+ * Byte gives the corresponding value.
+ * </p>
+ *
+ * <p>
+ * Example: a vector (0,1,2,0)<sup>T</sup> would result in the String<br>
+ * <code>2 2 1 3 2</code><br>
+ * </p>
+ *
+ * @return a String representation of this SparseByteVector
+ */
+ @Override
+ public String toString() {
+ StringBuilder featureLine = new StringBuilder();
+ featureLine.append(this.indexes.length);
+ for(int i = 0; i < this.indexes.length; i++) {
+ featureLine.append(ATTRIBUTE_SEPARATOR);
+ featureLine.append(this.indexes[i]);
+ featureLine.append(ATTRIBUTE_SEPARATOR);
+ featureLine.append(this.values[i]);
+ }
+
+ return featureLine.toString();
+ }
+
+ /**
+ * Returns an array consisting of the values of this feature vector.
+ *
+ * @return an array consisting of the values of this feature vector
+ */
+ private double[] getValues() {
+ double[] vals = new double[dimensionality];
+ for(int i = 0; i < indexes.length; i++) {
+ vals[this.indexes[i]] = this.values[i];
+ }
+ return vals;
+ }
+
+ @Override
+ public int iter() {
+ return 0;
+ }
+
+ @Override
+ public int iterDim(int iter) {
+ return indexes[iter];
+ }
+
+ @Override
+ public int iterAdvance(int iter) {
+ return iter + 1;
+ }
+
+ @Override
+ public boolean iterValid(int iter) {
+ return iter < indexes.length;
+ }
+
+ @Override
+ public double iterDoubleValue(int iter) {
+ return (double) values[iter];
+ }
+
+ @Override
+ public float iterFloatValue(int iter) {
+ return (float) values[iter];
+ }
+
+ @Override
+ public int iterIntValue(int iter) {
+ return (int) values[iter];
+ }
+
+ @Override
+ public short iterShortValue(int iter) {
+ return (short) values[iter];
+ }
+
+ @Override
+ public long iterLongValue(int iter) {
+ return (long) values[iter];
+ }
+
+ @Override
+ public byte iterByteValue(int iter) {
+ return values[iter];
+ }
+
+ /**
+ * Factory class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has SparseByteVector
+ */
+ public static class Factory extends AbstractNumberVector.Factory<SparseByteVector, Byte> implements SparseNumberVector.Factory<SparseByteVector, Byte> {
+ @Override
+ public <A> SparseByteVector newFeatureVector(A array, ArrayAdapter<Byte, A> adapter) {
+ int dim = adapter.size(array);
+ byte[] values = new byte[dim];
+ for(int i = 0; i < dim; i++) {
+ values[i] = adapter.get(array, i);
+ }
+ // TODO: improve efficiency
+ return new SparseByteVector(values);
+ }
+
+ @Override
+ public <A> SparseByteVector newNumberVector(A array, NumberArrayAdapter<?, ? super A> adapter) {
+ int dim = adapter.size(array);
+ byte[] values = new byte[dim];
+ for(int i = 0; i < dim; i++) {
+ values[i] = adapter.getByte(array, i);
+ }
+ // TODO: improve efficiency
+ return new SparseByteVector(values);
+ }
+
+ @Override
+ public SparseByteVector newNumberVector(TIntDoubleMap values, int maxdim) {
+ return new SparseByteVector(values, maxdim);
+ }
+
+ @Override
+ public ByteBufferSerializer<SparseByteVector> getDefaultSerializer() {
+ return VARIABLE_SERIALIZER;
+ }
+
+ @Override
+ public Class<? super SparseByteVector> getRestrictionClass() {
+ return SparseByteVector.class;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ @Override
+ protected SparseByteVector.Factory makeInstance() {
+ return FACTORY;
+ }
+ }
+ }
+
+ /**
+ * Serialization class using VarInt encodings.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.uses SparseByteVector - - «serializes»
+ */
+ public static class VariableSerializer implements ByteBufferSerializer<SparseByteVector> {
+ @Override
+ public SparseByteVector fromByteBuffer(ByteBuffer buffer) throws IOException {
+ final int dimensionality = ByteArrayUtil.readUnsignedVarint(buffer);
+ final int nonzero = ByteArrayUtil.readUnsignedVarint(buffer);
+ final int[] dims = new int[nonzero];
+ final byte[] values = new byte[nonzero];
+ for(int i = 0; i < nonzero; i++) {
+ dims[i] = ByteArrayUtil.readUnsignedVarint(buffer);
+ values[i] = buffer.get();
+ }
+ return new SparseByteVector(dims, values, dimensionality);
+ }
+
+ @Override
+ public void toByteBuffer(ByteBuffer buffer, SparseByteVector vec) throws IOException {
+ ByteArrayUtil.writeUnsignedVarint(buffer, vec.dimensionality);
+ ByteArrayUtil.writeUnsignedVarint(buffer, vec.values.length);
+ for(int i = 0; i < vec.values.length; i++) {
+ ByteArrayUtil.writeUnsignedVarint(buffer, vec.indexes[i]);
+ buffer.put(vec.values[i]);
+ }
+ }
+
+ @Override
+ public int getByteSize(SparseByteVector vec) {
+ int sum = 0;
+ sum += ByteArrayUtil.getUnsignedVarintSize(vec.dimensionality);
+ sum += ByteArrayUtil.getUnsignedVarintSize(vec.values.length);
+ for(int i = 0; i < vec.values.length; i++) {
+ sum += ByteArrayUtil.getUnsignedVarintSize(vec.indexes[i]);
+ ++sum;
+ }
+ return sum;
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/data/SparseDoubleVector.java b/src/de/lmu/ifi/dbs/elki/data/SparseDoubleVector.java
index 110f3084..10d30b7f 100644
--- a/src/de/lmu/ifi/dbs/elki/data/SparseDoubleVector.java
+++ b/src/de/lmu/ifi/dbs/elki/data/SparseDoubleVector.java
@@ -23,15 +23,12 @@ package de.lmu.ifi.dbs.elki.data;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import gnu.trove.impl.unmodifiable.TUnmodifiableIntDoubleMap;
import gnu.trove.iterator.TIntDoubleIterator;
import gnu.trove.map.TIntDoubleMap;
-import gnu.trove.map.hash.TIntDoubleHashMap;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Arrays;
-import java.util.BitSet;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
import de.lmu.ifi.dbs.elki.persistent.ByteArrayUtil;
@@ -101,7 +98,7 @@ public class SparseDoubleVector extends AbstractNumberVector<Double> implements
* zero is bigger than the given dimensionality)
*/
public SparseDoubleVector(TIntDoubleMap values, int dimensionality) throws IllegalArgumentException {
- if (values.size() > dimensionality) {
+ if(values.size() > dimensionality) {
throw new IllegalArgumentException("values.size() > dimensionality!");
}
@@ -110,7 +107,7 @@ public class SparseDoubleVector extends AbstractNumberVector<Double> implements
// Import and sort the indexes
{
TIntDoubleIterator iter = values.iterator();
- for (int i = 0; iter.hasNext(); i++) {
+ for(int i = 0; iter.hasNext(); i++) {
iter.advance();
this.indexes[i] = iter.key();
}
@@ -118,13 +115,13 @@ public class SparseDoubleVector extends AbstractNumberVector<Double> implements
}
// Import the values accordingly
{
- for (int i = 0; i < values.size(); i++) {
+ for(int i = 0; i < values.size(); i++) {
this.values[i] = values.get(this.indexes[i]);
}
}
this.dimensionality = dimensionality;
final int maxdim = getMaxDim();
- if (maxdim > dimensionality) {
+ if(maxdim > dimensionality) {
throw new IllegalArgumentException("Given dimensionality " + dimensionality + " is too small w.r.t. the given values (occurring maximum: " + maxdim + ").");
}
}
@@ -135,9 +132,10 @@ public class SparseDoubleVector extends AbstractNumberVector<Double> implements
* @return the maximum dimensionality seen
*/
private int getMaxDim() {
- if (this.indexes.length == 0) {
+ if(this.indexes.length == 0) {
return 0;
- } else {
+ }
+ else {
return this.indexes[this.indexes.length - 1];
}
}
@@ -157,8 +155,8 @@ public class SparseDoubleVector extends AbstractNumberVector<Double> implements
// Count the number of non-zero entries
int size = 0;
{
- for (int i = 0; i < values.length; i++) {
- if (values[i] != 0.0f) {
+ for(int i = 0; i < values.length; i++) {
+ if(values[i] != 0.0f) {
size++;
}
}
@@ -169,9 +167,9 @@ public class SparseDoubleVector extends AbstractNumberVector<Double> implements
// Copy the values
{
int pos = 0;
- for (int i = 0; i < values.length; i++) {
+ for(int i = 0; i < values.length; i++) {
double value = values[i];
- if (value != 0.0f) {
+ if(value != 0.0f) {
this.indexes[pos] = i;
this.values[pos] = value;
pos++;
@@ -197,7 +195,7 @@ public class SparseDoubleVector extends AbstractNumberVector<Double> implements
@Override
public void setDimensionality(int dimensionality) throws IllegalArgumentException {
final int maxdim = getMaxDim();
- if (maxdim > dimensionality) {
+ if(maxdim > dimensionality) {
throw new IllegalArgumentException("Given dimensionality " + dimensionality + " is too small w.r.t. the given values (occurring maximum: " + maxdim + ").");
}
this.dimensionality = dimensionality;
@@ -207,29 +205,34 @@ public class SparseDoubleVector extends AbstractNumberVector<Double> implements
@Deprecated
public Double getValue(int dimension) {
int pos = Arrays.binarySearch(this.indexes, dimension);
- if (pos >= 0) {
+ if(pos >= 0) {
return values[pos];
- } else {
+ }
+ else {
return 0.0;
}
}
@Override
+ @Deprecated
public double doubleValue(int dimension) {
int pos = Arrays.binarySearch(this.indexes, dimension);
- if (pos >= 0) {
+ if(pos >= 0) {
return values[pos];
- } else {
+ }
+ else {
return 0.0;
}
}
@Override
+ @Deprecated
public long longValue(int dimension) {
int pos = Arrays.binarySearch(this.indexes, dimension);
- if (pos >= 0) {
+ if(pos >= 0) {
return (long) values[pos];
- } else {
+ }
+ else {
return 0;
}
}
@@ -265,7 +268,7 @@ public class SparseDoubleVector extends AbstractNumberVector<Double> implements
public String toString() {
StringBuilder featureLine = new StringBuilder();
featureLine.append(this.indexes.length);
- for (int i = 0; i < this.indexes.length; i++) {
+ for(int i = 0; i < this.indexes.length; i++) {
featureLine.append(ATTRIBUTE_SEPARATOR);
featureLine.append(this.indexes[i]);
featureLine.append(ATTRIBUTE_SEPARATOR);
@@ -282,12 +285,62 @@ public class SparseDoubleVector extends AbstractNumberVector<Double> implements
*/
private double[] getValues() {
double[] vals = new double[dimensionality];
- for (int i = 0; i < indexes.length; i++) {
+ for(int i = 0; i < indexes.length; i++) {
vals[this.indexes[i]] = this.values[i];
}
return vals;
}
+ @Override
+ public int iter() {
+ return 0;
+ }
+
+ @Override
+ public int iterDim(int iter) {
+ return indexes[iter];
+ }
+
+ @Override
+ public int iterAdvance(int iter) {
+ return iter + 1;
+ }
+
+ @Override
+ public boolean iterValid(int iter) {
+ return iter < indexes.length;
+ }
+
+ @Override
+ public double iterDoubleValue(int iter) {
+ return values[iter];
+ }
+
+ @Override
+ public float iterFloatValue(int iter) {
+ return (float) values[iter];
+ }
+
+ @Override
+ public int iterIntValue(int iter) {
+ return (int) values[iter];
+ }
+
+ @Override
+ public short iterShortValue(int iter) {
+ return (short) values[iter];
+ }
+
+ @Override
+ public long iterLongValue(int iter) {
+ return (long) values[iter];
+ }
+
+ @Override
+ public byte iterByteValue(int iter) {
+ return (byte) values[iter];
+ }
+
/**
* Factory class.
*
@@ -300,7 +353,7 @@ public class SparseDoubleVector extends AbstractNumberVector<Double> implements
public <A> SparseDoubleVector newFeatureVector(A array, ArrayAdapter<Double, A> adapter) {
int dim = adapter.size(array);
double[] values = new double[dim];
- for (int i = 0; i < dim; i++) {
+ for(int i = 0; i < dim; i++) {
values[i] = adapter.get(array, i);
}
// TODO: improve efficiency
@@ -311,7 +364,7 @@ public class SparseDoubleVector extends AbstractNumberVector<Double> implements
public <A> SparseDoubleVector newNumberVector(A array, NumberArrayAdapter<?, ? super A> adapter) {
int dim = adapter.size(array);
double[] values = new double[dim];
- for (int i = 0; i < dim; i++) {
+ for(int i = 0; i < dim; i++) {
values[i] = adapter.getDouble(array, i);
}
// TODO: improve efficiency
@@ -348,20 +401,6 @@ public class SparseDoubleVector extends AbstractNumberVector<Double> implements
}
}
- @Override
- public BitSet getNotNullMask() {
- BitSet b = new BitSet();
- for (int key : indexes) {
- b.set(key);
- }
- return b;
- }
-
- /**
- * Empty map.
- */
- public static final TIntDoubleMap EMPTYMAP = new TUnmodifiableIntDoubleMap(new TIntDoubleHashMap());
-
/**
* Serialization class using VarInt encodings.
*
@@ -376,7 +415,7 @@ public class SparseDoubleVector extends AbstractNumberVector<Double> implements
final int nonzero = ByteArrayUtil.readUnsignedVarint(buffer);
final int[] dims = new int[nonzero];
final double[] values = new double[nonzero];
- for (int i = 0; i < nonzero; i++) {
+ for(int i = 0; i < nonzero; i++) {
dims[i] = ByteArrayUtil.readUnsignedVarint(buffer);
values[i] = buffer.getDouble();
}
@@ -387,7 +426,7 @@ public class SparseDoubleVector extends AbstractNumberVector<Double> implements
public void toByteBuffer(ByteBuffer buffer, SparseDoubleVector vec) throws IOException {
ByteArrayUtil.writeUnsignedVarint(buffer, vec.dimensionality);
ByteArrayUtil.writeUnsignedVarint(buffer, vec.values.length);
- for (int i = 0; i < vec.values.length; i++) {
+ for(int i = 0; i < vec.values.length; i++) {
ByteArrayUtil.writeUnsignedVarint(buffer, vec.indexes[i]);
buffer.putDouble(vec.values[i]);
}
@@ -398,7 +437,7 @@ public class SparseDoubleVector extends AbstractNumberVector<Double> implements
int sum = 0;
sum += ByteArrayUtil.getUnsignedVarintSize(vec.dimensionality);
sum += ByteArrayUtil.getUnsignedVarintSize(vec.values.length);
- for (int d : vec.indexes) {
+ for(int d : vec.indexes) {
sum += ByteArrayUtil.getUnsignedVarintSize(d);
}
sum += vec.values.length * ByteArrayUtil.SIZE_DOUBLE;
diff --git a/src/de/lmu/ifi/dbs/elki/data/SparseFeatureVector.java b/src/de/lmu/ifi/dbs/elki/data/SparseFeatureVector.java
index a430ad3c..4de65abf 100644
--- a/src/de/lmu/ifi/dbs/elki/data/SparseFeatureVector.java
+++ b/src/de/lmu/ifi/dbs/elki/data/SparseFeatureVector.java
@@ -23,20 +23,62 @@ package de.lmu.ifi.dbs.elki.data;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.BitSet;
-
/**
* Extended interface for sparse feature vector types.
*
* @author Erich Schubert
- *
+ *
* @param <D> Data type
*/
public interface SparseFeatureVector<D> extends FeatureVector<D> {
/**
- * Bit set of non-null features.
+ * Iterator over non-zero features only, <em>ascendingly</em>.
+ *
+ * Note: depending on the underlying implementation, this may or may not be
+ * the dimension. Use {@link #iterDim} to get the actual dimension. In fact,
+ * usually this will be the ith non-zero value, assuming an array
+ * representation.
+ *
+ * Think of this number as an iterator. For efficiency, it has a primitive
+ * type!
+ *
+ * Intended usage:
+ *
+ * <pre>
+ * {@code
+ * for (int iter = v.iter(); v.iterValid(iter); iter = v.iterAdvance(iter)) {
+ * final int dim = v.iterDim(iter);
+ * // Do something.
+ * }
+ * }
+ * </pre>
+ *
+ * @return Identifier for the first non-zero dimension, <b>not necessarily the
+ * dimension!</b>
+ */
+ int iter();
+
+ /**
+ * Get the dimension an iterator points to.
+ *
+ * @param iter Iterator position
+ * @return Dimension the iterator refers to
+ */
+ int iterDim(int iter);
+
+ /**
+ * Advance the iterator to the next position.
+ *
+ * @param iter Previous iterator position
+ * @return Next iterator position
+ */
+ int iterAdvance(int iter);
+
+ /**
+ * Test the iterator position for validity.
*
- * @return Bit set
+ * @param iter Iterator position
+ * @return {@code true} when it refers to a valid position.
*/
- public BitSet getNotNullMask();
+ boolean iterValid(int iter);
}
diff --git a/src/de/lmu/ifi/dbs/elki/data/SparseFloatVector.java b/src/de/lmu/ifi/dbs/elki/data/SparseFloatVector.java
index ca337b4b..5148920e 100644
--- a/src/de/lmu/ifi/dbs/elki/data/SparseFloatVector.java
+++ b/src/de/lmu/ifi/dbs/elki/data/SparseFloatVector.java
@@ -23,17 +23,14 @@ package de.lmu.ifi.dbs.elki.data;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import gnu.trove.impl.unmodifiable.TUnmodifiableIntFloatMap;
import gnu.trove.iterator.TIntDoubleIterator;
import gnu.trove.iterator.TIntFloatIterator;
import gnu.trove.map.TIntDoubleMap;
import gnu.trove.map.TIntFloatMap;
-import gnu.trove.map.hash.TIntFloatHashMap;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Arrays;
-import java.util.BitSet;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
import de.lmu.ifi.dbs.elki.persistent.ByteArrayUtil;
@@ -103,7 +100,7 @@ public class SparseFloatVector extends AbstractNumberVector<Float> implements Sp
* zero is bigger than the given dimensionality)
*/
public SparseFloatVector(TIntFloatMap values, int dimensionality) throws IllegalArgumentException {
- if (values.size() > dimensionality) {
+ if(values.size() > dimensionality) {
throw new IllegalArgumentException("values.size() > dimensionality!");
}
@@ -112,7 +109,7 @@ public class SparseFloatVector extends AbstractNumberVector<Float> implements Sp
// Import and sort the indexes
{
TIntFloatIterator iter = values.iterator();
- for (int i = 0; iter.hasNext(); i++) {
+ for(int i = 0; iter.hasNext(); i++) {
iter.advance();
this.indexes[i] = iter.key();
}
@@ -120,13 +117,13 @@ public class SparseFloatVector extends AbstractNumberVector<Float> implements Sp
}
// Import the values accordingly
{
- for (int i = 0; i < values.size(); i++) {
+ for(int i = 0; i < values.size(); i++) {
this.values[i] = values.get(this.indexes[i]);
}
}
this.dimensionality = dimensionality;
final int maxdim = getMaxDim();
- if (maxdim > dimensionality) {
+ if(maxdim > dimensionality) {
throw new IllegalArgumentException("Given dimensionality " + dimensionality + " is too small w.r.t. the given values (occurring maximum: " + maxdim + ").");
}
}
@@ -137,9 +134,10 @@ public class SparseFloatVector extends AbstractNumberVector<Float> implements Sp
* @return the maximum dimensionality seen
*/
private int getMaxDim() {
- if (this.indexes.length == 0) {
+ if(this.indexes.length == 0) {
return 0;
- } else {
+ }
+ else {
return this.indexes[this.indexes.length - 1];
}
}
@@ -159,8 +157,8 @@ public class SparseFloatVector extends AbstractNumberVector<Float> implements Sp
// Count the number of non-zero entries
int size = 0;
{
- for (int i = 0; i < values.length; i++) {
- if (values[i] != 0.0f) {
+ for(int i = 0; i < values.length; i++) {
+ if(values[i] != 0.0f) {
size++;
}
}
@@ -171,9 +169,9 @@ public class SparseFloatVector extends AbstractNumberVector<Float> implements Sp
// Copy the values
{
int pos = 0;
- for (int i = 0; i < values.length; i++) {
+ for(int i = 0; i < values.length; i++) {
float value = values[i];
- if (value != 0.0f) {
+ if(value != 0.0f) {
this.indexes[pos] = i;
this.values[pos] = value;
pos++;
@@ -199,7 +197,7 @@ public class SparseFloatVector extends AbstractNumberVector<Float> implements Sp
@Override
public void setDimensionality(int dimensionality) throws IllegalArgumentException {
final int maxdim = getMaxDim();
- if (maxdim > dimensionality) {
+ if(maxdim > dimensionality) {
throw new IllegalArgumentException("Given dimensionality " + dimensionality + " is too small w.r.t. the given values (occurring maximum: " + maxdim + ").");
}
this.dimensionality = dimensionality;
@@ -209,29 +207,34 @@ public class SparseFloatVector extends AbstractNumberVector<Float> implements Sp
@Deprecated
public Float getValue(int dimension) {
int pos = Arrays.binarySearch(this.indexes, dimension);
- if (pos >= 0) {
+ if(pos >= 0) {
return values[pos];
- } else {
+ }
+ else {
return 0.0f;
}
}
@Override
+ @Deprecated
public double doubleValue(int dimension) {
int pos = Arrays.binarySearch(this.indexes, dimension);
- if (pos >= 0) {
+ if(pos >= 0) {
return values[pos];
- } else {
+ }
+ else {
return 0.0;
}
}
@Override
+ @Deprecated
public long longValue(int dimension) {
int pos = Arrays.binarySearch(this.indexes, dimension);
- if (pos >= 0) {
+ if(pos >= 0) {
return (long) values[pos];
- } else {
+ }
+ else {
return 0;
}
}
@@ -267,7 +270,7 @@ public class SparseFloatVector extends AbstractNumberVector<Float> implements Sp
public String toString() {
StringBuilder featureLine = new StringBuilder();
featureLine.append(this.indexes.length);
- for (int i = 0; i < this.indexes.length; i++) {
+ for(int i = 0; i < this.indexes.length; i++) {
featureLine.append(ATTRIBUTE_SEPARATOR);
featureLine.append(this.indexes[i]);
featureLine.append(ATTRIBUTE_SEPARATOR);
@@ -284,12 +287,62 @@ public class SparseFloatVector extends AbstractNumberVector<Float> implements Sp
*/
private double[] getValues() {
double[] vals = new double[dimensionality];
- for (int i = 0; i < indexes.length; i++) {
+ for(int i = 0; i < indexes.length; i++) {
vals[this.indexes[i]] = this.values[i];
}
return vals;
}
+ @Override
+ public int iter() {
+ return 0;
+ }
+
+ @Override
+ public int iterDim(int iter) {
+ return indexes[iter];
+ }
+
+ @Override
+ public int iterAdvance(int iter) {
+ return iter + 1;
+ }
+
+ @Override
+ public boolean iterValid(int iter) {
+ return iter < indexes.length;
+ }
+
+ @Override
+ public double iterDoubleValue(int iter) {
+ return (double) values[iter];
+ }
+
+ @Override
+ public float iterFloatValue(int iter) {
+ return values[iter];
+ }
+
+ @Override
+ public int iterIntValue(int iter) {
+ return (int) values[iter];
+ }
+
+ @Override
+ public short iterShortValue(int iter) {
+ return (short) values[iter];
+ }
+
+ @Override
+ public long iterLongValue(int iter) {
+ return (long) values[iter];
+ }
+
+ @Override
+ public byte iterByteValue(int iter) {
+ return (byte) values[iter];
+ }
+
/**
* Factory class.
*
@@ -302,7 +355,7 @@ public class SparseFloatVector extends AbstractNumberVector<Float> implements Sp
public <A> SparseFloatVector newFeatureVector(A array, ArrayAdapter<Float, A> adapter) {
int dim = adapter.size(array);
float[] values = new float[dim];
- for (int i = 0; i < dim; i++) {
+ for(int i = 0; i < dim; i++) {
values[i] = adapter.get(array, i);
}
// TODO: inefficient
@@ -313,7 +366,7 @@ public class SparseFloatVector extends AbstractNumberVector<Float> implements Sp
public <A> SparseFloatVector newNumberVector(A array, NumberArrayAdapter<?, ? super A> adapter) {
int dim = adapter.size(array);
float[] values = new float[dim];
- for (int i = 0; i < dim; i++) {
+ for(int i = 0; i < dim; i++) {
values[i] = adapter.getFloat(array, i);
}
// TODO: inefficient
@@ -326,13 +379,13 @@ public class SparseFloatVector extends AbstractNumberVector<Float> implements Sp
float[] values = new float[dvalues.size()];
// Import and sort the indexes
TIntDoubleIterator iter = dvalues.iterator();
- for (int i = 0; iter.hasNext(); i++) {
+ for(int i = 0; iter.hasNext(); i++) {
iter.advance();
indexes[i] = iter.key();
}
Arrays.sort(indexes);
// Import the values accordingly
- for (int i = 0; i < dvalues.size(); i++) {
+ for(int i = 0; i < dvalues.size(); i++) {
values[i] = (float) dvalues.get(indexes[i]);
}
return new SparseFloatVector(indexes, values, maxdim);
@@ -363,20 +416,6 @@ public class SparseFloatVector extends AbstractNumberVector<Float> implements Sp
}
}
- @Override
- public BitSet getNotNullMask() {
- BitSet b = new BitSet();
- for (int key : indexes) {
- b.set(key);
- }
- return b;
- }
-
- /**
- * Empty map.
- */
- public static final TIntFloatMap EMPTYMAP = new TUnmodifiableIntFloatMap(new TIntFloatHashMap());
-
/**
* Serialization class using VarInt encodings.
*
@@ -391,7 +430,7 @@ public class SparseFloatVector extends AbstractNumberVector<Float> implements Sp
final int nonzero = ByteArrayUtil.readUnsignedVarint(buffer);
final int[] dims = new int[nonzero];
final float[] values = new float[nonzero];
- for (int i = 0; i < nonzero; i++) {
+ for(int i = 0; i < nonzero; i++) {
dims[i] = ByteArrayUtil.readUnsignedVarint(buffer);
values[i] = buffer.getFloat();
}
@@ -402,7 +441,7 @@ public class SparseFloatVector extends AbstractNumberVector<Float> implements Sp
public void toByteBuffer(ByteBuffer buffer, SparseFloatVector vec) throws IOException {
ByteArrayUtil.writeUnsignedVarint(buffer, vec.dimensionality);
ByteArrayUtil.writeUnsignedVarint(buffer, vec.values.length);
- for (int i = 0; i < vec.values.length; i++) {
+ for(int i = 0; i < vec.values.length; i++) {
ByteArrayUtil.writeUnsignedVarint(buffer, vec.indexes[i]);
buffer.putFloat(vec.values[i]);
}
@@ -413,7 +452,7 @@ public class SparseFloatVector extends AbstractNumberVector<Float> implements Sp
int sum = 0;
sum += ByteArrayUtil.getUnsignedVarintSize(vec.dimensionality);
sum += ByteArrayUtil.getUnsignedVarintSize(vec.values.length);
- for (int d : vec.indexes) {
+ for(int d : vec.indexes) {
sum += ByteArrayUtil.getUnsignedVarintSize(d);
}
sum += vec.values.length * ByteArrayUtil.SIZE_FLOAT;
diff --git a/src/de/lmu/ifi/dbs/elki/data/SparseIntegerVector.java b/src/de/lmu/ifi/dbs/elki/data/SparseIntegerVector.java
new file mode 100644
index 00000000..00811ba3
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/data/SparseIntegerVector.java
@@ -0,0 +1,460 @@
+package de.lmu.ifi.dbs.elki.data;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import gnu.trove.iterator.TIntDoubleIterator;
+import gnu.trove.map.TIntDoubleMap;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.persistent.ByteArrayUtil;
+import de.lmu.ifi.dbs.elki.persistent.ByteBufferSerializer;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayAdapter;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * <p>
+ * A SparseIntegerVector stores integer values.
+ * </p>
+ *
+ * A SparseIntegerVector only requires storage for those attribute values that
+ * are non-zero.
+ *
+ * @author Arthur Zimek
+ */
+public class SparseIntegerVector extends AbstractNumberVector<Integer> implements SparseNumberVector<Integer> {
+ /**
+ * Static instance.
+ */
+ public static final SparseIntegerVector.Factory FACTORY = new SparseIntegerVector.Factory();
+
+ /**
+ * Serializer using varint encoding.
+ */
+ public static final ByteBufferSerializer<SparseIntegerVector> VARIABLE_SERIALIZER = new VariableSerializer();
+
+ /**
+ * Indexes of values.
+ */
+ private final int[] indexes;
+
+ /**
+ * Stored values.
+ */
+ private final int[] values;
+
+ /**
+ * The dimensionality of this feature vector.
+ */
+ private int dimensionality;
+
+ /**
+ * Direct constructor.
+ *
+ * @param indexes Indexes, must be sorted!
+ * @param values Associated values.
+ * @param dimensionality "true" dimensionality
+ */
+ public SparseIntegerVector(int[] indexes, int[] values, int dimensionality) {
+ super();
+ this.indexes = indexes;
+ this.values = values;
+ this.dimensionality = dimensionality;
+ }
+
+ /**
+ * Provides a SparseIntegerVector consisting of integer values according to the
+ * specified mapping of indices and values.
+ *
+ * @param values the values to be set as values of the real vector
+ * @param dimensionality the dimensionality of this feature vector
+ * @throws IllegalArgumentException if the given dimensionality is too small
+ * to cover the given values (i.e., the maximum index of any value not
+ * zero is bigger than the given dimensionality)
+ */
+ public SparseIntegerVector(TIntDoubleMap values, int dimensionality) throws IllegalArgumentException {
+ if(values.size() > dimensionality) {
+ throw new IllegalArgumentException("values.size() > dimensionality!");
+ }
+
+ this.indexes = new int[values.size()];
+ this.values = new int[values.size()];
+ // Import and sort the indexes
+ {
+ TIntDoubleIterator iter = values.iterator();
+ for(int i = 0; iter.hasNext(); i++) {
+ iter.advance();
+ this.indexes[i] = iter.key();
+ }
+ Arrays.sort(this.indexes);
+ }
+ // Import the values accordingly
+ {
+ for(int i = 0; i < values.size(); i++) {
+ this.values[i] = (int) values.get(this.indexes[i]);
+ }
+ }
+ this.dimensionality = dimensionality;
+ final int maxdim = getMaxDim();
+ if(maxdim > dimensionality) {
+ throw new IllegalArgumentException("Given dimensionality " + dimensionality + " is too small w.r.t. the given values (occurring maximum: " + maxdim + ").");
+ }
+ }
+
+ /**
+ * Get the maximum dimensionality.
+ *
+ * @return the maximum dimensionality seen
+ */
+ private int getMaxDim() {
+ if(this.indexes.length == 0) {
+ return 0;
+ }
+ else {
+ return this.indexes[this.indexes.length - 1];
+ }
+ }
+
+ /**
+ * Provides a SparseIntegerVector consisting of integer values according to the
+ * specified mapping of indices and values.
+ *
+ * @param values the values to be set as values of the real vector
+ * @throws IllegalArgumentException if the given dimensionality is too small
+ * to cover the given values (i.e., the maximum index of any value not
+ * zero is bigger than the given dimensionality)
+ */
+ public SparseIntegerVector(int[] values) throws IllegalArgumentException {
+ this.dimensionality = values.length;
+
+ // Count the number of non-zero entries
+ int size = 0;
+ {
+ for(int i = 0; i < values.length; i++) {
+ if(values[i] != 0) {
+ size++;
+ }
+ }
+ }
+ this.indexes = new int[size];
+ this.values = new int[size];
+
+ // Copy the values
+ {
+ int pos = 0;
+ for(int i = 0; i < values.length; i++) {
+ int value = values[i];
+ if(value != 0) {
+ this.indexes[pos] = i;
+ this.values[pos] = value;
+ pos++;
+ }
+ }
+ }
+ }
+
+ @Override
+ public int getDimensionality() {
+ return dimensionality;
+ }
+
+ /**
+ * Sets the dimensionality to the new value.
+ *
+ *
+ * @param dimensionality the new dimensionality
+ * @throws IllegalArgumentException if the given dimensionality is too small
+ * to cover the given values (i.e., the maximum index of any value not
+ * zero is bigger than the given dimensionality)
+ */
+ @Override
+ public void setDimensionality(int dimensionality) throws IllegalArgumentException {
+ final int maxdim = getMaxDim();
+ if(maxdim > dimensionality) {
+ throw new IllegalArgumentException("Given dimensionality " + dimensionality + " is too small w.r.t. the given values (occurring maximum: " + maxdim + ").");
+ }
+ this.dimensionality = dimensionality;
+ }
+
+ @Override
+ @Deprecated
+ public Integer getValue(int dimension) {
+ int pos = Arrays.binarySearch(this.indexes, dimension);
+ if(pos >= 0) {
+ return values[pos];
+ }
+ else {
+ return 0;
+ }
+ }
+
+ @Override
+ @Deprecated
+ public double doubleValue(int dimension) {
+ int pos = Arrays.binarySearch(this.indexes, dimension);
+ if(pos >= 0) {
+ return values[pos];
+ }
+ else {
+ return 0.;
+ }
+ }
+
+ @Override
+ @Deprecated
+ public long longValue(int dimension) {
+ int pos = Arrays.binarySearch(this.indexes, dimension);
+ if(pos >= 0) {
+ return (long) values[pos];
+ }
+ else {
+ return 0;
+ }
+ }
+
+ @Override
+ @Deprecated
+ public int intValue(int dimension) {
+ int pos = Arrays.binarySearch(this.indexes, dimension);
+ if(pos >= 0) {
+ return values[pos];
+ }
+ else {
+ return 0;
+ }
+ }
+
+ @Override
+ public Vector getColumnVector() {
+ return new Vector(getValues());
+ }
+
+ /**
+ * <p>
+ * Provides a String representation of this SparseIntegerVector as suitable
+ * for
+ * {@link de.lmu.ifi.dbs.elki.datasource.parser.SparseNumberVectorLabelParser}
+ * .
+ * </p>
+ *
+ * <p>
+ * The returned String is a single line with entries separated by
+ * {@link AbstractNumberVector#ATTRIBUTE_SEPARATOR}. The first entry gives the
+ * number of values actually not zero. Following entries are pairs of integers,
+ * where the first integer gives the index of the dimension and the second
+ * integer gives the corresponding value.
+ * </p>
+ *
+ * <p>
+ * Example: a vector (0,1,2,0)<sup>T</sup> would result in the String<br>
+ * <code>2 2 1 3 2</code><br>
+ * </p>
+ *
+ * @return a String representation of this SparseIntegerVector
+ */
+ @Override
+ public String toString() {
+ StringBuilder featureLine = new StringBuilder();
+ featureLine.append(this.indexes.length);
+ for(int i = 0; i < this.indexes.length; i++) {
+ featureLine.append(ATTRIBUTE_SEPARATOR);
+ featureLine.append(this.indexes[i]);
+ featureLine.append(ATTRIBUTE_SEPARATOR);
+ featureLine.append(this.values[i]);
+ }
+
+ return featureLine.toString();
+ }
+
+ /**
+ * Returns an array consisting of the values of this feature vector.
+ *
+ * @return an array consisting of the values of this feature vector
+ */
+ private double[] getValues() {
+ double[] vals = new double[dimensionality];
+ for(int i = 0; i < indexes.length; i++) {
+ vals[this.indexes[i]] = this.values[i];
+ }
+ return vals;
+ }
+
+ @Override
+ public int iter() {
+ return 0;
+ }
+
+ @Override
+ public int iterDim(int iter) {
+ return indexes[iter];
+ }
+
+ @Override
+ public int iterAdvance(int iter) {
+ return iter + 1;
+ }
+
+ @Override
+ public boolean iterValid(int iter) {
+ return iter < indexes.length;
+ }
+
+ @Override
+ public double iterDoubleValue(int iter) {
+ return (double) values[iter];
+ }
+
+ @Override
+ public float iterFloatValue(int iter) {
+ return (float) values[iter];
+ }
+
+ @Override
+ public int iterIntValue(int iter) {
+ return values[iter];
+ }
+
+ @Override
+ public short iterShortValue(int iter) {
+ return (short) values[iter];
+ }
+
+ @Override
+ public long iterLongValue(int iter) {
+ return (long) values[iter];
+ }
+
+ @Override
+ public byte iterByteValue(int iter) {
+ return (byte) values[iter];
+ }
+
+ /**
+ * Factory class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has SparseIntegerVector
+ */
+ public static class Factory extends AbstractNumberVector.Factory<SparseIntegerVector, Integer> implements SparseNumberVector.Factory<SparseIntegerVector, Integer> {
+ @Override
+ public <A> SparseIntegerVector newFeatureVector(A array, ArrayAdapter<Integer, A> adapter) {
+ int dim = adapter.size(array);
+ int[] values = new int[dim];
+ for(int i = 0; i < dim; i++) {
+ values[i] = adapter.get(array, i);
+ }
+ // TODO: improve efficiency
+ return new SparseIntegerVector(values);
+ }
+
+ @Override
+ public <A> SparseIntegerVector newNumberVector(A array, NumberArrayAdapter<?, ? super A> adapter) {
+ int dim = adapter.size(array);
+ int[] values = new int[dim];
+ for(int i = 0; i < dim; i++) {
+ values[i] = adapter.getInteger(array, i);
+ }
+ // TODO: improve efficiency
+ return new SparseIntegerVector(values);
+ }
+
+ @Override
+ public SparseIntegerVector newNumberVector(TIntDoubleMap values, int maxdim) {
+ return new SparseIntegerVector(values, maxdim);
+ }
+
+ @Override
+ public ByteBufferSerializer<SparseIntegerVector> getDefaultSerializer() {
+ return VARIABLE_SERIALIZER;
+ }
+
+ @Override
+ public Class<? super SparseIntegerVector> getRestrictionClass() {
+ return SparseIntegerVector.class;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ @Override
+ protected SparseIntegerVector.Factory makeInstance() {
+ return FACTORY;
+ }
+ }
+ }
+
+ /**
+ * Serialization class using VarInt encodings.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.uses SparseIntegerVector - - «serializes»
+ */
+ public static class VariableSerializer implements ByteBufferSerializer<SparseIntegerVector> {
+ @Override
+ public SparseIntegerVector fromByteBuffer(ByteBuffer buffer) throws IOException {
+ final int dimensionality = ByteArrayUtil.readUnsignedVarint(buffer);
+ final int nonzero = ByteArrayUtil.readUnsignedVarint(buffer);
+ final int[] dims = new int[nonzero];
+ final int[] values = new int[nonzero];
+ for(int i = 0; i < nonzero; i++) {
+ dims[i] = ByteArrayUtil.readUnsignedVarint(buffer);
+ values[i] = ByteArrayUtil.readSignedVarint(buffer);
+ }
+ return new SparseIntegerVector(dims, values, dimensionality);
+ }
+
+ @Override
+ public void toByteBuffer(ByteBuffer buffer, SparseIntegerVector vec) throws IOException {
+ ByteArrayUtil.writeUnsignedVarint(buffer, vec.dimensionality);
+ ByteArrayUtil.writeUnsignedVarint(buffer, vec.values.length);
+ for(int i = 0; i < vec.values.length; i++) {
+ ByteArrayUtil.writeUnsignedVarint(buffer, vec.indexes[i]);
+ ByteArrayUtil.writeSignedVarint(buffer, vec.values[i]);
+ }
+ }
+
+ @Override
+ public int getByteSize(SparseIntegerVector vec) {
+ int sum = 0;
+ sum += ByteArrayUtil.getUnsignedVarintSize(vec.dimensionality);
+ sum += ByteArrayUtil.getUnsignedVarintSize(vec.values.length);
+ for(int i = 0; i < vec.values.length; i++) {
+ sum += ByteArrayUtil.getUnsignedVarintSize(vec.indexes[i]);
+ sum += ByteArrayUtil.getSignedVarintSize(vec.values[i]);
+ }
+ return sum;
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/data/SparseNumberVector.java b/src/de/lmu/ifi/dbs/elki/data/SparseNumberVector.java
index 26ab968a..7b97b268 100644
--- a/src/de/lmu/ifi/dbs/elki/data/SparseNumberVector.java
+++ b/src/de/lmu/ifi/dbs/elki/data/SparseNumberVector.java
@@ -34,6 +34,35 @@ import gnu.trove.map.TIntDoubleMap;
*/
public interface SparseNumberVector<N extends Number> extends NumberVector<N>, SparseFeatureVector<N> {
/**
+ * Iterator over non-zero features only, in <em>ascending</em> order.
+ *
+ * Note: depending on the underlying implementation, this may or may not be
+ * the dimension. Use {@link #iterDim} to get the actual dimension. In fact,
+ * usually this will be the ith non-zero value, assuming an array
+ * representation.
+ *
+ * Think of this number as an iterator. For efficiency, it has a primitive
+ * type!
+ *
+ * Intended usage:
+ *
+ * <pre>
+ * {@code
+ * for (int iter = v.iter(); v.iterValid(iter); iter = v.iterAdvance(iter)) {
+ * final int dim = v.iterDim(iter);
+ * final double val = v.iterDoubleValue(iter);
+ * // Do something.
+ * }
+ * }
+ * </pre>
+ *
+ * @return Identifier for the first non-zero dimension, <b>not necessarily the
+ * dimension!</b>
+ */
+ @Override
+ int iter();
+
+ /**
* Update the vector space dimensionality.
*
* @param maxdim New dimensionality
@@ -41,6 +70,102 @@ public interface SparseNumberVector<N extends Number> extends NumberVector<N>, S
void setDimensionality(int maxdim);
/**
+ * Get the value of the iterator's current dimension.
+ *
+ * @param iter Iterator
+ * @return Value at the current position
+ */
+ double iterDoubleValue(int iter);
+
+ /**
+ * Get the value of the iterator's current dimension.
+ *
+ * @param iter Iterator
+ * @return Value at the current position
+ */
+ float iterFloatValue(int iter);
+
+ /**
+ * Get the value of the iterator's current dimension.
+ *
+ * @param iter Iterator
+ * @return Value at the current position
+ */
+ int iterIntValue(int iter);
+
+ /**
+ * Get the value of the iterator's current dimension.
+ *
+ * @param iter Iterator
+ * @return Value at the current position
+ */
+ short iterShortValue(int iter);
+
+ /**
+ * Get the value of the iterator's current dimension.
+ *
+ * @param iter Iterator
+ * @return Value at the current position
+ */
+ long iterLongValue(int iter);
+
+ /**
+ * Get the value of the iterator's current dimension.
+ *
+ * @param iter Iterator
+ * @return Value at the current position
+ */
+ byte iterByteValue(int iter);
+
+ /**
+ * @deprecated As the vectors are sparse, try to iterate over the sparse
+ * dimensions only, see {@link #iterDoubleValue}.
+ */
+ @Override
+ @Deprecated
+ double doubleValue(int dimension);
+
+ /**
+ * @deprecated As the vectors are sparse, try to iterate over the sparse
+ * dimensions only, see {@link #iterFloatValue}.
+ */
+ @Override
+ @Deprecated
+ float floatValue(int dimension);
+
+ /**
+ * @deprecated As the vectors are sparse, try to iterate over the sparse
+ * dimensions only, see {@link #iterIntValue}.
+ */
+ @Override
+ @Deprecated
+ int intValue(int dimension);
+
+ /**
+ * @deprecated As the vectors are sparse, try to iterate over the sparse
+ * dimensions only, see {@link #iterLongValue}.
+ */
+ @Override
+ @Deprecated
+ long longValue(int dimension);
+
+ /**
+ * @deprecated As the vectors are sparse, try to iterate over the sparse
+ * dimensions only, see {@link #iterShortValue}.
+ */
+ @Override
+ @Deprecated
+ short shortValue(int dimension);
+
+ /**
+ * @deprecated As the vectors are sparse, try to iterate over the sparse
+ * dimensions only, see {@link #iterByteValue}.
+ */
+ @Override
+ @Deprecated
+ byte byteValue(int dimension);
+
+ /**
* Factory for sparse number vectors: make from a dim-value map.
*
* @author Erich Schubert
diff --git a/src/de/lmu/ifi/dbs/elki/data/SparseShortVector.java b/src/de/lmu/ifi/dbs/elki/data/SparseShortVector.java
new file mode 100644
index 00000000..7e54dd5a
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/data/SparseShortVector.java
@@ -0,0 +1,459 @@
+package de.lmu.ifi.dbs.elki.data;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import gnu.trove.iterator.TIntDoubleIterator;
+import gnu.trove.map.TIntDoubleMap;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.persistent.ByteArrayUtil;
+import de.lmu.ifi.dbs.elki.persistent.ByteBufferSerializer;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayAdapter;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * <p>
+ * A SparseShortVector stores short integer values.
+ * </p>
+ *
+ * A SparseShortVector only requires storage for those attribute values that are
+ * non-zero.
+ *
+ * @author Arthur Zimek
+ */
+public class SparseShortVector extends AbstractNumberVector<Short> implements SparseNumberVector<Short> {
+ /**
+ * Static instance.
+ */
+ public static final SparseShortVector.Factory FACTORY = new SparseShortVector.Factory();
+
+ /**
+ * Serializer using varint encoding.
+ */
+ public static final ByteBufferSerializer<SparseShortVector> VARIABLE_SERIALIZER = new VariableSerializer();
+
+ /**
+ * Indexes of values.
+ */
+ private final int[] indexes;
+
+ /**
+ * Stored values.
+ */
+ private final short[] values;
+
+ /**
+ * The dimensionality of this feature vector.
+ */
+ private int dimensionality;
+
+ /**
+ * Direct constructor.
+ *
+ * @param indexes Indexes, must be sorted!
+ * @param values Associated values.
+ * @param dimensionality "true" dimensionality
+ */
+ public SparseShortVector(int[] indexes, short[] values, int dimensionality) {
+ super();
+ this.indexes = indexes;
+ this.values = values;
+ this.dimensionality = dimensionality;
+ }
+
+ /**
+ * Provides a SparseShortVector consisting of short values according to the
+ * specified mapping of indices and values.
+ *
+ * @param values the values to be set as values of the real vector
+ * @param dimensionality the dimensionality of this feature vector
+ * @throws IllegalArgumentException if the given dimensionality is too small
+ * to cover the given values (i.e., the maximum index of any value not
+ * zero is bigger than the given dimensionality)
+ */
+ public SparseShortVector(TIntDoubleMap values, int dimensionality) throws IllegalArgumentException {
+ if(values.size() > dimensionality) {
+ throw new IllegalArgumentException("values.size() > dimensionality!");
+ }
+
+ this.indexes = new int[values.size()];
+ this.values = new short[values.size()];
+ // Import and sort the indexes
+ {
+ TIntDoubleIterator iter = values.iterator();
+ for(int i = 0; iter.hasNext(); i++) {
+ iter.advance();
+ this.indexes[i] = iter.key();
+ }
+ Arrays.sort(this.indexes);
+ }
+ // Import the values accordingly
+ {
+ for(int i = 0; i < values.size(); i++) {
+ this.values[i] = (short) values.get(this.indexes[i]);
+ }
+ }
+ this.dimensionality = dimensionality;
+ final int maxdim = getMaxDim();
+ if(maxdim > dimensionality) {
+ throw new IllegalArgumentException("Given dimensionality " + dimensionality + " is too small w.r.t. the given values (occurring maximum: " + maxdim + ").");
+ }
+ }
+
+ /**
+ * Get the maximum dimensionality.
+ *
+ * @return the maximum dimensionality seen
+ */
+ private int getMaxDim() {
+ if(this.indexes.length == 0) {
+ return 0;
+ }
+ else {
+ return this.indexes[this.indexes.length - 1];
+ }
+ }
+
+ /**
+ * Provides a SparseShortVector consisting of short values according to the
+ * specified mapping of indices and values.
+ *
+ * @param values the values to be set as values of the real vector
+ * @throws IllegalArgumentException if the given dimensionality is too small
+ * to cover the given values (i.e., the maximum index of any value not
+ * zero is bigger than the given dimensionality)
+ */
+ public SparseShortVector(short[] values) throws IllegalArgumentException {
+ this.dimensionality = values.length;
+
+ // Count the number of non-zero entries
+ int size = 0;
+ {
+ for(int i = 0; i < values.length; i++) {
+ if(values[i] != 0) {
+ size++;
+ }
+ }
+ }
+ this.indexes = new int[size];
+ this.values = new short[size];
+
+ // Copy the values
+ {
+ int pos = 0;
+ for(int i = 0; i < values.length; i++) {
+ short value = values[i];
+ if(value != 0) {
+ this.indexes[pos] = i;
+ this.values[pos] = value;
+ pos++;
+ }
+ }
+ }
+ }
+
+ @Override
+ public int getDimensionality() {
+ return dimensionality;
+ }
+
+ /**
+ * Sets the dimensionality to the new value.
+ *
+ *
+ * @param dimensionality the new dimensionality
+ * @throws IllegalArgumentException if the given dimensionality is too small
+ * to cover the given values (i.e., the maximum index of any value not
+ * zero is bigger than the given dimensionality)
+ */
+ @Override
+ public void setDimensionality(int dimensionality) throws IllegalArgumentException {
+ final int maxdim = getMaxDim();
+ if(maxdim > dimensionality) {
+ throw new IllegalArgumentException("Given dimensionality " + dimensionality + " is too small w.r.t. the given values (occurring maximum: " + maxdim + ").");
+ }
+ this.dimensionality = dimensionality;
+ }
+
+ @Override
+ @Deprecated
+ public Short getValue(int dimension) {
+ int pos = Arrays.binarySearch(this.indexes, dimension);
+ if(pos >= 0) {
+ return values[pos];
+ }
+ else {
+ return 0;
+ }
+ }
+
+ @Override
+ @Deprecated
+ public double doubleValue(int dimension) {
+ int pos = Arrays.binarySearch(this.indexes, dimension);
+ if(pos >= 0) {
+ return values[pos];
+ }
+ else {
+ return 0.0;
+ }
+ }
+
+ @Override
+ @Deprecated
+ public long longValue(int dimension) {
+ int pos = Arrays.binarySearch(this.indexes, dimension);
+ if(pos >= 0) {
+ return (long) values[pos];
+ }
+ else {
+ return 0;
+ }
+ }
+
+ @Override
+ @Deprecated
+ public short shortValue(int dimension) {
+ int pos = Arrays.binarySearch(this.indexes, dimension);
+ if(pos >= 0) {
+ return values[pos];
+ }
+ else {
+ return 0;
+ }
+ }
+
+ @Override
+ public Vector getColumnVector() {
+ return new Vector(getValues());
+ }
+
+ /**
+ * <p>
+ * Provides a String representation of this SparseShortVector as suitable for
+ * {@link de.lmu.ifi.dbs.elki.datasource.parser.SparseNumberVectorLabelParser}
+ * .
+ * </p>
+ *
+ * <p>
+ * The returned String is a single line with entries separated by
+ * {@link AbstractNumberVector#ATTRIBUTE_SEPARATOR}. The first entry gives the
+ * number of values actually not zero. Following entries are pairs of Integer
+ * and Short, where the Integer gives the index of the dimension and the
+ * Short gives the corresponding value.
+ * </p>
+ *
+ * <p>
+ * Example: a vector (0,1.2,1.3,0)<sup>T</sup> would result in the String<br>
+ * <code>2 2 1.2 3 1.3</code><br>
+ * </p>
+ *
+ * @return a String representation of this SparseShortVector
+ */
+ @Override
+ public String toString() {
+ StringBuilder featureLine = new StringBuilder();
+ featureLine.append(this.indexes.length);
+ for(int i = 0; i < this.indexes.length; i++) {
+ featureLine.append(ATTRIBUTE_SEPARATOR);
+ featureLine.append(this.indexes[i]);
+ featureLine.append(ATTRIBUTE_SEPARATOR);
+ featureLine.append(this.values[i]);
+ }
+
+ return featureLine.toString();
+ }
+
+ /**
+ * Returns an array consisting of the values of this feature vector.
+ *
+ * @return an array consisting of the values of this feature vector
+ */
+ private double[] getValues() {
+ double[] vals = new double[dimensionality];
+ for(int i = 0; i < indexes.length; i++) {
+ vals[this.indexes[i]] = this.values[i];
+ }
+ return vals;
+ }
+
+ @Override
+ public int iter() {
+ return 0;
+ }
+
+ @Override
+ public int iterDim(int iter) {
+ return indexes[iter];
+ }
+
+ @Override
+ public int iterAdvance(int iter) {
+ return iter + 1;
+ }
+
+ @Override
+ public boolean iterValid(int iter) {
+ return iter < indexes.length;
+ }
+
+ @Override
+ public double iterDoubleValue(int iter) {
+ return (double) values[iter];
+ }
+
+ @Override
+ public float iterFloatValue(int iter) {
+ return (float) values[iter];
+ }
+
+ @Override
+ public int iterIntValue(int iter) {
+ return (int) values[iter];
+ }
+
+ @Override
+ public short iterShortValue(int iter) {
+ return values[iter];
+ }
+
+ @Override
+ public long iterLongValue(int iter) {
+ return (long) values[iter];
+ }
+
+ @Override
+ public byte iterByteValue(int iter) {
+ return (byte) values[iter];
+ }
+
+ /**
+ * Factory class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has SparseShortVector
+ */
+ public static class Factory extends AbstractNumberVector.Factory<SparseShortVector, Short> implements SparseNumberVector.Factory<SparseShortVector, Short> {
+ @Override
+ public <A> SparseShortVector newFeatureVector(A array, ArrayAdapter<Short, A> adapter) {
+ int dim = adapter.size(array);
+ short[] values = new short[dim];
+ for(int i = 0; i < dim; i++) {
+ values[i] = adapter.get(array, i);
+ }
+ // TODO: improve efficiency
+ return new SparseShortVector(values);
+ }
+
+ @Override
+ public <A> SparseShortVector newNumberVector(A array, NumberArrayAdapter<?, ? super A> adapter) {
+ int dim = adapter.size(array);
+ short[] values = new short[dim];
+ for(int i = 0; i < dim; i++) {
+ values[i] = adapter.getShort(array, i);
+ }
+ // TODO: improve efficiency
+ return new SparseShortVector(values);
+ }
+
+ @Override
+ public SparseShortVector newNumberVector(TIntDoubleMap values, int maxdim) {
+ return new SparseShortVector(values, maxdim);
+ }
+
+ @Override
+ public ByteBufferSerializer<SparseShortVector> getDefaultSerializer() {
+ return VARIABLE_SERIALIZER;
+ }
+
+ @Override
+ public Class<? super SparseShortVector> getRestrictionClass() {
+ return SparseShortVector.class;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ @Override
+ protected SparseShortVector.Factory makeInstance() {
+ return FACTORY;
+ }
+ }
+ }
+
+ /**
+ * Serialization class using VarInt encodings.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.uses SparseShortVector - - «serializes»
+ */
+ public static class VariableSerializer implements ByteBufferSerializer<SparseShortVector> {
+ @Override
+ public SparseShortVector fromByteBuffer(ByteBuffer buffer) throws IOException {
+ final int dimensionality = ByteArrayUtil.readUnsignedVarint(buffer);
+ final int nonzero = ByteArrayUtil.readUnsignedVarint(buffer);
+ final int[] dims = new int[nonzero];
+ final short[] values = new short[nonzero];
+ for(int i = 0; i < nonzero; i++) {
+ dims[i] = ByteArrayUtil.readUnsignedVarint(buffer);
+ values[i] = (short) ByteArrayUtil.readSignedVarint(buffer);
+ }
+ return new SparseShortVector(dims, values, dimensionality);
+ }
+
+ @Override
+ public void toByteBuffer(ByteBuffer buffer, SparseShortVector vec) throws IOException {
+ ByteArrayUtil.writeUnsignedVarint(buffer, vec.dimensionality);
+ ByteArrayUtil.writeUnsignedVarint(buffer, vec.values.length);
+ for(int i = 0; i < vec.values.length; i++) {
+ ByteArrayUtil.writeUnsignedVarint(buffer, vec.indexes[i]);
+ ByteArrayUtil.writeSignedVarint(buffer, vec.values[i]);
+ }
+ }
+
+ @Override
+ public int getByteSize(SparseShortVector vec) {
+ int sum = 0;
+ sum += ByteArrayUtil.getUnsignedVarintSize(vec.dimensionality);
+ sum += ByteArrayUtil.getUnsignedVarintSize(vec.values.length);
+ for(int i = 0; i < vec.values.length; i++) {
+ sum += ByteArrayUtil.getUnsignedVarintSize(vec.indexes[i]);
+ sum += ByteArrayUtil.getSignedVarintSize(vec.values[i]);
+ }
+ return sum;
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/data/Subspace.java b/src/de/lmu/ifi/dbs/elki/data/Subspace.java
index a564d8b5..c5072e46 100644
--- a/src/de/lmu/ifi/dbs/elki/data/Subspace.java
+++ b/src/de/lmu/ifi/dbs/elki/data/Subspace.java
@@ -60,7 +60,6 @@ public class Subspace {
* @param dimensions the dimensions building this subspace
*/
public Subspace(BitSet dimensions) {
- this.dimensions.clear();
this.dimensions.or(dimensions);
dimensionality = dimensions.cardinality();
}
diff --git a/src/de/lmu/ifi/dbs/elki/data/VectorUtil.java b/src/de/lmu/ifi/dbs/elki/data/VectorUtil.java
index 205dee17..0a018622 100644
--- a/src/de/lmu/ifi/dbs/elki/data/VectorUtil.java
+++ b/src/de/lmu/ifi/dbs/elki/data/VectorUtil.java
@@ -69,7 +69,7 @@ public final class VectorUtil {
public static DoubleMinMax getRangeDouble(NumberVector<?> vec) {
DoubleMinMax minmax = new DoubleMinMax();
- for (int i = 0; i < vec.getDimensionality(); i++) {
+ for(int i = 0; i < vec.getDimensionality(); i++) {
minmax.put(vec.doubleValue(i));
}
@@ -109,35 +109,45 @@ public final class VectorUtil {
* @return angle
*/
public static double angleSparse(SparseNumberVector<?> v1, SparseNumberVector<?> v2) {
- BitSet b1 = v1.getNotNullMask();
- BitSet b2 = v2.getNotNullMask();
- BitSet both = (BitSet) b1.clone();
- both.and(b2);
-
- // TODO: add precomputed length to data type!
+ // TODO: exploit precomputed length, when available?
// Length of first vector
- double l1 = 0.0;
- for (int i = b1.nextSetBit(0); i >= 0; i = b1.nextSetBit(i + 1)) {
- final double val = v1.doubleValue(i);
+ double l1 = 0., l2 = 0., cross = 0.;
+ int i1 = v1.iter(), i2 = v2.iter();
+ while(v1.iterValid(i1) && v2.iterValid(i2)) {
+ final int d1 = v1.iterDim(i1), d2 = v2.iterDim(i2);
+ if(d1 < d2) {
+ final double val = v1.iterDoubleValue(i1);
+ l1 += val * val;
+ i1 = v1.iterAdvance(i1);
+ }
+ else if(d2 < d1) {
+ final double val = v2.iterDoubleValue(i2);
+ l2 += val * val;
+ i2 = v2.iterAdvance(i2);
+ }
+ else { // d1 == d2
+ final double val1 = v1.iterDoubleValue(i1);
+ final double val2 = v2.iterDoubleValue(i2);
+ l1 += val1 * val1;
+ l2 += val2 * val2;
+ cross += val1 * val2;
+ i1 = v1.iterAdvance(i1);
+ i2 = v2.iterAdvance(i2);
+ }
+ }
+ while(v1.iterValid(i1)) {
+ final double val = v1.iterDoubleValue(i1);
l1 += val * val;
+ i1 = v1.iterAdvance(i1);
}
- l1 = Math.sqrt(l1);
-
- // Length of second vector
- double l2 = 0.0;
- for (int i = b2.nextSetBit(0); i >= 0; i = b2.nextSetBit(i + 1)) {
- final double val = v2.doubleValue(i);
+ while(v2.iterValid(i2)) {
+ final double val = v2.iterDoubleValue(i2);
l2 += val * val;
+ i2 = v2.iterAdvance(i2);
}
- l2 = Math.sqrt(l2);
- // Cross product
- double cross = 0.0;
- for (int i = both.nextSetBit(0); i >= 0; i = both.nextSetBit(i + 1)) {
- cross += v1.doubleValue(i) * v2.doubleValue(i);
- }
- final double a = cross / (l1 * l2);
- return (a > 1.0) ? 1.0 : a;
+ final double a = cross / (Math.sqrt(l1) * Math.sqrt(l2));
+ return (a > 1.) ? 1. : a;
}
/**
@@ -149,22 +159,34 @@ public final class VectorUtil {
* @return Angle
*/
public static double angle(NumberVector<?> v1, NumberVector<?> v2, Vector o) {
+    final int dim1 = v1.getDimensionality(), dim2 = v2.getDimensionality(), dimo = o.getDimensionality();
+ final int mindim = (dim1 <= dim2) ? dim1 : dim2;
// Essentially, we want to compute this:
// v1' = v1 - o, v2' = v2 - o
// v1'.transposeTimes(v2') / (v1'.euclideanLength()*v2'.euclideanLength());
// We can just compute all three in parallel.
double[] oe = o.getArrayRef();
- final int dim = v1.getDimensionality();
double s = 0, e1 = 0, e2 = 0;
- for (int k = 0; k < dim; k++) {
- final double r1 = v1.doubleValue(k) - oe[k];
- final double r2 = v2.doubleValue(k) - oe[k];
+ for(int k = 0; k < mindim; k++) {
+ final double dk = k < dimo ? oe[k] : 0.;
+ final double r1 = v1.doubleValue(k) - dk;
+ final double r2 = v2.doubleValue(k) - dk;
s += r1 * r2;
e1 += r1 * r1;
e2 += r2 * r2;
}
+ for(int k = mindim; k < dim1; k++) {
+ final double dk = k < dimo ? oe[k] : 0.;
+ final double r1 = v1.doubleValue(k) - dk;
+ e1 += r1 * r1;
+ }
+ for(int k = mindim; k < dim2; k++) {
+ final double dk = k < dimo ? oe[k] : 0.;
+ final double r2 = v2.doubleValue(k) - dk;
+ e2 += r2 * r2;
+ }
final double a = Math.sqrt((s / e1) * (s / e2));
- return (a > 1.0) ? 1.0 : a;
+ return (a > 1.) ? 1. : a;
}
/**
@@ -176,21 +198,33 @@ public final class VectorUtil {
* @return Angle
*/
public static double angle(NumberVector<?> v1, NumberVector<?> v2, NumberVector<?> o) {
+    final int dim1 = v1.getDimensionality(), dim2 = v2.getDimensionality(), dimo = o.getDimensionality();
+ final int mindim = (dim1 <= dim2) ? dim1 : dim2;
// Essentially, we want to compute this:
// v1' = v1 - o, v2' = v2 - o
// v1'.transposeTimes(v2') / (v1'.euclideanLength()*v2'.euclideanLength());
// We can just compute all three in parallel.
- final int dim = v1.getDimensionality();
double s = 0, e1 = 0, e2 = 0;
- for (int k = 0; k < dim; k++) {
- final double r1 = v1.doubleValue(k) - o.doubleValue(k);
+ for(int k = 0; k < mindim; k++) {
+ final double ok = k < dimo ? o.doubleValue(k) : 0.;
+ final double r1 = v1.doubleValue(k) - ok;
+      final double r2 = v2.doubleValue(k) - ok;
s += r1 * r2;
e1 += r1 * r1;
e2 += r2 * r2;
}
+ for(int k = mindim; k < dim1; k++) {
+ final double ok = k < dimo ? o.doubleValue(k) : 0.;
+ final double r1 = v1.doubleValue(k) - ok;
+ e1 += r1 * r1;
+ }
+ for(int k = mindim; k < dim2; k++) {
+ final double ok = k < dimo ? o.doubleValue(k) : 0.;
+ final double r2 = v2.doubleValue(k) - ok;
+ e2 += r2 * r2;
+ }
final double a = Math.sqrt((s / e1) * (s / e2));
- return (a > 1.0) ? 1.0 : a;
+ return (a > 1.) ? 1. : a;
}
/**
@@ -203,33 +237,32 @@ public final class VectorUtil {
* @return Angle
*/
public static double cosAngle(NumberVector<?> v1, NumberVector<?> v2) {
- if (v1 instanceof SparseNumberVector<?> && v2 instanceof SparseNumberVector<?>) {
+ if(v1 instanceof SparseNumberVector<?> && v2 instanceof SparseNumberVector<?>) {
return angleSparse((SparseNumberVector<?>) v1, (SparseNumberVector<?>) v2);
}
+    final int dim1 = v1.getDimensionality(), dim2 = v2.getDimensionality();
+ final int mindim = (dim1 <= dim2) ? dim1 : dim2;
// Essentially, we want to compute this:
// v1.transposeTimes(v2) / (v1.euclideanLength() * v2.euclideanLength());
// We can just compute all three in parallel.
- final int d1 = v1.getDimensionality();
- final int d2 = v2.getDimensionality();
- final int dim = Math.min(d1, d2);
double s = 0, e1 = 0, e2 = 0;
- for (int k = 0; k < dim; k++) {
+ for(int k = 0; k < mindim; k++) {
final double r1 = v1.doubleValue(k);
final double r2 = v2.doubleValue(k);
s += r1 * r2;
e1 += r1 * r1;
e2 += r2 * r2;
}
- for (int k = dim; k < d1; k++) {
+ for(int k = mindim; k < dim1; k++) {
final double r1 = v1.doubleValue(k);
e1 += r1 * r1;
}
- for (int k = dim; k < d2; k++) {
+ for(int k = mindim; k < dim2; k++) {
final double r2 = v2.doubleValue(k);
e2 += r2 * r2;
}
final double a = Math.sqrt((s / e1) * (s / e2));
- return (a > 1.0) ? 1.0 : a;
+ return (a > 1.) ? 1. : a;
}
// TODO: add more precise but slower O(n^2) angle computation according to:
@@ -244,35 +277,56 @@ public final class VectorUtil {
* @return Angle
*/
public static double minCosAngle(SpatialComparable v1, SpatialComparable v2) {
- if (v1 instanceof NumberVector<?> && v2 instanceof NumberVector<?>) {
+ if(v1 instanceof NumberVector<?> && v2 instanceof NumberVector<?>) {
return cosAngle((NumberVector<?>) v1, (NumberVector<?>) v2);
}
+    final int dim1 = v1.getDimensionality(), dim2 = v2.getDimensionality();
+ final int mindim = (dim1 <= dim2) ? dim1 : dim2;
// Essentially, we want to compute this:
// absmax(v1.transposeTimes(v2))/(min(v1.euclideanLength())*min(v2.euclideanLength()));
// We can just compute all three in parallel.
- final int dim = v1.getDimensionality();
double s1 = 0, s2 = 0, e1 = 0, e2 = 0;
- for (int k = 0; k < dim; k++) {
+ for(int k = 0; k < mindim; k++) {
final double min1 = v1.getMin(k), max1 = v1.getMax(k);
final double min2 = v2.getMin(k), max2 = v2.getMax(k);
final double p1 = min1 * min2, p2 = min1 * max2;
final double p3 = max1 * min2, p4 = max1 * max2;
s1 += Math.max(Math.max(p1, p2), Math.max(p3, p4));
s2 += Math.min(Math.min(p1, p2), Math.min(p3, p4));
- if (max1 < 0) {
+ if(max1 < 0) {
+ e1 += max1 * max1;
+ }
+ else if(min1 > 0) {
+ e1 += min1 * min1;
+ } // else: 0
+ if(max2 < 0) {
+ e2 += max2 * max2;
+ }
+ else if(min2 > 0) {
+ e2 += min2 * min2;
+ } // else: 0
+ }
+ for(int k = mindim; k < dim1; k++) {
+ final double min1 = v1.getMin(k), max1 = v1.getMax(k);
+ if(max1 < 0.) {
e1 += max1 * max1;
- } else if (min1 > 0) {
+ }
+ else if(min1 > 0.) {
e1 += min1 * min1;
} // else: 0
- if (max2 < 0) {
+ }
+ for(int k = mindim; k < dim2; k++) {
+ final double min2 = v2.getMin(k), max2 = v2.getMax(k);
+ if(max2 < 0.) {
e2 += max2 * max2;
- } else if (min2 > 0) {
+ }
+ else if(min2 > 0.) {
e2 += min2 * min2;
} // else: 0
}
final double s = Math.max(s1, Math.abs(s2));
final double a = Math.sqrt((s / e1) * (s / e2));
- return (a > 1.0) ? 1.0 : a;
+ return (a > 1.) ? 1. : a;
}
/**
@@ -286,8 +340,8 @@ public final class VectorUtil {
*/
public static double scalarProduct(NumberVector<?> d1, NumberVector<?> d2) {
final int dim = d1.getDimensionality();
- double result = 0.0;
- for (int i = 0; i < dim; i++) {
+ double result = 0.;
+ for(int i = 0; i < dim; i++) {
result += d1.doubleValue(i) * d2.doubleValue(i);
}
return result;
@@ -305,7 +359,7 @@ public final class VectorUtil {
ArrayModifiableDBIDs mids = DBIDUtil.newArray(sample);
SortDBIDsBySingleDimension s = new SortDBIDsBySingleDimension(relation);
Vector medoid = new Vector(dim);
- for (int d = 0; d < dim; d++) {
+ for(int d = 0; d < dim; d++) {
s.setDimension(d);
medoid.set(d, relation.get(QuickSelect.median(mids, s)).doubleValue(d));
}
@@ -326,10 +380,10 @@ public final class VectorUtil {
final int cdim = mat.getColumnDimensionality();
final double[] X = new double[elements.length];
// multiply it with each row from A
- for (int i = 0; i < elements.length; i++) {
+ for(int i = 0; i < elements.length; i++) {
final double[] Arowi = elements[i];
double s = 0;
- for (int k = 0; k < cdim; k++) {
+ for(int k = 0; k < cdim; k++) {
s += Arowi[k] * v.doubleValue(k);
}
X[i] = s;
@@ -359,6 +413,18 @@ public final class VectorUtil {
* Constructor.
*
* @param data Vector data source
+ * @param dim Dimension to sort by
+ */
+ public SortDBIDsBySingleDimension(Relation<? extends NumberVector<?>> data, int dim) {
+ super();
+ this.data = data;
+ this.d = dim;
+ };
+
+ /**
+ * Constructor.
+ *
+ * @param data Vector data source
*/
public SortDBIDsBySingleDimension(Relation<? extends NumberVector<?>> data) {
super();
@@ -404,6 +470,16 @@ public final class VectorUtil {
/**
* Constructor.
+ *
+ * @param dim Dimension to sort by.
+ */
+ public SortVectorsBySingleDimension(int dim) {
+ super();
+ this.d = dim;
+ };
+
+ /**
+ * Constructor.
*/
public SortVectorsBySingleDimension() {
super();
@@ -443,11 +519,11 @@ public final class VectorUtil {
* @return a new NumberVector as a projection on the specified attributes
*/
public static <V extends NumberVector<?>> V project(V v, BitSet selectedAttributes, NumberVector.Factory<V, ?> factory) {
- if (factory instanceof SparseNumberVector.Factory) {
+ if(factory instanceof SparseNumberVector.Factory) {
final SparseNumberVector.Factory<?, ?> sfactory = (SparseNumberVector.Factory<?, ?>) factory;
TIntDoubleHashMap values = new TIntDoubleHashMap(selectedAttributes.cardinality(), 1);
- for (int d = selectedAttributes.nextSetBit(0); d >= 0; d = selectedAttributes.nextSetBit(d + 1)) {
- if (v.doubleValue(d) != 0.0) {
+ for(int d = selectedAttributes.nextSetBit(0); d >= 0; d = selectedAttributes.nextSetBit(d + 1)) {
+ if(v.doubleValue(d) != 0.0) {
values.put(d, v.doubleValue(d));
}
}
@@ -456,10 +532,11 @@ public final class VectorUtil {
@SuppressWarnings("unchecked")
V projectedVector = (V) sfactory.newNumberVector(values, selectedAttributes.cardinality());
return projectedVector;
- } else {
+ }
+ else {
double[] newAttributes = new double[selectedAttributes.cardinality()];
int i = 0;
- for (int d = selectedAttributes.nextSetBit(0); d >= 0; d = selectedAttributes.nextSetBit(d + 1)) {
+ for(int d = selectedAttributes.nextSetBit(0); d >= 0; d = selectedAttributes.nextSetBit(d + 1)) {
newAttributes[i] = v.doubleValue(d);
i++;
}
diff --git a/src/de/lmu/ifi/dbs/elki/data/images/ComputeHSBColorHistogram.java b/src/de/lmu/ifi/dbs/elki/data/images/ComputeHSBColorHistogram.java
index 96f3bf73..53dd6496 100644
--- a/src/de/lmu/ifi/dbs/elki/data/images/ComputeHSBColorHistogram.java
+++ b/src/de/lmu/ifi/dbs/elki/data/images/ComputeHSBColorHistogram.java
@@ -29,8 +29,7 @@ import java.util.List;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.WrongParameterValueException;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ListEachConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ListSizeConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntListParameter;
@@ -127,7 +126,7 @@ public class ComputeHSBColorHistogram extends AbstractComputeColorHistogram {
super.makeOptions(config);
final IntListParameter param = new IntListParameter(BINSPERPLANE_ID);
param.addConstraint(new ListSizeConstraint(3));
- param.addConstraint(new ListEachConstraint<Integer>(new GreaterEqualConstraint(1)));
+ param.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT_LIST);
if(config.grab(param)) {
List<Integer> quant = param.getValue();
diff --git a/src/de/lmu/ifi/dbs/elki/data/model/Bicluster.java b/src/de/lmu/ifi/dbs/elki/data/model/Bicluster.java
deleted file mode 100644
index 27cdef74..00000000
--- a/src/de/lmu/ifi/dbs/elki/data/model/Bicluster.java
+++ /dev/null
@@ -1,194 +0,0 @@
-package de.lmu.ifi.dbs.elki.data.model;
-
-/*
- This file is part of ELKI:
- Environment for Developing KDD-Applications Supported by Index-Structures
-
- Copyright (C) 2013
- Ludwig-Maximilians-Universität München
- Lehr- und Forschungseinheit für Datenbanksysteme
- ELKI Development Team
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-import java.util.Arrays;
-import java.util.Iterator;
-
-import de.lmu.ifi.dbs.elki.data.FeatureVector;
-import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.relation.Relation;
-import de.lmu.ifi.dbs.elki.result.textwriter.TextWriteable;
-import de.lmu.ifi.dbs.elki.result.textwriter.TextWriterStream;
-import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
-import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages;
-
-/**
- * Wrapper class to provide the basic properties of a bicluster.
- *
- * @author Arthur Zimek
- * @param <V> the type of NumberVector handled by this Result
- */
-public class Bicluster<V extends FeatureVector<?>> implements TextWriteable, Model {
- /**
- * The ids of the rows included in the bicluster.
- */
- private ArrayDBIDs rowIDs;
-
- /**
- * The ids of the rows included in the bicluster.
- */
- private int[] colIDs;
-
- /**
- * The database this bicluster is defined for.
- */
- private Relation<V> database;
-
- /**
- * Defines a new bicluster for given parameters.
- *
- * @param rowIDs the ids of the rows included in the bicluster
- * @param colIDs the ids of the columns included in the bicluster
- * @param database the database this bicluster is defined for
- */
- public Bicluster(ArrayDBIDs rowIDs, int[] colIDs, Relation<V> database) {
- this.rowIDs = rowIDs;
- this.colIDs = colIDs;
- this.database = database;
- }
-
- /**
- * Defines a new bicluster for given parameters.
- *
- * @param rowIDs the ids of the rows included in the bicluster
- * @param colIDs the ids of the columns included in the bicluster
- * @param database the database this bicluster is defined for
- *
- * @deprecated Use DBIDs, not integers!
- */
- @Deprecated
- public Bicluster(int[] rowIDs, int[] colIDs, Relation<V> database) {
- ArrayModifiableDBIDs ids = DBIDUtil.newArray(rowIDs.length);
- for(int rowid : rowIDs) {
- ids.add(DBIDUtil.importInteger(rowid));
- }
- this.rowIDs = ids;
- this.colIDs = colIDs;
- this.database = database;
- }
-
- /**
- * Sorts the row and column ids in ascending order.
- */
- public void sortIDs() {
- if(!(this.rowIDs instanceof ModifiableDBIDs)) {
- this.rowIDs = DBIDUtil.newArray(this.rowIDs);
- }
- ((ArrayModifiableDBIDs) this.rowIDs).sort();
- Arrays.sort(this.colIDs);
- }
-
- /**
- * The size of the cluster.
- * <p/>
- * The size of a bicluster is the number of included rows.
- *
- * @return the size of the bicluster, i.e., the number or rows included in the
- * bicluster
- */
- public int size() {
- return rowIDs.size();
- }
-
- /**
- * Provides an iterator for the row ids.
- * <p/>
- * Note that the iterator is not guaranteed to touch all elements if the
- * {@link #sortIDs()} is called during the lifetime of the iterator.
- *
- * @return an iterator for the row ids
- */
- public Iterator<V> rowIterator() {
- return new Iterator<V>() {
- private int index = -1;
-
- @Override
- public boolean hasNext() {
- return index + 1 < size();
- }
-
- @Override
- @SuppressWarnings("synthetic-access")
- public V next() {
- return database.get(rowIDs.get(++index));
- }
-
- @Override
- public void remove() {
- throw new UnsupportedOperationException(ExceptionMessages.UNSUPPORTED_REMOVE);
- }
-
- };
- }
-
- /**
- * Creates a DBIDs for the row IDs included in this Bicluster.
- *
- *
- * @return a DBIDs for the row IDs included in this Bicluster
- */
- public DBIDs getDatabaseObjectGroup() {
- return this.rowIDs;
- }
-
- /**
- * Getter to retrieve the database
- *
- * @return Database
- */
- public Relation<V> getDatabase() {
- return database;
- }
-
- /**
- * Provides a copy of the column IDs contributing to the bicluster.
- *
- * @return a copy of the columnsIDs
- */
- public int[] getColumnIDs() {
- int[] columnIDs = new int[colIDs.length];
- System.arraycopy(colIDs, 0, columnIDs, 0, colIDs.length);
- return columnIDs;
- }
-
- /**
- * Implementation of {@link TextWriteable} interface.
- */
- @Override
- public void writeToText(TextWriterStream out, String label) {
- if(label != null) {
- out.commentPrintLn(label);
- }
- out.commentPrintLn("Serialization class: " + this.getClass().getName());
- out.commentPrintLn("Cluster size: " + size());
- out.commentPrintLn("Cluster dimensions: " + colIDs.length);
- out.commentPrintLn("Included row IDs: " + rowIDs.toString());
- out.commentPrintLn("Included column IDs: " + FormatUtil.format(colIDs));
- }
-}
diff --git a/src/de/lmu/ifi/dbs/elki/data/model/BiclusterModel.java b/src/de/lmu/ifi/dbs/elki/data/model/BiclusterModel.java
new file mode 100644
index 00000000..d3be1b66
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/data/model/BiclusterModel.java
@@ -0,0 +1,54 @@
+package de.lmu.ifi.dbs.elki.data.model;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Wrapper class to provide the basic properties of a Bicluster.
+ *
+ * @author Arthur Zimek
+ */
+public class BiclusterModel implements Model {
+ /**
+ * The column numbers included in the Bicluster.
+ */
+ private int[] colIDs;
+
+ /**
+ * Defines a new Bicluster for given parameters.
+ *
+ * @param colIDs the numbers of the columns included in the Bicluster
+ */
+ public BiclusterModel(int[] colIDs) {
+ this.colIDs = colIDs;
+ }
+
+ /**
+ * Provides a copy of the column IDs contributing to the Bicluster.
+ *
+ * @return a copy of the columnsIDs
+ */
+ public int[] getColumnIDs() {
+ return colIDs.clone();
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/data/model/BiclusterWithInversionsModel.java b/src/de/lmu/ifi/dbs/elki/data/model/BiclusterWithInversionsModel.java
new file mode 100644
index 00000000..5f4c1bba
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/data/model/BiclusterWithInversionsModel.java
@@ -0,0 +1,66 @@
+package de.lmu.ifi.dbs.elki.data.model;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+
+/**
+ * This code was factored out of the Bicluster class, since not all biclusters
+ * have inverted rows.
+ *
+ * @author Erich Schubert
+ */
+public class BiclusterWithInversionsModel extends BiclusterModel {
+ /**
+ * The ids of inverted rows.
+ */
+ private DBIDs invertedRows = null;
+
+ /**
+ * @param colIDs Col IDs
+ */
+ public BiclusterWithInversionsModel(int[] colIDs, DBIDs invertedRows) {
+ super(colIDs);
+ this.invertedRows = invertedRows;
+ }
+
+ /**
+ * Sets the ids of the inverted rows.
+ *
+ * @param invertedRows the ids of the inverted rows
+ */
+ public void setInvertedRows(DBIDs invertedRows) {
+ this.invertedRows = DBIDUtil.makeUnmodifiable(invertedRows);
+ }
+
+ /**
+ * Provides a copy of the inverted column IDs.
+ *
+ * @return a copy of the inverted column IDs.
+ */
+ public DBIDs getInvertedRows() {
+ return invertedRows;
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/data/model/BiclusterWithInverted.java b/src/de/lmu/ifi/dbs/elki/data/model/BiclusterWithInverted.java
deleted file mode 100644
index 12d4f18b..00000000
--- a/src/de/lmu/ifi/dbs/elki/data/model/BiclusterWithInverted.java
+++ /dev/null
@@ -1,110 +0,0 @@
-package de.lmu.ifi.dbs.elki.data.model;
-
-/*
- This file is part of ELKI:
- Environment for Developing KDD-Applications Supported by Index-Structures
-
- Copyright (C) 2013
- Ludwig-Maximilians-Universität München
- Lehr- und Forschungseinheit für Datenbanksysteme
- ELKI Development Team
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-import de.lmu.ifi.dbs.elki.data.FeatureVector;
-import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.relation.Relation;
-import de.lmu.ifi.dbs.elki.result.textwriter.TextWriteable;
-import de.lmu.ifi.dbs.elki.result.textwriter.TextWriterStream;
-
-/**
- * This code was factored out of the Bicluster class, since not all biclusters
- * have inverted rows. TODO: shouldn't that be inverted columns?
- *
- * @author Erich Schubert
- *
- * @param <V> Vector type
- */
-public class BiclusterWithInverted<V extends FeatureVector<?>> extends Bicluster<V> {
- /**
- * The ids of inverted rows.
- */
- private ArrayModifiableDBIDs invertedRows;
-
- /**
- * @param rowIDs Row IDs
- * @param colIDs Col IDs
- * @param database Database
- */
- public BiclusterWithInverted(ArrayDBIDs rowIDs, int[] colIDs, Relation<V> database) {
- super(rowIDs, colIDs, database);
- }
-
- /**
- * @param rowIDs Row IDs
- * @param colIDs Col IDs
- * @param database Database
- *
- * @deprecated Use DBIDs, not integer indexes!
- */
- @Deprecated
- public BiclusterWithInverted(int[] rowIDs, int[] colIDs, Relation<V> database) {
- super(rowIDs, colIDs, database);
- }
-
- /**
- * Sets the ids of the inverted rows.
- *
- * @param invertedRows the ids of the inverted rows
- */
- public void setInvertedRows(DBIDs invertedRows) {
- this.invertedRows = DBIDUtil.newArray(invertedRows);
- }
-
- /**
- * Provides a copy of the inverted column IDs.
- *
- * @return a copy of the inverted column IDs.
- */
- public DBIDs getInvertedRows() {
- return DBIDUtil.makeUnmodifiable(invertedRows);
- }
-
- /**
- * Sorts the row and column ids (and - if applicable - the ids of inverted
- * rows) in ascending order.
- */
- @Override
- public void sortIDs() {
- super.sortIDs();
- if(this.invertedRows != null) {
- this.invertedRows.sort();
- }
- }
-
- /**
- * Implementation of {@link TextWriteable} interface.
- */
- @Override
- public void writeToText(TextWriterStream out, String label) {
- super.writeToText(out, label);
- if(this.invertedRows != null) {
- out.commentPrintLn("inverted rows (row IDs): " + this.invertedRows.toString());
- }
- }
-} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/data/projection/FeatureSelection.java b/src/de/lmu/ifi/dbs/elki/data/projection/FeatureSelection.java
index f60f1145..f6fb6a6a 100644
--- a/src/de/lmu/ifi/dbs/elki/data/projection/FeatureSelection.java
+++ b/src/de/lmu/ifi/dbs/elki/data/projection/FeatureSelection.java
@@ -36,8 +36,7 @@ import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayLikeUtil;
import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.SubsetArrayAdapter;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ListEachConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntListParameter;
@@ -80,7 +79,7 @@ public class FeatureSelection<V extends FeatureVector<F>, F> implements Projecti
this.dimensionality = dims.length;
int mind = 0;
- for (int dim : dims) {
+ for(int dim : dims) {
mind = Math.max(mind, dim + 1);
}
this.mindim = mind;
@@ -91,7 +90,7 @@ public class FeatureSelection<V extends FeatureVector<F>, F> implements Projecti
public void initialize(SimpleTypeInformation<V> in) {
final VectorFieldTypeInformation<V> vin = (VectorFieldTypeInformation<V>) in;
factory = (FeatureVector.Factory<V, F>) vin.getFactory();
- if (vin.getDimensionality() < mindim) {
+ if(vin.getDimensionality() < mindim) {
throw new AbortException("Data does not have enough dimensions for this projection!");
}
}
@@ -111,7 +110,7 @@ public class FeatureSelection<V extends FeatureVector<F>, F> implements Projecti
*/
@SuppressWarnings("unchecked")
private static <V extends FeatureVector<F>, F> ArrayAdapter<F, ? super V> getAdapter(Factory<V, F> factory) {
- if (factory instanceof NumberVector.Factory) {
+ if(factory instanceof NumberVector.Factory) {
return (ArrayAdapter<F, ? super V>) ArrayLikeUtil.NUMBERVECTORADAPTER;
}
return (ArrayAdapter<F, ? super V>) ArrayLikeUtil.FEATUREVECTORADAPTER;
@@ -145,10 +144,10 @@ public class FeatureSelection<V extends FeatureVector<F>, F> implements Projecti
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
-
+
IntListParameter selectedAttributesP = new IntListParameter(NumberVectorFeatureSelectionFilter.Parameterizer.SELECTED_ATTRIBUTES_ID);
- selectedAttributesP.addConstraint(new ListEachConstraint<Integer>(new GreaterEqualConstraint(0)));
- if (config.grab(selectedAttributesP)) {
+ selectedAttributesP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_INT_LIST);
+ if(config.grab(selectedAttributesP)) {
dims = ArrayLikeUtil.toPrimitiveIntegerArray(selectedAttributesP.getValue());
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/data/projection/NumericalFeatureSelection.java b/src/de/lmu/ifi/dbs/elki/data/projection/NumericalFeatureSelection.java
index ea03888b..158866c1 100644
--- a/src/de/lmu/ifi/dbs/elki/data/projection/NumericalFeatureSelection.java
+++ b/src/de/lmu/ifi/dbs/elki/data/projection/NumericalFeatureSelection.java
@@ -33,8 +33,7 @@ import de.lmu.ifi.dbs.elki.data.type.VectorTypeInformation;
import de.lmu.ifi.dbs.elki.datasource.filter.transform.NumberVectorFeatureSelectionFilter;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ListEachConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntListParameter;
@@ -79,7 +78,7 @@ public class NumericalFeatureSelection<V extends NumberVector<?>> implements Pro
this.dimensionality = bits.cardinality();
int mind = 0;
- for (int i = bits.nextSetBit(0); i >= 0; i = bits.nextSetBit(i + 1)) {
+ for(int i = bits.nextSetBit(0); i >= 0; i = bits.nextSetBit(i + 1)) {
mind = Math.max(mind, i + 1);
}
this.mindim = mind;
@@ -89,7 +88,7 @@ public class NumericalFeatureSelection<V extends NumberVector<?>> implements Pro
public void initialize(SimpleTypeInformation<V> in) {
final VectorFieldTypeInformation<V> vin = (VectorFieldTypeInformation<V>) in;
factory = (NumberVector.Factory<V, ?>) vin.getFactory();
- if (vin.getDimensionality() < mindim) {
+ if(vin.getDimensionality() < mindim) {
throw new AbortException("Data does not have enough dimensions for this projection!");
}
}
@@ -97,7 +96,7 @@ public class NumericalFeatureSelection<V extends NumberVector<?>> implements Pro
@Override
public V project(V data) {
double[] dbl = new double[dimensionality];
- for (int i = bits.nextSetBit(0), j = 0; i >= 0; i = bits.nextSetBit(i + 1), j++) {
+ for(int i = bits.nextSetBit(0), j = 0; i >= 0; i = bits.nextSetBit(i + 1), j++) {
dbl[j] = data.doubleValue(i);
}
return factory.newNumberVector(dbl);
@@ -129,12 +128,12 @@ public class NumericalFeatureSelection<V extends NumberVector<?>> implements Pro
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
-
+
IntListParameter selectedAttributesP = new IntListParameter(NumberVectorFeatureSelectionFilter.Parameterizer.SELECTED_ATTRIBUTES_ID);
- selectedAttributesP.addConstraint(new ListEachConstraint<Integer>(new GreaterEqualConstraint(0)));
- if (config.grab(selectedAttributesP)) {
+ selectedAttributesP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_INT_LIST);
+ if(config.grab(selectedAttributesP)) {
dims.clear();
- for (int in : selectedAttributesP.getValue()) {
+ for(int in : selectedAttributesP.getValue()) {
dims.set(in);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/data/projection/RandomProjection.java b/src/de/lmu/ifi/dbs/elki/data/projection/RandomProjection.java
index 74edd3e1..7400a3f0 100644
--- a/src/de/lmu/ifi/dbs/elki/data/projection/RandomProjection.java
+++ b/src/de/lmu/ifi/dbs/elki/data/projection/RandomProjection.java
@@ -32,7 +32,7 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.randomprojections.AchlioptasRandom
import de.lmu.ifi.dbs.elki.math.linearalgebra.randomprojections.RandomProjectionFamily;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
@@ -94,7 +94,7 @@ public class RandomProjection<V extends NumberVector<?>> implements Projection<V
int inputdim = vin.getDimensionality();
projection = family.generateProjection(inputdim, dimensionality);
- if (LOG.isDebugging()) {
+ if(LOG.isDebugging()) {
LOG.debug(projection.toString());
}
}
@@ -147,13 +147,13 @@ public class RandomProjection<V extends NumberVector<?>> implements Projection<V
super.makeOptions(config);
ObjectParameter<RandomProjectionFamily> familyP = new ObjectParameter<>(FAMILY_ID, RandomProjectionFamily.class);
familyP.setDefaultValue(AchlioptasRandomProjectionFamily.class);
- if (config.grab(familyP)) {
+ if(config.grab(familyP)) {
family = familyP.instantiateClass(config);
}
IntParameter dimP = new IntParameter(DIMENSIONALITY_ID);
- dimP.addConstraint(new GreaterEqualConstraint(1));
- if (config.grab(dimP)) {
+ dimP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(dimP)) {
dimensionality = dimP.intValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/data/spatial/SpatialUtil.java b/src/de/lmu/ifi/dbs/elki/data/spatial/SpatialUtil.java
index 3d50f6e2..50ea7243 100644
--- a/src/de/lmu/ifi/dbs/elki/data/spatial/SpatialUtil.java
+++ b/src/de/lmu/ifi/dbs/elki/data/spatial/SpatialUtil.java
@@ -25,7 +25,6 @@ package de.lmu.ifi.dbs.elki.data.spatial;
import de.lmu.ifi.dbs.elki.data.HyperBoundingBox;
import de.lmu.ifi.dbs.elki.data.ModifiableHyperBoundingBox;
-import de.lmu.ifi.dbs.elki.logging.LoggingConfiguration;
import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayAdapter;
/**
@@ -47,7 +46,23 @@ public final class SpatialUtil {
private SpatialUtil() {
// Do not instantiate.
}
-
+
+ /**
+ * Check that two spatial objects have the same dimensionality.
+ *
+ * @param box1 First object
+ * @param box2 Second object
+ * @return Dimensionality
+ * @throws IllegalArgumentException when the dimensionalities do not agree
+ */
+ public static int assertSameDimensionality(SpatialComparable box1, SpatialComparable box2) {
+ final int dim = box1.getDimensionality();
+ if (dim != box2.getDimensionality()) {
+ throw new IllegalArgumentException("The spatial objects do not have the same dimensionality!");
+ }
+ return dim;
+ }
+
/**
* Returns a clone of the minimum hyper point.
*
@@ -57,7 +72,7 @@ public final class SpatialUtil {
public static double[] getMin(SpatialComparable box) {
final int dim = box.getDimensionality();
double[] min = new double[dim];
- for(int i = 0; i < dim; i++) {
+ for (int i = 0; i < dim; i++) {
min[i] = box.getMin(i);
}
return min;
@@ -72,7 +87,7 @@ public final class SpatialUtil {
public static double[] getMax(SpatialComparable box) {
final int dim = box.getDimensionality();
double[] max = new double[dim];
- for(int i = 0; i < dim; i++) {
+ for (int i = 0; i < dim; i++) {
max[i] = box.getMax(i);
}
return max;
@@ -86,18 +101,13 @@ public final class SpatialUtil {
* @return true if the SpatialComparables intersect, false otherwise
*/
public static boolean intersects(SpatialComparable box1, SpatialComparable box2) {
- final int dim = box1.getDimensionality();
- if(dim != box2.getDimensionality()) {
- throw new IllegalArgumentException("The spatial objects do not have the same dimensionality: " + box1.getDimensionality() + " " + box2.getDimensionality());
- }
- boolean intersect = true;
- for(int i = 0; i < dim; i++) {
- if(box1.getMin(i) > box2.getMax(i) || box1.getMax(i) < box2.getMin(i)) {
- intersect = false;
- break;
+ final int dim = assertSameDimensionality(box1, box2);
+ for (int i = 0; i < dim; i++) {
+ if (box2.getMax(i) < box1.getMin(i) || box1.getMax(i) < box2.getMin(i)) {
+ return false;
}
}
- return intersect;
+ return true;
}
/**
@@ -110,19 +120,13 @@ public final class SpatialUtil {
* SpatialComparable, false otherwise
*/
public static boolean contains(SpatialComparable box1, SpatialComparable box2) {
- final int dim = box1.getDimensionality();
- if(dim != box2.getDimensionality()) {
- throw new IllegalArgumentException("The spatial objects do not have the same dimensionality!");
- }
-
- boolean contains = true;
- for(int i = 0; i < dim; i++) {
- if(box1.getMin(i) > box2.getMin(i) || box1.getMax(i) < box2.getMax(i)) {
- contains = false;
- break;
+ final int dim = assertSameDimensionality(box1, box2);
+ for (int i = 0; i < dim; i++) {
+ if (box2.getMin(i) < box1.getMin(i) || box1.getMax(i) < box2.getMax(i)) {
+ return false;
}
}
- return contains;
+ return true;
}
/**
@@ -136,18 +140,16 @@ public final class SpatialUtil {
*/
public static boolean contains(SpatialComparable box, double[] point) {
final int dim = box.getDimensionality();
- if(dim != point.length) {
+ if (dim != point.length) {
throw new IllegalArgumentException("This HyperBoundingBox and the given point need same dimensionality");
}
- boolean contains = true;
- for(int i = 0; i < dim; i++) {
- if(box.getMin(i) > point[i] || box.getMax(i) < point[i]) {
- contains = false;
- break;
+ for (int i = 0; i < dim; i++) {
+ if (box.getMin(i) > point[i] || box.getMax(i) < point[i]) {
+ return false;
}
}
- return contains;
+ return true;
}
/**
@@ -157,12 +159,12 @@ public final class SpatialUtil {
* @return the volume of this SpatialComparable
*/
public static double volume(SpatialComparable box) {
- double vol = 1;
final int dim = box.getDimensionality();
- for(int i = 0; i < dim; i++) {
+ double vol = 1.;
+ for (int i = 0; i < dim; i++) {
double delta = box.getMax(i) - box.getMin(i);
- if(delta == 0.0) {
- return 0.0;
+ if (delta == 0.) {
+ return 0.;
}
vol *= delta;
}
@@ -172,18 +174,16 @@ public final class SpatialUtil {
/**
* Compute the volume (area) of the union of two MBRs.
*
- * @param r1 First object
- * @param r2 Second object
+ * @param box1 First object
+ * @param box2 Second object
* @return Volume of union
*/
- public static double volumeUnion(SpatialComparable r1, SpatialComparable r2) {
- final int dim1 = r1.getDimensionality();
- final int dim2 = r2.getDimensionality();
- assert (!LoggingConfiguration.DEBUG || dim1 == dim2) : "Computing union with different dimensionality: " + dim1 + " vs. " + dim2;
- double volume = 1.0;
- for(int i = 0; i < dim1; i++) {
- final double min = Math.min(r1.getMin(i), r2.getMin(i));
- final double max = Math.max(r1.getMax(i), r2.getMax(i));
+ public static double volumeUnion(SpatialComparable box1, SpatialComparable box2) {
+ final int dim = assertSameDimensionality(box1, box2);
+ double volume = 1.;
+ for (int i = 0; i < dim; i++) {
+ final double min = Math.min(box1.getMin(i), box2.getMin(i));
+ final double max = Math.max(box1.getMax(i), box2.getMax(i));
volume *= (max - min);
}
return volume;
@@ -197,12 +197,12 @@ public final class SpatialUtil {
* @return the volume of this SpatialComparable
*/
public static double volumeScaled(SpatialComparable box, double scale) {
- double vol = 1;
final int dim = box.getDimensionality();
- for(int i = 0; i < dim; i++) {
+ double vol = 1.;
+ for (int i = 0; i < dim; i++) {
double delta = box.getMax(i) - box.getMin(i);
- if(delta == 0.0) {
- return 0.0;
+ if (delta == 0.) {
+ return 0.;
}
vol *= delta * scale;
}
@@ -212,19 +212,17 @@ public final class SpatialUtil {
/**
* Compute the volume (area) of the union of two MBRs.
*
- * @param r1 First object
- * @param r2 Second object
+ * @param box1 First object
+ * @param box2 Second object
* @param scale Scaling factor
* @return Volume of union
*/
- public static double volumeUnionScaled(SpatialComparable r1, SpatialComparable r2, double scale) {
- final int dim1 = r1.getDimensionality();
- final int dim2 = r2.getDimensionality();
- assert (!LoggingConfiguration.DEBUG || dim1 == dim2) : "Computing union with different dimensionality: " + dim1 + " vs. " + dim2;
- double volume = 1.0;
- for(int i = 0; i < dim1; i++) {
- final double min = Math.min(r1.getMin(i), r2.getMin(i));
- final double max = Math.max(r1.getMax(i), r2.getMax(i));
+ public static double volumeUnionScaled(SpatialComparable box1, SpatialComparable box2, double scale) {
+ final int dim = assertSameDimensionality(box1, box2);
+ double volume = 1.;
+ for (int i = 0; i < dim; i++) {
+ final double min = Math.min(box1.getMin(i), box2.getMin(i));
+ final double max = Math.max(box1.getMax(i), box2.getMax(i));
volume *= (max - min) * scale;
}
return volume;
@@ -238,12 +236,10 @@ public final class SpatialUtil {
* @return Enlargement factor
*/
public static double enlargement(SpatialComparable exist, SpatialComparable addit) {
- final int dim1 = exist.getDimensionality();
- final int dim2 = addit.getDimensionality();
- assert (!LoggingConfiguration.DEBUG || dim1 == dim2) : "Computing union with different dimensionality: " + dim1 + " vs. " + dim2;
- double v1 = 1.0;
- double v2 = 1.0;
- for(int i = 0; i < dim1; i++) {
+ final int dim = assertSameDimensionality(exist, addit);
+ double v1 = 1.;
+ double v2 = 1.;
+ for (int i = 0; i < dim; i++) {
final double emin = exist.getMin(i);
final double emax = exist.getMax(i);
final double amin = addit.getMin(i);
@@ -266,12 +262,10 @@ public final class SpatialUtil {
* @return Enlargement factor
*/
public static double enlargementScaled(SpatialComparable exist, SpatialComparable addit, double scale) {
- final int dim1 = exist.getDimensionality();
- final int dim2 = addit.getDimensionality();
- assert (!LoggingConfiguration.DEBUG || dim1 == dim2) : "Computing union with different dimensionality: " + dim1 + " vs. " + dim2;
- double v1 = 1.0;
- double v2 = 1.0;
- for(int i = 0; i < dim1; i++) {
+ final int dim = assertSameDimensionality(exist, addit);
+ double v1 = 1.;
+ double v2 = 1.;
+ for (int i = 0; i < dim; i++) {
final double emin = exist.getMin(i);
final double emax = exist.getMax(i);
final double amin = addit.getMin(i);
@@ -293,8 +287,8 @@ public final class SpatialUtil {
*/
public static double perimeter(SpatialComparable box) {
final int dim = box.getDimensionality();
- double perimeter = 0;
- for(int i = 0; i < dim; i++) {
+ double perimeter = 0.;
+ for (int i = 0; i < dim; i++) {
perimeter += box.getMax(i) - box.getMin(i);
}
return perimeter;
@@ -308,18 +302,15 @@ public final class SpatialUtil {
* @return the overlap volume.
*/
public static double overlap(SpatialComparable box1, SpatialComparable box2) {
- final int dim = box1.getDimensionality();
- if(dim != box2.getDimensionality()) {
- throw new IllegalArgumentException("This HyperBoundingBox and the given HyperBoundingBox need same dimensionality");
- }
+ final int dim = assertSameDimensionality(box1, box2);
// the maximal and minimal value of the overlap box.
double omax, omin;
// the overlap volume
- double overlap = 1.0;
+ double overlap = 1.;
- for(int i = 0; i < dim; i++) {
+ for (int i = 0; i < dim; i++) {
// The maximal value of that overlap box in the current
// dimension is the minimum of the max values.
omax = Math.min(box1.getMax(i), box2.getMax(i));
@@ -327,8 +318,8 @@ public final class SpatialUtil {
omin = Math.max(box1.getMin(i), box2.getMin(i));
// if omax <= omin in any dimension, the overlap box has a volume of zero
- if(omax <= omin) {
- return 0.0;
+ if (omax <= omin) {
+ return 0.;
}
overlap *= omax - omin;
@@ -347,17 +338,14 @@ public final class SpatialUtil {
* @return the overlap volume in relation to the singular volumes.
*/
public static double relativeOverlap(SpatialComparable box1, SpatialComparable box2) {
- final int dim = box1.getDimensionality();
- if(dim != box2.getDimensionality()) {
- throw new IllegalArgumentException("This HyperBoundingBox and the given HyperBoundingBox need same dimensionality");
- }
+ final int dim = assertSameDimensionality(box1, box2);
// the overlap volume
- double overlap = 1.0;
- double vol1 = 1.0;
- double vol2 = 1.0;
+ double overlap = 1.;
+ double vol1 = 1.;
+ double vol2 = 1.;
- for(int i = 0; i < dim; i++) {
+ for (int i = 0; i < dim; i++) {
final double box1min = box1.getMin(i);
final double box1max = box1.getMax(i);
final double box2min = box2.getMin(i);
@@ -367,8 +355,8 @@ public final class SpatialUtil {
final double omin = Math.max(box1min, box2min);
// if omax <= omin in any dimension, the overlap box has a volume of zero
- if(omax <= omin) {
- return 0.0;
+ if (omax <= omin) {
+ return 0.;
}
overlap *= omax - omin;
@@ -388,15 +376,12 @@ public final class SpatialUtil {
* HyperBoundingBox
*/
public static ModifiableHyperBoundingBox union(SpatialComparable box1, SpatialComparable box2) {
- final int dim = box1.getDimensionality();
- if(dim != box2.getDimensionality()) {
- throw new IllegalArgumentException("This HyperBoundingBox and the given HyperBoundingBox need same dimensionality");
- }
+ final int dim = assertSameDimensionality(box1, box2);
double[] min = new double[dim];
double[] max = new double[dim];
- for(int i = 0; i < dim; i++) {
+ for (int i = 0; i < dim; i++) {
min[i] = Math.min(box1.getMin(i), box2.getMin(i));
max[i] = Math.max(box1.getMax(i), box2.getMax(i));
}
@@ -411,14 +396,14 @@ public final class SpatialUtil {
* @return the union of the two specified MBRs
*/
public static HyperBoundingBox unionTolerant(SpatialComparable mbr1, SpatialComparable mbr2) {
- if(mbr1 == null && mbr2 == null) {
+ if (mbr1 == null && mbr2 == null) {
return null;
}
- if(mbr1 == null) {
+ if (mbr1 == null) {
// Clone - intentionally
return new HyperBoundingBox(mbr2);
}
- if(mbr2 == null) {
+ if (mbr2 == null) {
// Clone - intentionally
return new HyperBoundingBox(mbr1);
}
@@ -444,14 +429,14 @@ public final class SpatialUtil {
final E first = getter.get(data, 0);
dim = first.getDimensionality();
mbr = new double[2 * dim];
- for(int d = 0; d < dim; d++) {
+ for (int d = 0; d < dim; d++) {
mbr[d] = first.getMin(d);
mbr[dim + d] = first.getMax(d);
}
} // Remaining entries
- for(int i = 1; i < num; i++) {
+ for (int i = 1; i < num; i++) {
E next = getter.get(data, i);
- for(int d = 0; d < dim; d++) {
+ for (int d = 0; d < dim; d++) {
mbr[d] = Math.min(mbr[d], next.getMin(d));
mbr[dim + d] = Math.max(mbr[dim + d], next.getMax(d));
}
@@ -468,8 +453,8 @@ public final class SpatialUtil {
public static double[] centroid(SpatialComparable obj) {
final int dim = obj.getDimensionality();
double[] centroid = new double[dim];
- for(int d = 0; d < dim; d++) {
- centroid[d] = (obj.getMax(d) + obj.getMin(d)) / 2.0;
+ for (int d = 0; d < dim; d++) {
+ centroid[d] = (obj.getMax(d) + obj.getMin(d)) * .5;
}
return centroid;
}
@@ -482,17 +467,17 @@ public final class SpatialUtil {
* @return true when the boxes are equal
*/
public static boolean equals(SpatialComparable box1, SpatialComparable box2) {
- if(box1.getDimensionality() != box2.getDimensionality()) {
+ if (box1.getDimensionality() != box2.getDimensionality()) {
return false;
}
- for(int i = 0; i < box1.getDimensionality(); i++) {
- if(box1.getMin(i) != box2.getMin(i)) {
+ for (int i = 0; i < box1.getDimensionality(); i++) {
+ if (box1.getMin(i) != box2.getMin(i)) {
return false;
}
- if(box1.getMax(i) != box2.getMax(i)) {
+ if (box1.getMax(i) != box2.getMax(i)) {
return false;
}
}
return true;
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/data/synthetic/bymodel/GeneratorSingleCluster.java b/src/de/lmu/ifi/dbs/elki/data/synthetic/bymodel/GeneratorSingleCluster.java
index ccbeb9b7..8ebe2ca6 100644
--- a/src/de/lmu/ifi/dbs/elki/data/synthetic/bymodel/GeneratorSingleCluster.java
+++ b/src/de/lmu/ifi/dbs/elki/data/synthetic/bymodel/GeneratorSingleCluster.java
@@ -40,7 +40,7 @@ import de.lmu.ifi.dbs.elki.utilities.exceptions.UnableToComplyException;
*
* @author Erich Schubert
*
- * @apiviz.composedOf DistributionWithRandom
+ * @apiviz.composedOf Distribution
* @apiviz.composedOf AffineTransformation
*/
public class GeneratorSingleCluster implements GeneratorInterfaceDynamic, Model {
diff --git a/src/de/lmu/ifi/dbs/elki/data/type/SimpleTypeInformation.java b/src/de/lmu/ifi/dbs/elki/data/type/SimpleTypeInformation.java
index dcf155db..9fdabd36 100644
--- a/src/de/lmu/ifi/dbs/elki/data/type/SimpleTypeInformation.java
+++ b/src/de/lmu/ifi/dbs/elki/data/type/SimpleTypeInformation.java
@@ -30,7 +30,7 @@ import de.lmu.ifi.dbs.elki.persistent.ByteBufferSerializer;
*
* @author Erich Schubert
*
- * @apiviz.composedOf ByteBuffererSerializer
+ * @apiviz.composedOf ByteBufferSerializer
*
* @param <T> Java type we represent.
*/
diff --git a/src/de/lmu/ifi/dbs/elki/data/type/TypeUtil.java b/src/de/lmu/ifi/dbs/elki/data/type/TypeUtil.java
index 4236740b..2eabf64b 100644
--- a/src/de/lmu/ifi/dbs/elki/data/type/TypeUtil.java
+++ b/src/de/lmu/ifi/dbs/elki/data/type/TypeUtil.java
@@ -27,6 +27,7 @@ import de.lmu.ifi.dbs.elki.data.BitVector;
import de.lmu.ifi.dbs.elki.data.ClassLabel;
import de.lmu.ifi.dbs.elki.data.DoubleVector;
import de.lmu.ifi.dbs.elki.data.ExternalID;
+import de.lmu.ifi.dbs.elki.data.FeatureVector;
import de.lmu.ifi.dbs.elki.data.FloatVector;
import de.lmu.ifi.dbs.elki.data.LabelList;
import de.lmu.ifi.dbs.elki.data.NumberVector;
@@ -59,7 +60,7 @@ public final class TypeUtil {
private TypeUtil() {
// Do not instantiate.
}
-
+
/**
* Input type for algorithms that accept anything.
*/
@@ -103,7 +104,8 @@ public final class TypeUtil {
/**
* Either class label, object labels or a string - anything that will be
* accepted by
- * {@link de.lmu.ifi.dbs.elki.utilities.DatabaseUtil#guessObjectLabelRepresentation}.
+ * {@link de.lmu.ifi.dbs.elki.utilities.DatabaseUtil#guessObjectLabelRepresentation}
+ * .
*/
public static final TypeInformation GUESSED_LABEL = new AlternativeTypeInformation(LABELLIST, CLASSLABEL, STRING);
@@ -196,6 +198,11 @@ public final class TypeUtil {
public static final SimpleTypeInformation<Model> MODEL = new SimpleTypeInformation<>(Model.class);
/**
+ * Any feature vector type.
+ */
+ public static final SimpleTypeInformation<FeatureVector<?>> FEATURE_VECTORS = new SimpleTypeInformation<>(FeatureVector.class);
+
+ /**
* Make a type array easily.
*
* @param ts Types
@@ -204,4 +211,4 @@ public final class TypeUtil {
public static TypeInformation[] array(TypeInformation... ts) {
return ts;
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/database/AbstractDatabase.java b/src/de/lmu/ifi/dbs/elki/database/AbstractDatabase.java
index 8b3a64eb..ef2374cc 100644
--- a/src/de/lmu/ifi/dbs/elki/database/AbstractDatabase.java
+++ b/src/de/lmu/ifi/dbs/elki/database/AbstractDatabase.java
@@ -53,6 +53,7 @@ import de.lmu.ifi.dbs.elki.index.RangeIndex;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.result.AbstractHierarchicalResult;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
/**
@@ -67,14 +68,6 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
*/
public abstract class AbstractDatabase extends AbstractHierarchicalResult implements Database {
/**
- * Parameter to specify the indexes to use.
- * <p>
- * Key: {@code -db.index}
- * </p>
- */
- public static final OptionID INDEX_ID = new OptionID("db.index", "Database indexes to add.");
-
- /**
* The event manager, collects events and fires them on demand.
*/
protected final DatabaseEventManager eventManager = new DatabaseEventManager();
@@ -302,5 +295,40 @@ public abstract class AbstractDatabase extends AbstractHierarchicalResult implem
return "database";
}
+ /**
+ * Get the class logger.
+ *
+ * @return Class logger
+ */
protected abstract Logging getLogger();
-} \ No newline at end of file
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public abstract static class Parameterizer extends AbstractParameterizer {
+ /**
+ * Option to specify the data source for the database.
+ *
+ * Key:
+ * <p>
+ * {@code -dbc}
+ * </p>
+ */
+ public static final OptionID DATABASE_CONNECTION_ID = new OptionID("dbc", "Database connection class.");
+
+ /**
+ * Parameter to specify the indexes to use.
+ * <p>
+ * Key: {@code -db.index}
+ * </p>
+ */
+ public static final OptionID INDEX_ID = new OptionID("db.index", "Database indexes to add.");
+
+ @Override
+ protected abstract Database makeInstance();
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/database/Database.java b/src/de/lmu/ifi/dbs/elki/database/Database.java
index da4cc827..10bc0eac 100644
--- a/src/de/lmu/ifi/dbs/elki/database/Database.java
+++ b/src/de/lmu/ifi/dbs/elki/database/Database.java
@@ -42,7 +42,6 @@ import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.distance.similarityfunction.SimilarityFunction;
import de.lmu.ifi.dbs.elki.index.Index;
import de.lmu.ifi.dbs.elki.result.HierarchicalResult;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
/**
* Database specifies the requirements for any database implementation. Note
@@ -62,16 +61,6 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
*/
public interface Database extends HierarchicalResult {
/**
- * Option to specify the data source for the database.
- *
- * Key:
- * <p>
- * {@code -dbc}
- * </p>
- */
- public static final OptionID DATABASE_CONNECTION_ID = new OptionID("dbc", "Database connection class.");
-
- /**
* Initialize the database, for example by loading the input data. (Since this
* should NOT be done on construction time!)
*/
diff --git a/src/de/lmu/ifi/dbs/elki/database/HashmapDatabase.java b/src/de/lmu/ifi/dbs/elki/database/HashmapDatabase.java
index e7106927..26f18bce 100644
--- a/src/de/lmu/ifi/dbs/elki/database/HashmapDatabase.java
+++ b/src/de/lmu/ifi/dbs/elki/database/HashmapDatabase.java
@@ -50,7 +50,6 @@ import de.lmu.ifi.dbs.elki.index.IndexFactory;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectListParameter;
@@ -339,7 +338,7 @@ public class HashmapDatabase extends AbstractDatabase implements UpdatableDataba
*
* @apiviz.exclude
*/
- public static class Parameterizer extends AbstractParameterizer {
+ public static class Parameterizer extends AbstractDatabase.Parameterizer {
/**
* Holds the database connection to get the initial data from.
*/
@@ -354,7 +353,7 @@ public class HashmapDatabase extends AbstractDatabase implements UpdatableDataba
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
// Get database connection.
- final ObjectParameter<DatabaseConnection> dbcP = new ObjectParameter<>(Database.DATABASE_CONNECTION_ID, DatabaseConnection.class, FileBasedDatabaseConnection.class);
+ final ObjectParameter<DatabaseConnection> dbcP = new ObjectParameter<>(DATABASE_CONNECTION_ID, DatabaseConnection.class, FileBasedDatabaseConnection.class);
if (config.grab(dbcP)) {
databaseConnection = dbcP.instantiateClass(config);
}
diff --git a/src/de/lmu/ifi/dbs/elki/database/QueryUtil.java b/src/de/lmu/ifi/dbs/elki/database/QueryUtil.java
index 73259b5d..44afece1 100644
--- a/src/de/lmu/ifi/dbs/elki/database/QueryUtil.java
+++ b/src/de/lmu/ifi/dbs/elki/database/QueryUtil.java
@@ -25,13 +25,13 @@ package de.lmu.ifi.dbs.elki.database;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.distance.PrimitiveDistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.DoubleOptimizedDistanceKNNQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.LinearScanKNNQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.LinearScanDistanceKNNQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.LinearScanPrimitiveDistanceKNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.DoubleOptimizedKNNQuery;
+import de.lmu.ifi.dbs.elki.database.query.range.DoubleOptimizedDistanceRangeQuery;
+import de.lmu.ifi.dbs.elki.database.query.range.LinearScanDistanceRangeQuery;
import de.lmu.ifi.dbs.elki.database.query.range.LinearScanPrimitiveDistanceRangeQuery;
-import de.lmu.ifi.dbs.elki.database.query.range.LinearScanRangeQuery;
-import de.lmu.ifi.dbs.elki.database.query.range.DoubleOptimizedRangeQuery;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.query.rknn.RKNNQuery;
import de.lmu.ifi.dbs.elki.database.query.similarity.SimilarityQuery;
@@ -127,6 +127,7 @@ public final class QueryUtil {
* <li>{@link de.lmu.ifi.dbs.elki.database.query.DatabaseQuery#HINT_BULK} bulk
* query needed</li>
* </ul>
+ *
* @param relation Relation used
* @param distanceFunction Distance function
* @param hints Optimizer hints
@@ -179,6 +180,7 @@ public final class QueryUtil {
* <li>{@link de.lmu.ifi.dbs.elki.database.query.DatabaseQuery#HINT_BULK} bulk
* query needed</li>
* </ul>
+ *
* @param relation Relation used
* @param distanceFunction Distance function
* @param hints Optimizer hints
@@ -205,6 +207,7 @@ public final class QueryUtil {
* <li>{@link de.lmu.ifi.dbs.elki.database.query.DatabaseQuery#HINT_BULK} bulk
* query needed</li>
* </ul>
+ *
* @param relation Relation used
* @param distanceFunction Distance function
* @param hints Optimizer hints
@@ -233,7 +236,7 @@ public final class QueryUtil {
if(distanceQuery.getDistanceFunction() instanceof PrimitiveDoubleDistanceFunction) {
final PrimitiveDistanceQuery<O, ?> pdq = (PrimitiveDistanceQuery<O, ?>) distanceQuery;
@SuppressWarnings("unchecked")
- final KNNQuery<O, ?> knnQuery = new DoubleOptimizedKNNQuery<>((PrimitiveDistanceQuery<O, DoubleDistance>) pdq);
+ final KNNQuery<O, ?> knnQuery = new DoubleOptimizedDistanceKNNQuery<>((PrimitiveDistanceQuery<O, DoubleDistance>) pdq);
@SuppressWarnings("unchecked")
final KNNQuery<O, D> castQuery = (KNNQuery<O, D>) knnQuery;
return castQuery;
@@ -243,7 +246,7 @@ public final class QueryUtil {
return new LinearScanPrimitiveDistanceKNNQuery<>(pdq);
}
}
- return new LinearScanKNNQuery<>(distanceQuery);
+ return new LinearScanDistanceKNNQuery<>(distanceQuery);
}
/**
@@ -260,7 +263,7 @@ public final class QueryUtil {
if(distanceQuery.getDistanceFunction() instanceof PrimitiveDoubleDistanceFunction) {
final PrimitiveDistanceQuery<O, ?> pdq = (PrimitiveDistanceQuery<O, ?>) distanceQuery;
@SuppressWarnings("unchecked")
- final RangeQuery<O, ?> knnQuery = new DoubleOptimizedRangeQuery<>((PrimitiveDistanceQuery<O, DoubleDistance>) pdq);
+ final RangeQuery<O, ?> knnQuery = new DoubleOptimizedDistanceRangeQuery<>((PrimitiveDistanceQuery<O, DoubleDistance>) pdq);
@SuppressWarnings("unchecked")
final RangeQuery<O, D> castQuery = (RangeQuery<O, D>) knnQuery;
return castQuery;
@@ -270,6 +273,6 @@ public final class QueryUtil {
return new LinearScanPrimitiveDistanceRangeQuery<>(pdq);
}
}
- return new LinearScanRangeQuery<>(distanceQuery);
+ return new LinearScanDistanceRangeQuery<>(distanceQuery);
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/database/StaticArrayDatabase.java b/src/de/lmu/ifi/dbs/elki/database/StaticArrayDatabase.java
index 7916c75f..2fc4a5bb 100644
--- a/src/de/lmu/ifi/dbs/elki/database/StaticArrayDatabase.java
+++ b/src/de/lmu/ifi/dbs/elki/database/StaticArrayDatabase.java
@@ -45,7 +45,6 @@ import de.lmu.ifi.dbs.elki.index.IndexFactory;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.statistics.Duration;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectListParameter;
@@ -98,7 +97,7 @@ public class StaticArrayDatabase extends AbstractDatabase implements Parameteriz
this.idrep = null;
// Add indexes.
- if (indexFactories != null) {
+ if(indexFactories != null) {
this.indexFactories.addAll(indexFactories);
}
}
@@ -116,8 +115,8 @@ public class StaticArrayDatabase extends AbstractDatabase implements Parameteriz
*/
@Override
public void initialize() {
- if (databaseConnection != null) {
- if (LOG.isDebugging()) {
+ if(databaseConnection != null) {
+ if(LOG.isDebugging()) {
LOG.debugFine("Loading data from database connection.");
}
MultipleObjectsBundle objpackages = databaseConnection.loadData();
@@ -127,11 +126,16 @@ public class StaticArrayDatabase extends AbstractDatabase implements Parameteriz
// Find DBID column
int idrepnr = findDBIDColumn(objpackages);
// Build DBID array
- if (idrepnr == -1) {
- this.ids = DBIDUtil.generateStaticDBIDRange(objpackages.dataLength());
- } else {
- final ArrayModifiableDBIDs newids = DBIDUtil.newArray(objpackages.dataLength());
- for (int j = 0; j < objpackages.dataLength(); j++) {
+ final int numObjects = objpackages.dataLength();
+ if(LOG.isDebugging()) {
+ LOG.debugFine("Importing " + numObjects + " instances.");
+ }
+ if(idrepnr == -1) {
+ this.ids = DBIDUtil.generateStaticDBIDRange(numObjects);
+ }
+ else {
+ final ArrayModifiableDBIDs newids = DBIDUtil.newArray(numObjects);
+ for(int j = 0; j < numObjects; j++) {
DBID newid = (DBID) objpackages.data(j, idrepnr);
newids.add(newid);
}
@@ -147,11 +151,11 @@ public class StaticArrayDatabase extends AbstractDatabase implements Parameteriz
Relation<?>[] targets = alignColumns(objpackages);
DBIDIter newid = ids.iter();
- for (int j = 0; j < objpackages.dataLength(); j++, newid.advance()) {
+ for(int j = 0; j < numObjects; j++, newid.advance()) {
// insert object
- for (int i = 0; i < targets.length; i++) {
+ for(int i = 0; i < targets.length; i++) {
// DBIDs were handled above.
- if (i == idrepnr) {
+ if(i == idrepnr) {
continue;
}
@SuppressWarnings("unchecked")
@@ -160,22 +164,22 @@ public class StaticArrayDatabase extends AbstractDatabase implements Parameteriz
}
}
- for (Relation<?> relation : relations) {
+ for(Relation<?> relation : relations) {
SimpleTypeInformation<?> meta = relation.getDataTypeInformation();
// Try to add indexes where appropriate
- for (IndexFactory<?, ?> factory : indexFactories) {
- if (factory.getInputTypeRestriction().isAssignableFromType(meta)) {
+ for(IndexFactory<?, ?> factory : indexFactories) {
+ if(factory.getInputTypeRestriction().isAssignableFromType(meta)) {
@SuppressWarnings("unchecked")
final IndexFactory<Object, ?> ofact = (IndexFactory<Object, ?>) factory;
@SuppressWarnings("unchecked")
final Relation<Object> orep = (Relation<Object>) relation;
final Index index = ofact.instantiate(orep);
Duration duration = LOG.isStatistics() ? LOG.newDuration(index.getClass().getName() + ".construction") : null;
- if (duration != null) {
+ if(duration != null) {
duration.begin();
}
index.initialize();
- if (duration != null) {
+ if(duration != null) {
duration.end();
LOG.statistics(duration);
}
@@ -191,7 +195,7 @@ public class StaticArrayDatabase extends AbstractDatabase implements Parameteriz
@Override
public void addIndex(Index index) {
- if (LOG.isDebuggingFiner()) {
+ if(LOG.isDebuggingFiner()) {
LOG.debugFine("Adding index: " + index);
}
this.indexes.add(index);
@@ -206,9 +210,9 @@ public class StaticArrayDatabase extends AbstractDatabase implements Parameteriz
* @return DBID column
*/
protected int findDBIDColumn(ObjectBundle pack) {
- for (int i = 0; i < pack.metaLength(); i++) {
+ for(int i = 0; i < pack.metaLength(); i++) {
SimpleTypeInformation<?> meta = pack.meta(i);
- if (TypeUtil.DBID.isAssignableFromType(meta)) {
+ if(TypeUtil.DBID.isAssignableFromType(meta)) {
return i;
}
}
@@ -226,19 +230,19 @@ public class StaticArrayDatabase extends AbstractDatabase implements Parameteriz
// align representations.
Relation<?>[] targets = new Relation<?>[pack.metaLength()];
BitSet used = new BitSet(relations.size());
- for (int i = 0; i < targets.length; i++) {
+ for(int i = 0; i < targets.length; i++) {
SimpleTypeInformation<?> meta = pack.meta(i);
// TODO: aggressively try to match exact metas first?
// Try to match unused representations only
- for (int j = used.nextClearBit(0); j >= 0 && j < relations.size(); j = used.nextClearBit(j + 1)) {
+ for(int j = used.nextClearBit(0); j >= 0 && j < relations.size(); j = used.nextClearBit(j + 1)) {
Relation<?> relation = relations.get(j);
- if (relation.getDataTypeInformation().isAssignableFromType(meta)) {
+ if(relation.getDataTypeInformation().isAssignableFromType(meta)) {
targets[i] = relation;
used.set(j);
break;
}
}
- if (targets[i] == null) {
+ if(targets[i] == null) {
targets[i] = addNewRelation(meta);
used.set(relations.size() - 1);
}
@@ -273,7 +277,7 @@ public class StaticArrayDatabase extends AbstractDatabase implements Parameteriz
*
* @apiviz.exclude
*/
- public static class Parameterizer extends AbstractParameterizer {
+ public static class Parameterizer extends AbstractDatabase.Parameterizer {
/**
* Holds the database connection to get the initial data from.
*/
@@ -288,13 +292,13 @@ public class StaticArrayDatabase extends AbstractDatabase implements Parameteriz
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
// Get database connection.
- final ObjectParameter<DatabaseConnection> dbcP = new ObjectParameter<>(Database.DATABASE_CONNECTION_ID, DatabaseConnection.class, FileBasedDatabaseConnection.class);
- if (config.grab(dbcP)) {
+ final ObjectParameter<DatabaseConnection> dbcP = new ObjectParameter<>(DATABASE_CONNECTION_ID, DatabaseConnection.class, FileBasedDatabaseConnection.class);
+ if(config.grab(dbcP)) {
databaseConnection = dbcP.instantiateClass(config);
}
// Get indexes.
final ObjectListParameter<IndexFactory<?, ?>> indexFactoryP = new ObjectListParameter<>(INDEX_ID, IndexFactory.class, true);
- if (config.grab(indexFactoryP)) {
+ if(config.grab(indexFactoryP)) {
indexFactories = indexFactoryP.instantiateClasses(config);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/database/datastore/memory/ArrayDBIDStore.java b/src/de/lmu/ifi/dbs/elki/database/datastore/memory/ArrayDBIDStore.java
index 2d5033ab..78cb51d2 100644
--- a/src/de/lmu/ifi/dbs/elki/database/datastore/memory/ArrayDBIDStore.java
+++ b/src/de/lmu/ifi/dbs/elki/database/datastore/memory/ArrayDBIDStore.java
@@ -69,11 +69,7 @@ public class ArrayDBIDStore implements WritableDBIDDataStore {
@Override
@Deprecated
public DBID get(DBIDRef id) {
- try {
- return data.get(idmap.mapDBIDToOffset(id));
- } catch (ArrayIndexOutOfBoundsException e) {
- return null;
- }
+ return data.get(idmap.mapDBIDToOffset(id));
}
@Override
diff --git a/src/de/lmu/ifi/dbs/elki/database/datastore/memory/ArrayDoubleDistanceStore.java b/src/de/lmu/ifi/dbs/elki/database/datastore/memory/ArrayDoubleDistanceStore.java
index 2bf8161d..abb81d95 100644
--- a/src/de/lmu/ifi/dbs/elki/database/datastore/memory/ArrayDoubleDistanceStore.java
+++ b/src/de/lmu/ifi/dbs/elki/database/datastore/memory/ArrayDoubleDistanceStore.java
@@ -68,7 +68,7 @@ public class ArrayDoubleDistanceStore implements WritableDoubleDistanceDataStore
public ArrayDoubleDistanceStore(int size, DataStoreIDMap idmap, double def) {
super();
this.data = new double[size];
- if(def != 0) {
+ if (def != 0) {
Arrays.fill(this.data, def);
}
this.idmap = idmap;
@@ -77,12 +77,7 @@ public class ArrayDoubleDistanceStore implements WritableDoubleDistanceDataStore
@Override
@Deprecated
public DoubleDistance get(DBIDRef id) {
- try {
- return new DoubleDistance(data[idmap.mapDBIDToOffset(id)]);
- }
- catch(ArrayIndexOutOfBoundsException e) {
- return null;
- }
+ return new DoubleDistance(data[idmap.mapDBIDToOffset(id)]);
}
@Override
@@ -135,4 +130,4 @@ public class ArrayDoubleDistanceStore implements WritableDoubleDistanceDataStore
public String getShortName() {
return "raw";
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/database/datastore/memory/ArrayDoubleStore.java b/src/de/lmu/ifi/dbs/elki/database/datastore/memory/ArrayDoubleStore.java
index 501d5369..c41991eb 100644
--- a/src/de/lmu/ifi/dbs/elki/database/datastore/memory/ArrayDoubleStore.java
+++ b/src/de/lmu/ifi/dbs/elki/database/datastore/memory/ArrayDoubleStore.java
@@ -67,7 +67,7 @@ public class ArrayDoubleStore implements WritableDoubleDataStore {
public ArrayDoubleStore(int size, DataStoreIDMap idmap, double def) {
super();
this.data = new double[size];
- if(def != 0) {
+ if (def != 0) {
Arrays.fill(this.data, def);
}
this.idmap = idmap;
@@ -76,12 +76,7 @@ public class ArrayDoubleStore implements WritableDoubleDataStore {
@Override
@Deprecated
public Double get(DBIDRef id) {
- try {
- return Double.valueOf(data[idmap.mapDBIDToOffset(id)]);
- }
- catch(ArrayIndexOutOfBoundsException e) {
- return null;
- }
+ return Double.valueOf(data[idmap.mapDBIDToOffset(id)]);
}
@Override
@@ -134,4 +129,4 @@ public class ArrayDoubleStore implements WritableDoubleDataStore {
public String getShortName() {
return "raw";
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/database/datastore/memory/ArrayIntegerStore.java b/src/de/lmu/ifi/dbs/elki/database/datastore/memory/ArrayIntegerStore.java
index d5b5cfb9..b8a76646 100644
--- a/src/de/lmu/ifi/dbs/elki/database/datastore/memory/ArrayIntegerStore.java
+++ b/src/de/lmu/ifi/dbs/elki/database/datastore/memory/ArrayIntegerStore.java
@@ -56,7 +56,7 @@ public class ArrayIntegerStore implements WritableIntegerDataStore {
public ArrayIntegerStore(int size, DataStoreIDMap idmap) {
this(size, idmap, 0);
}
-
+
/**
* Constructor.
*
@@ -76,12 +76,7 @@ public class ArrayIntegerStore implements WritableIntegerDataStore {
@Override
@Deprecated
public Integer get(DBIDRef id) {
- try {
- return Integer.valueOf(data[idmap.mapDBIDToOffset(id)]);
- }
- catch(ArrayIndexOutOfBoundsException e) {
- return null;
- }
+ return Integer.valueOf(data[idmap.mapDBIDToOffset(id)]);
}
@Override
@@ -92,7 +87,7 @@ public class ArrayIntegerStore implements WritableIntegerDataStore {
data[off] = value.intValue();
return Integer.valueOf(ret);
}
-
+
@Override
public int intValue(DBIDRef id) {
return data[idmap.mapDBIDToOffset(id)];
@@ -134,4 +129,4 @@ public class ArrayIntegerStore implements WritableIntegerDataStore {
public String getShortName() {
return "raw";
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/database/datastore/memory/ArrayRecordStore.java b/src/de/lmu/ifi/dbs/elki/database/datastore/memory/ArrayRecordStore.java
index 2767b180..e9c78cb7 100644
--- a/src/de/lmu/ifi/dbs/elki/database/datastore/memory/ArrayRecordStore.java
+++ b/src/de/lmu/ifi/dbs/elki/database/datastore/memory/ArrayRecordStore.java
@@ -74,18 +74,7 @@ public class ArrayRecordStore implements WritableRecordStore {
*/
@SuppressWarnings("unchecked")
protected <T> T get(DBIDRef id, int index) {
- try {
- return (T) data[idmap.mapDBIDToOffset(id)][index];
- }
- catch(ArrayIndexOutOfBoundsException e) {
- return null;
- }
- catch(NullPointerException e) {
- return null;
- }
- catch(ClassCastException e) {
- return null;
- }
+ return (T) data[idmap.mapDBIDToOffset(id)][index];
}
/**
@@ -162,4 +151,4 @@ public class ArrayRecordStore implements WritableRecordStore {
public boolean remove(DBIDRef id) {
throw new UnsupportedOperationException("ArrayStore records cannot be removed.");
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/database/datastore/memory/ArrayStore.java b/src/de/lmu/ifi/dbs/elki/database/datastore/memory/ArrayStore.java
index 367b634f..695ddc48 100644
--- a/src/de/lmu/ifi/dbs/elki/database/datastore/memory/ArrayStore.java
+++ b/src/de/lmu/ifi/dbs/elki/database/datastore/memory/ArrayStore.java
@@ -62,18 +62,7 @@ public class ArrayStore<T> implements WritableDataStore<T> {
@SuppressWarnings("unchecked")
@Override
public T get(DBIDRef id) {
- try {
- return (T) data[idmap.mapDBIDToOffset(id)];
- }
- catch(ArrayIndexOutOfBoundsException e) {
- return null;
- }
- catch(NullPointerException e) {
- return null;
- }
- catch(ClassCastException e) {
- return null;
- }
+ return (T) data[idmap.mapDBIDToOffset(id)];
}
@Override
@@ -103,4 +92,4 @@ public class ArrayStore<T> implements WritableDataStore<T> {
public String getShortName() {
return "raw";
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/database/datastore/memory/MapIntegerDBIDRecordStore.java b/src/de/lmu/ifi/dbs/elki/database/datastore/memory/MapIntegerDBIDRecordStore.java
index c9004a0f..9efe2221 100644
--- a/src/de/lmu/ifi/dbs/elki/database/datastore/memory/MapIntegerDBIDRecordStore.java
+++ b/src/de/lmu/ifi/dbs/elki/database/datastore/memory/MapIntegerDBIDRecordStore.java
@@ -97,18 +97,10 @@ public class MapIntegerDBIDRecordStore implements WritableRecordStore {
@SuppressWarnings("unchecked")
protected <T> T get(DBIDRef id, int index) {
Object[] d = data.get(DBIDUtil.asInteger(id));
- if(d == null) {
- return null;
- }
- try {
- return (T) d[index];
- }
- catch(ClassCastException e) {
- return null;
- }
- catch(ArrayIndexOutOfBoundsException e) {
+ if (d == null) {
return null;
}
+ return (T) d[index];
}
/**
@@ -123,7 +115,7 @@ public class MapIntegerDBIDRecordStore implements WritableRecordStore {
@SuppressWarnings("unchecked")
protected <T> T set(DBIDRef id, int index, T value) {
Object[] d = data.get(DBIDUtil.asInteger(id));
- if(d == null) {
+ if (d == null) {
d = new Object[rlen];
data.put(DBIDUtil.asInteger(id), d);
}
@@ -191,4 +183,4 @@ public class MapIntegerDBIDRecordStore implements WritableRecordStore {
public boolean remove(DBIDRef id) {
return data.remove(DBIDUtil.asInteger(id)) != null;
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/database/datastore/memory/MapRecordStore.java b/src/de/lmu/ifi/dbs/elki/database/datastore/memory/MapRecordStore.java
index 8c8c4bc9..045d5bf4 100644
--- a/src/de/lmu/ifi/dbs/elki/database/datastore/memory/MapRecordStore.java
+++ b/src/de/lmu/ifi/dbs/elki/database/datastore/memory/MapRecordStore.java
@@ -90,18 +90,10 @@ public class MapRecordStore implements WritableRecordStore {
@SuppressWarnings("unchecked")
protected <T> T get(DBIDRef id, int index) {
Object[] d = data.get(DBIDUtil.deref(id));
- if(d == null) {
- return null;
- }
- try {
- return (T) d[index];
- }
- catch(ClassCastException e) {
- return null;
- }
- catch(ArrayIndexOutOfBoundsException e) {
+ if (d == null) {
return null;
}
+ return (T) d[index];
}
/**
@@ -116,7 +108,7 @@ public class MapRecordStore implements WritableRecordStore {
@SuppressWarnings("unchecked")
protected <T> T set(DBIDRef id, int index, T value) {
Object[] d = data.get(DBIDUtil.deref(id));
- if(d == null) {
+ if (d == null) {
d = new Object[rlen];
data.put(DBIDUtil.deref(id), d);
}
@@ -185,4 +177,4 @@ public class MapRecordStore implements WritableRecordStore {
return data.remove(id) != null;
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/database/ids/ArrayDBIDs.java b/src/de/lmu/ifi/dbs/elki/database/ids/ArrayDBIDs.java
index 865e0bb7..5839a2e0 100644
--- a/src/de/lmu/ifi/dbs/elki/database/ids/ArrayDBIDs.java
+++ b/src/de/lmu/ifi/dbs/elki/database/ids/ArrayDBIDs.java
@@ -76,4 +76,13 @@ public interface ArrayDBIDs extends DBIDs {
* @return Offset of key
*/
public int binarySearch(DBIDRef key);
+
+ /**
+ * Slice a subarray (as view, not copy!)
+ *
+ * @param begin Begin (inclusive)
+ * @param end End (exclusive)
+ * @return Array slice.
+ */
+ public ArrayDBIDs slice(int begin, int end);
}
diff --git a/src/de/lmu/ifi/dbs/elki/database/ids/DBIDFactory.java b/src/de/lmu/ifi/dbs/elki/database/ids/DBIDFactory.java
index 4c6472a7..1b269eea 100644
--- a/src/de/lmu/ifi/dbs/elki/database/ids/DBIDFactory.java
+++ b/src/de/lmu/ifi/dbs/elki/database/ids/DBIDFactory.java
@@ -25,6 +25,7 @@ package de.lmu.ifi.dbs.elki.database.ids;
import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDPair;
import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDPair;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNHeap;
import de.lmu.ifi.dbs.elki.database.ids.distance.KNNHeap;
import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.ids.integer.TrivialDBIDFactory;
@@ -217,6 +218,14 @@ public interface DBIDFactory {
<D extends Distance<D>> KNNHeap<D> newHeap(KNNList<D> exist);
/**
+ * Create an appropriate heap for double distances.
+ *
+ * @param k K value
+ * @return New heap of size k, appropriate for this distance type.
+ */
+ DoubleDistanceKNNHeap newDoubleDistanceHeap(int k);
+
+ /**
* Get a serializer for DBIDs.
*
* @return DBID serializer
diff --git a/src/de/lmu/ifi/dbs/elki/database/ids/DBIDUtil.java b/src/de/lmu/ifi/dbs/elki/database/ids/DBIDUtil.java
index 78dba268..c60e63ae 100644
--- a/src/de/lmu/ifi/dbs/elki/database/ids/DBIDUtil.java
+++ b/src/de/lmu/ifi/dbs/elki/database/ids/DBIDUtil.java
@@ -27,6 +27,7 @@ import java.util.Random;
import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDPair;
import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDPair;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNHeap;
import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList;
import de.lmu.ifi.dbs.elki.database.ids.distance.KNNHeap;
import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
@@ -41,6 +42,7 @@ import de.lmu.ifi.dbs.elki.database.ids.integer.UnmodifiableIntegerDBIDs;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.persistent.ByteBufferSerializer;
import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.UnsafeRandom;
/**
* DBID Utility functions.
@@ -549,6 +551,16 @@ public final class DBIDUtil {
}
/**
+ * Create an appropriate heap for double distances.
+ *
+ * @param k K value
+ * @return New heap of size k, appropriate for this distance type.
+ */
+ public static DoubleDistanceKNNHeap newDoubleDistanceHeap(int k) {
+ return DBIDFactory.FACTORY.newDoubleDistanceHeap(k);
+ }
+
+ /**
* Build a new heap from a given list.
*
* @param exist Existing result
@@ -560,25 +572,13 @@ public final class DBIDUtil {
}
/**
- * Produce a random sample of the given DBIDs.
- *
- * @param source Original DBIDs
- * @param k k Parameter
- * @param rnd Random generator
- * @return new DBIDs
- */
- public static ModifiableDBIDs randomSample(DBIDs source, int k, RandomFactory rnd) {
- return randomSample(source, k, rnd.getRandom());
- }
-
- /**
* Produce a random shuffling of the given DBID array.
*
* @param ids Original DBIDs
* @param rnd Random generator
*/
public static void randomShuffle(ArrayModifiableDBIDs ids, RandomFactory rnd) {
- randomShuffle(ids, rnd.getRandom(), ids.size());
+ randomShuffle(ids, rnd.getSingleThreadedRandom(), ids.size());
}
/**
@@ -639,16 +639,29 @@ public final class DBIDUtil {
*
* @param source Original DBIDs
* @param k k Parameter
+ * @param rnd Random generator
+ * @return new DBIDs
+ */
+ public static ModifiableDBIDs randomSample(DBIDs source, int k, RandomFactory rnd) {
+ return randomSample(source, k, rnd.getSingleThreadedRandom());
+ }
+
+ /**
+ * Produce a random sample of the given DBIDs.
+ *
+ * @param source Original DBIDs
+ * @param k k Parameter
* @param random Random generator
* @return new DBIDs
*/
public static ModifiableDBIDs randomSample(DBIDs source, int k, Random random) {
- if (k <= 0 || k > source.size()) {
+ if (k < 0 || k > source.size()) {
throw new IllegalArgumentException("Illegal value for size of random sample: " + k + " > " + source.size() + " or < 0");
}
if (random == null) {
- random = new Random();
+ random = new UnsafeRandom(); // Fast, and we're single-threaded here anyway.
}
+
// TODO: better balancing for different sizes
// Two methods: constructive vs. destructive
if (k < source.size() >> 1) {
@@ -672,6 +685,46 @@ public final class DBIDUtil {
}
/**
+ * Randomly split IDs into {@code p} partitions of almost-equal size.
+ *
+ * @param ids Original DBIDs
+ * @param p Desired number of partitions.
+ * @param rnd Random generator
+ */
+ public static ArrayDBIDs[] randomSplit(DBIDs ids, int p, RandomFactory rnd) {
+ return randomSplit(ids, p, rnd.getSingleThreadedRandom());
+ }
+
+ /**
+ * Randomly split IDs into {@code p} partitions of almost-equal size.
+ *
+ * @param oids Original DBIDs
+ * @param p Desired number of partitions.
+ * @param random Random generator
+ */
+ public static ArrayDBIDs[] randomSplit(DBIDs oids, int p, Random random) {
+ if (random == null) {
+ random = new UnsafeRandom(); // Fast, and we're single-threaded here anyway.
+ }
+ ArrayModifiableDBIDs ids = newArray(oids);
+ final int size = ids.size();
+ ArrayDBIDs[] split = new ArrayDBIDs[p];
+ // Shuffle
+ for (int i = 1; i < size; i++) {
+ ids.swap(i - 1, i + random.nextInt(size - i));
+ }
+ final int minsize = size / p; // Floor.
+ final int extra = size % p; // Remainder
+ for (int beg = 0, part = 0; part < p; part++) {
+ // First partitions are smaller, last partitions are larger.
+ final int psize = minsize + ((part < extra) ? 1 : 0);
+ split[part] = ids.slice(beg, beg + psize);
+ beg += psize;
+ }
+ return split;
+ }
+
+ /**
* Get a subset of the KNN result.
*
* @param list Existing list
diff --git a/src/de/lmu/ifi/dbs/elki/database/ids/EmptyDBIDs.java b/src/de/lmu/ifi/dbs/elki/database/ids/EmptyDBIDs.java
index 8a7b2e28..0081fd44 100644
--- a/src/de/lmu/ifi/dbs/elki/database/ids/EmptyDBIDs.java
+++ b/src/de/lmu/ifi/dbs/elki/database/ids/EmptyDBIDs.java
@@ -82,6 +82,11 @@ public class EmptyDBIDs implements ArrayStaticDBIDs, SetDBIDs {
return -1; // Not found
}
+ @Override
+ public ArrayDBIDs slice(int begin, int end) {
+ return this;
+ }
+
/**
* Iterator for empty DBIDs-
*
diff --git a/src/de/lmu/ifi/dbs/elki/database/ids/distance/DoubleDistanceDBIDPairList.java b/src/de/lmu/ifi/dbs/elki/database/ids/distance/DoubleDistanceDBIDPairList.java
index f9bfc20a..58ce433d 100644
--- a/src/de/lmu/ifi/dbs/elki/database/ids/distance/DoubleDistanceDBIDPairList.java
+++ b/src/de/lmu/ifi/dbs/elki/database/ids/distance/DoubleDistanceDBIDPairList.java
@@ -101,6 +101,11 @@ public class DoubleDistanceDBIDPairList implements ModifiableDoubleDistanceDBIDL
}
@Override
+ public void clear() {
+ storage.clear();
+ }
+
+ @Override
public void sort() {
Collections.sort(storage, DistanceDBIDResultUtil.distanceComparator());
}
diff --git a/src/de/lmu/ifi/dbs/elki/database/ids/distance/DoubleDistanceKNNHeap.java b/src/de/lmu/ifi/dbs/elki/database/ids/distance/DoubleDistanceKNNHeap.java
index 1e75f120..ed687877 100644
--- a/src/de/lmu/ifi/dbs/elki/database/ids/distance/DoubleDistanceKNNHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/database/ids/distance/DoubleDistanceKNNHeap.java
@@ -39,8 +39,9 @@ public interface DoubleDistanceKNNHeap extends KNNHeap<DoubleDistance> {
*
* @param distance Distance value
* @param id ID number
+ * @return updated k-distance
*/
- void add(double distance, DBIDRef id);
+ double insert(double distance, DBIDRef id);
/**
* Add a distance-id pair to the heap unless the distance is too large.
@@ -51,7 +52,7 @@ public interface DoubleDistanceKNNHeap extends KNNHeap<DoubleDistance> {
* @param id ID number
*/
@Deprecated
- void add(Double distance, DBIDRef id);
+ void insert(Double distance, DBIDRef id);
/**
* Add a distance-id pair to the heap unless the distance is too large.
@@ -60,7 +61,7 @@ public interface DoubleDistanceKNNHeap extends KNNHeap<DoubleDistance> {
*
* @param e Existing distance pair
*/
- void add(DoubleDistanceDBIDPair e);
+ void insert(DoubleDistanceDBIDPair e);
/**
* {@inheritDoc}
@@ -70,7 +71,7 @@ public interface DoubleDistanceKNNHeap extends KNNHeap<DoubleDistance> {
*/
@Override
@Deprecated
- void add(DoubleDistance dist, DBIDRef id);
+ void insert(DoubleDistance dist, DBIDRef id);
/**
* Get the distance to the k nearest neighbor, or maxdist otherwise.
diff --git a/src/de/lmu/ifi/dbs/elki/database/ids/distance/KNNHeap.java b/src/de/lmu/ifi/dbs/elki/database/ids/distance/KNNHeap.java
index c02071e7..5e94de47 100644
--- a/src/de/lmu/ifi/dbs/elki/database/ids/distance/KNNHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/database/ids/distance/KNNHeap.java
@@ -70,7 +70,7 @@ public interface KNNHeap<D extends Distance<D>> {
* @param distance Distance value
* @param id ID number
*/
- void add(D distance, DBIDRef id);
+ void insert(D distance, DBIDRef id);
/**
* Current size of heap.
diff --git a/src/de/lmu/ifi/dbs/elki/database/ids/distance/ModifiableDoubleDistanceDBIDList.java b/src/de/lmu/ifi/dbs/elki/database/ids/distance/ModifiableDoubleDistanceDBIDList.java
index 12cdaf69..4b29e3b6 100644
--- a/src/de/lmu/ifi/dbs/elki/database/ids/distance/ModifiableDoubleDistanceDBIDList.java
+++ b/src/de/lmu/ifi/dbs/elki/database/ids/distance/ModifiableDoubleDistanceDBIDList.java
@@ -60,4 +60,9 @@ public interface ModifiableDoubleDistanceDBIDList extends DoubleDistanceDBIDList
* @param pair Pair to add
*/
void add(DoubleDistanceDBIDPair pair);
+
+ /**
+ * Clear the list contents.
+ */
+ void clear();
}
diff --git a/src/de/lmu/ifi/dbs/elki/database/ids/generic/AbstractKNNHeap.java b/src/de/lmu/ifi/dbs/elki/database/ids/generic/AbstractKNNHeap.java
index 2c2e60b0..c42c728d 100644
--- a/src/de/lmu/ifi/dbs/elki/database/ids/generic/AbstractKNNHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/database/ids/generic/AbstractKNNHeap.java
@@ -59,7 +59,7 @@ abstract class AbstractKNNHeap<P extends DistanceDBIDPair<D>, D extends Distance
*
* @param pair Pair to add.
*/
- public abstract void add(P pair);
+ public abstract void insert(P pair);
@Override
public final int getK() {
diff --git a/src/de/lmu/ifi/dbs/elki/database/ids/generic/DistanceDBIDPairKNNHeap.java b/src/de/lmu/ifi/dbs/elki/database/ids/generic/DistanceDBIDPairKNNHeap.java
index 88459077..e102d716 100644
--- a/src/de/lmu/ifi/dbs/elki/database/ids/generic/DistanceDBIDPairKNNHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/database/ids/generic/DistanceDBIDPairKNNHeap.java
@@ -68,7 +68,7 @@ public class DistanceDBIDPairKNNHeap<D extends Distance<D>> extends AbstractKNNH
}
@Override
- public void add(D distance, DBIDRef id) {
+ public void insert(D distance, DBIDRef id) {
if (size() < getK()) {
heap.add(DBIDFactory.FACTORY.newDistancePair(distance, id));
heapModified();
@@ -83,7 +83,7 @@ public class DistanceDBIDPairKNNHeap<D extends Distance<D>> extends AbstractKNNH
}
@Override
- public void add(DistanceDBIDPair<D> pair) {
+ public void insert(DistanceDBIDPair<D> pair) {
if (size() < getK() || knndistance.compareTo(pair.getDistance()) >= 0) {
heap.add(pair);
heapModified();
diff --git a/src/de/lmu/ifi/dbs/elki/database/ids/generic/DoubleDistanceDBIDPairKNNHeap.java b/src/de/lmu/ifi/dbs/elki/database/ids/generic/DoubleDistanceDBIDPairKNNHeap.java
index 829bb00c..8e489a79 100644
--- a/src/de/lmu/ifi/dbs/elki/database/ids/generic/DoubleDistanceDBIDPairKNNHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/database/ids/generic/DoubleDistanceDBIDPairKNNHeap.java
@@ -36,9 +36,9 @@ import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
*
* See also: {@link de.lmu.ifi.dbs.elki.database.ids.DBIDUtil#newHeap}!
*
- * Experiments have shown that it <em>can</em> be much more performant to track the
- * knndistance <em>outside</em> of the heap, and do comparisons on the stack:
- * <blockquote>
+ * Experiments have shown that it <em>can</em> be much more performant to track
+ * the knndistance <em>outside</em> of the heap, and do comparisons on the
+ * stack: <blockquote>
*
* <pre>
* {@code
@@ -67,8 +67,13 @@ public class DoubleDistanceDBIDPairKNNHeap extends AbstractKNNHeap<DoubleDistanc
/**
* Comparator class.
*/
- public static final Comparator<DoubleDistanceDBIDPair> COMPARATOR = new Comp();
-
+ public static final Comparator<DoubleDistanceDBIDPair> COMPARATOR = new Comp();
+
+ /**
+ * Reverse comparator.
+ */
+ public static final Comparator<DoubleDistanceDBIDPair> REVERSE_COMPARATOR = new RComp();
+
/**
* Cached distance to k nearest neighbor (to avoid going through {@link #peek}
* too often).
@@ -87,7 +92,8 @@ public class DoubleDistanceDBIDPairKNNHeap extends AbstractKNNHeap<DoubleDistanc
}
/**
- * Serialize to a {@link DoubleDistanceDBIDPairKNNList}. This empties the heap!
+ * Serialize to a {@link DoubleDistanceDBIDPairKNNList}. This empties the
+ * heap!
*
* @return KNNList with the heaps contents.
*/
@@ -103,13 +109,15 @@ public class DoubleDistanceDBIDPairKNNHeap extends AbstractKNNHeap<DoubleDistanc
*
* @param distance Distance value
* @param id ID number
+ * @return knn distance.
*/
@Override
- public final void add(final double distance, final DBIDRef id) {
+ public final double insert(final double distance, final DBIDRef id) {
if (size() < getK() || knndistance >= distance) {
heap.add(DBIDFactory.FACTORY.newDistancePair(distance, id));
heapModified();
}
+ return knndistance;
}
/**
@@ -122,7 +130,7 @@ public class DoubleDistanceDBIDPairKNNHeap extends AbstractKNNHeap<DoubleDistanc
*/
@Override
@Deprecated
- public final void add(final Double distance, final DBIDRef id) {
+ public final void insert(final Double distance, final DBIDRef id) {
if (size() < getK() || knndistance >= distance) {
heap.add(DBIDFactory.FACTORY.newDistancePair(distance, id));
heapModified();
@@ -138,7 +146,7 @@ public class DoubleDistanceDBIDPairKNNHeap extends AbstractKNNHeap<DoubleDistanc
}
@Override
- public void add(final DoubleDistanceDBIDPair e) {
+ public void insert(final DoubleDistanceDBIDPair e) {
if (size() < getK() || knndistance >= e.doubleDistance()) {
heap.add(e);
heapModified();
@@ -154,8 +162,8 @@ public class DoubleDistanceDBIDPairKNNHeap extends AbstractKNNHeap<DoubleDistanc
*/
@Override
@Deprecated
- public void add(DoubleDistance dist, DBIDRef id) {
- add(dist.doubleValue(), id);
+ public void insert(DoubleDistance dist, DBIDRef id) {
+ insert(dist.doubleValue(), id);
}
/**
@@ -193,4 +201,18 @@ public class DoubleDistanceDBIDPairKNNHeap extends AbstractKNNHeap<DoubleDistanc
return -Double.compare(o1.doubleDistance(), o2.doubleDistance());
}
}
+
+ /**
+ * Comparator to use.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ protected static class RComp implements Comparator<DoubleDistanceDBIDPair> {
+ @Override
+ public int compare(DoubleDistanceDBIDPair o1, DoubleDistanceDBIDPair o2) {
+ return Double.compare(o1.doubleDistance(), o2.doubleDistance());
+ }
+ }
}
diff --git a/src/de/lmu/ifi/dbs/elki/database/ids/generic/DoubleDistanceDBIDPairKNNListHeap.java b/src/de/lmu/ifi/dbs/elki/database/ids/generic/DoubleDistanceDBIDPairKNNListHeap.java
new file mode 100644
index 00000000..ca00129b
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/database/ids/generic/DoubleDistanceDBIDPairKNNListHeap.java
@@ -0,0 +1,289 @@
+package de.lmu.ifi.dbs.elki.database.ids.generic;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Arrays;
+
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDPair;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNHeap;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
+
+/**
+ * Finalized KNN List.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.composedOf DoubleDistanceDBIDPair
+ * @apiviz.has DoubleDistanceDBIDListIter
+ */
+public class DoubleDistanceDBIDPairKNNListHeap implements DoubleDistanceKNNList, DoubleDistanceKNNHeap {
+ /**
+ * The value of k this was materialized for.
+ */
+ private final int k;
+
+ /**
+ * The actual data array.
+ */
+ private DoubleDistanceDBIDPair[] data;
+
+ /**
+ * Current size
+ */
+ private int size;
+
+ /**
+ * Constructor.
+ *
+ * @param k K parameter
+ */
+ public DoubleDistanceDBIDPairKNNListHeap(int k) {
+ super();
+ this.data = new DoubleDistanceDBIDPair[k + 11];
+ this.k = k;
+ }
+
+ @Override
+ public void clear() {
+ size = 0;
+ Arrays.fill(data, null);
+ }
+
+ @Override
+ public double insert(double distance, DBIDRef id) {
+ if (size < k || distance <= data[k - 1].doubleDistance()) {
+ insert(DBIDUtil.newDistancePair(distance, id));
+ }
+ return (size < k) ? Double.POSITIVE_INFINITY : get(k - 1).doubleDistance();
+ }
+
+ @Override
+ @Deprecated
+ public void insert(Double distance, DBIDRef id) {
+ insert(DBIDUtil.newDistancePair(distance.doubleValue(), id));
+ }
+
+ @Override
+ @Deprecated
+ public void insert(DoubleDistance dist, DBIDRef id) {
+ insert(DBIDUtil.newDistancePair(dist.doubleValue(), id));
+ }
+
+ @Override
+ public void insert(DoubleDistanceDBIDPair e) {
+ if (size >= k) {
+ if (e.doubleDistance() > data[size - 1].doubleDistance()) {
+ return;
+ }
+ // Ensure we have enough space.
+ final int len = data.length;
+ if (size > len) {
+ final int newlength = len + (len >>> 1);
+ assert (newlength > size);
+ data = Arrays.copyOf(data, newlength);
+ }
+ }
+ insertionSort(e);
+ // Truncate if necessary:
+ if (size > k && data[k].doubleDistance() > data[k - 1].doubleDistance()) {
+ size = k;
+ }
+ }
+
+ /**
+ * Perform insertion sort.
+ *
+ * @param obj Object to insert
+ */
+ private void insertionSort(DoubleDistanceDBIDPair obj) {
+ // Insertion sort:
+ int pos = size;
+ while (pos > 0) {
+ DoubleDistanceDBIDPair pobj = data[pos - 1];
+ if (pobj.doubleDistance() <= obj.doubleDistance()) {
+ break;
+ }
+ data[pos] = pobj;
+ --pos;
+ }
+ data[pos] = obj;
+ ++size;
+ }
+
+ @Override
+ public DoubleDistanceDBIDPair poll() {
+ assert (size > 0);
+ return data[size--];
+ }
+
+ @Override
+ public DoubleDistanceDBIDPair peek() {
+ assert (size > 0);
+ return data[size - 1];
+ }
+
+ @Override
+ public DoubleDistanceKNNList toKNNList() {
+ return this;
+ }
+
+ @Override
+ public int getK() {
+ return k;
+ }
+
+ @Override
+ @Deprecated
+ public DoubleDistance getKNNDistance() {
+ if (size < k) {
+ return DoubleDistance.INFINITE_DISTANCE;
+ }
+ return get(k - 1).getDistance();
+ }
+
+ @Override
+ public double doubleKNNDistance() {
+ return (size < k) ? Double.POSITIVE_INFINITY : get(k - 1).doubleDistance();
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder buf = new StringBuilder();
+ buf.append("kNNList[");
+ for (DoubleDistanceDBIDListIter iter = this.iter(); iter.valid();) {
+ buf.append(iter.doubleDistance()).append(':').append(DBIDUtil.toString(iter));
+ iter.advance();
+ if (iter.valid()) {
+ buf.append(',');
+ }
+ }
+ buf.append(']');
+ return buf.toString();
+ }
+
+ @Override
+ public DoubleDistanceDBIDPair get(int index) {
+ return data[index];
+ }
+
+ @Override
+ public DoubleDistanceDBIDListIter iter() {
+ return new Itr();
+ }
+
+ @Override
+ public int size() {
+ return data.length;
+ }
+
+ @Override
+ public boolean contains(DBIDRef o) {
+ for (DBIDIter iter = iter(); iter.valid(); iter.advance()) {
+ if (DBIDUtil.equal(iter, o)) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return size() == 0;
+ }
+
+ /**
+ * Iterator.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ private class Itr implements DoubleDistanceDBIDListIter {
+ /**
+ * Cursor position.
+ */
+ private int pos = 0;
+
+ @Override
+ public int internalGetIndex() {
+ return get(pos).internalGetIndex();
+ }
+
+ @Override
+ public boolean valid() {
+ return pos < data.length;
+ }
+
+ @Override
+ public void advance() {
+ pos++;
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * @deprecated use {@link #doubleDistance}!
+ */
+ @Override
+ @Deprecated
+ public DoubleDistance getDistance() {
+ return get(pos).getDistance();
+ }
+
+ @Override
+ public double doubleDistance() {
+ return get(pos).doubleDistance();
+ }
+
+ @Override
+ public DoubleDistanceDBIDPair getDistancePair() {
+ return get(pos);
+ }
+
+ @Override
+ public int getOffset() {
+ return pos;
+ }
+
+ @Override
+ public void advance(int count) {
+ pos += count;
+ }
+
+ @Override
+ public void retract() {
+ --pos;
+ }
+
+ @Override
+ public void seek(int off) {
+ pos = off;
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/database/ids/generic/GenericDistanceDBIDList.java b/src/de/lmu/ifi/dbs/elki/database/ids/generic/GenericDistanceDBIDList.java
index 9b6c188b..911c58e7 100644
--- a/src/de/lmu/ifi/dbs/elki/database/ids/generic/GenericDistanceDBIDList.java
+++ b/src/de/lmu/ifi/dbs/elki/database/ids/generic/GenericDistanceDBIDList.java
@@ -23,7 +23,8 @@ package de.lmu.ifi.dbs.elki.database.ids.generic;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Comparator;
import de.lmu.ifi.dbs.elki.database.ids.DBIDFactory;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
@@ -46,14 +47,19 @@ public class GenericDistanceDBIDList<D extends Distance<D>> implements Modifiabl
/**
* Actual storage.
*/
- final ArrayList<DistanceDBIDPair<D>> storage;
+ Object[] storage;
+
+ /**
+ * Current size.
+ */
+ int size = 0;
/**
* Constructor.
*/
public GenericDistanceDBIDList() {
super();
- storage = new ArrayList<>();
+ storage = new Object[21];
}
/**
@@ -63,12 +69,14 @@ public class GenericDistanceDBIDList<D extends Distance<D>> implements Modifiabl
*/
public GenericDistanceDBIDList(int initialCapacity) {
super();
- storage = new ArrayList<>(initialCapacity);
+ storage = new Object[initialCapacity];
}
@Override
public void add(D dist, DBIDRef id) {
- storage.add(DBIDFactory.FACTORY.newDistancePair(dist, id));
+ ensureSize(size + 1);
+ storage[size] = DBIDFactory.FACTORY.newDistancePair(dist, id);
+ ++size;
}
/**
@@ -77,22 +85,34 @@ public class GenericDistanceDBIDList<D extends Distance<D>> implements Modifiabl
* @param pair Pair to add
*/
public void add(DistanceDBIDPair<D> pair) {
- storage.add(pair);
+ ensureSize(size + 1);
+ storage[size] = pair;
+ ++size;
+ }
+
+ private void ensureSize(int size) {
+ if (size < storage.length) {
+ storage = Arrays.copyOf(storage, (size << 1) + 1);
+ }
}
@Override
public void sort() {
- DistanceDBIDResultUtil.sortByDistance(storage);
+ @SuppressWarnings("unchecked")
+ final Comparator<Object> comp = (Comparator<Object>) DistanceDBIDResultUtil.distanceComparator();
+ Arrays.sort(storage, 0, size, comp);
+ // DistanceDBIDResultUtil.sortByDistance(storage);
}
@Override
public int size() {
- return storage.size();
+ return size;
}
+ @SuppressWarnings("unchecked")
@Override
public DistanceDBIDPair<D> get(int off) {
- return storage.get(off);
+ return (DistanceDBIDPair<D>) storage[off];
}
@Override
@@ -102,8 +122,8 @@ public class GenericDistanceDBIDList<D extends Distance<D>> implements Modifiabl
@Override
public boolean contains(DBIDRef o) {
- for(DBIDIter iter = iter(); iter.valid(); iter.advance()) {
- if(DBIDUtil.equal(iter, o)) {
+ for (DBIDIter iter = iter(); iter.valid(); iter.advance()) {
+ if (DBIDUtil.equal(iter, o)) {
return true;
}
}
@@ -157,7 +177,7 @@ public class GenericDistanceDBIDList<D extends Distance<D>> implements Modifiabl
public DistanceDBIDPair<D> getDistancePair() {
return get(pos);
}
-
+
@Override
public String toString() {
return valid() ? getDistancePair().toString() : "null";
@@ -183,4 +203,4 @@ public class GenericDistanceDBIDList<D extends Distance<D>> implements Modifiabl
pos = off;
}
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/database/ids/generic/UnmodifiableArrayDBIDs.java b/src/de/lmu/ifi/dbs/elki/database/ids/generic/UnmodifiableArrayDBIDs.java
index 318e4e79..49c6b07e 100644
--- a/src/de/lmu/ifi/dbs/elki/database/ids/generic/UnmodifiableArrayDBIDs.java
+++ b/src/de/lmu/ifi/dbs/elki/database/ids/generic/UnmodifiableArrayDBIDs.java
@@ -68,7 +68,7 @@ public class UnmodifiableArrayDBIDs implements ArrayStaticDBIDs {
@Override
public DBIDArrayIter iter() {
DBIDArrayIter it = inner.iter();
- if(it instanceof DBIDMIter) {
+ if (it instanceof DBIDMIter) {
return new UnmodifiableDBIDArrayIter(it);
}
return it;
@@ -99,6 +99,11 @@ public class UnmodifiableArrayDBIDs implements ArrayStaticDBIDs {
return inner.binarySearch(key);
}
+ @Override
+ public ArrayDBIDs slice(int begin, int end) {
+ return new UnmodifiableArrayDBIDs(inner.slice(begin, end));
+ }
+
/**
* Make an existing DBIDMIter unmodifiable.
*
@@ -155,4 +160,4 @@ public class UnmodifiableArrayDBIDs implements ArrayStaticDBIDs {
return it.getOffset();
}
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/database/ids/integer/AbstractIntegerDBIDFactory.java b/src/de/lmu/ifi/dbs/elki/database/ids/integer/AbstractIntegerDBIDFactory.java
index 061deb08..72ecb9be 100644
--- a/src/de/lmu/ifi/dbs/elki/database/ids/integer/AbstractIntegerDBIDFactory.java
+++ b/src/de/lmu/ifi/dbs/elki/database/ids/integer/AbstractIntegerDBIDFactory.java
@@ -47,12 +47,12 @@ import de.lmu.ifi.dbs.elki.persistent.FixedSizeByteBufferSerializer;
* Abstract base class for DBID factories.
*
* @author Erich Schubert
- *
+ *
* @apiviz.uses IntegerDBID oneway - - «create»
* @apiviz.uses IntegerDBIDPair oneway - - «create»
* @apiviz.uses IntegerDBIDRange oneway - - «create»
- * @apiviz.uses TroveArrayModifiableDBIDs oneway - - «create»
* @apiviz.uses TroveHashSetModifiableDBIDs oneway - - «create»
+ * @apiviz.uses IntegerArrayDBIDs oneway - - «create»
*/
abstract class AbstractIntegerDBIDFactory implements DBIDFactory {
/**
@@ -67,9 +67,10 @@ abstract class AbstractIntegerDBIDFactory implements DBIDFactory {
@Override
public void assignVar(DBIDVar var, int val) {
- if (var instanceof IntegerDBIDVar) {
- ((IntegerDBIDVar)var).internalSetIndex(val);
- } else {
+ if(var instanceof IntegerDBIDVar) {
+ ((IntegerDBIDVar) var).internalSetIndex(val);
+ }
+ else {
var.set(new IntegerDBID(val));
}
}
@@ -139,7 +140,7 @@ abstract class AbstractIntegerDBIDFactory implements DBIDFactory {
@SuppressWarnings("unchecked")
@Override
public <D extends Distance<D>> DistanceDBIDPair<D> newDistancePair(D val, DBIDRef id) {
- if (val instanceof DoubleDistance) {
+ if(val instanceof DoubleDistance) {
return (DistanceDBIDPair<D>) new DoubleDistanceIntegerDBIDPair(((DoubleDistance) val).doubleValue(), id.internalGetIndex());
}
return new DistanceIntegerDBIDPair<>(val, id.internalGetIndex());
@@ -149,12 +150,12 @@ abstract class AbstractIntegerDBIDFactory implements DBIDFactory {
public DoubleDistanceDBIDPair newDistancePair(double val, DBIDRef id) {
return new DoubleDistanceIntegerDBIDPair(val, id.internalGetIndex());
}
-
+
@SuppressWarnings("unchecked")
@Override
public <D extends Distance<D>> KNNHeap<D> newHeap(D factory, int k) {
- if (factory instanceof DoubleDistance) {
- return (KNNHeap<D>) new DoubleDistanceIntegerDBIDKNNListHeap(k);
+ if(factory instanceof DoubleDistance) {
+ return (KNNHeap<D>) newDoubleDistanceHeap(k);
}
return new DistanceDBIDPairKNNHeap<>(k);
}
@@ -162,24 +163,34 @@ abstract class AbstractIntegerDBIDFactory implements DBIDFactory {
@SuppressWarnings("unchecked")
@Override
public <D extends Distance<D>> KNNHeap<D> newHeap(KNNList<D> exist) {
- if (exist instanceof DoubleDistanceKNNList) {
- DoubleDistanceKNNHeap heap = new DoubleDistanceIntegerDBIDKNNListHeap(exist.getK());
+ if(exist instanceof DoubleDistanceKNNList) {
+ DoubleDistanceKNNHeap heap = newDoubleDistanceHeap(exist.getK());
// Insert backwards, as this will produce a proper heap
- for (int i = exist.size() - 1; i >= 0; i--) {
- heap.add((DoubleDistanceDBIDPair) exist.get(i));
+ for(int i = exist.size() - 1; i >= 0; i--) {
+ heap.insert((DoubleDistanceDBIDPair) exist.get(i));
}
return (KNNHeap<D>) heap;
- } else {
+ }
+ else {
DistanceDBIDPairKNNHeap<D> heap = new DistanceDBIDPairKNNHeap<>(exist.getK());
// Insert backwards, as this will produce a proper heap
- for (int i = exist.size() - 1; i >= 0; i--) {
- heap.add(exist.get(i));
+ for(int i = exist.size() - 1; i >= 0; i--) {
+ heap.insert(exist.get(i));
}
return heap;
}
}
@Override
+ public DoubleDistanceKNNHeap newDoubleDistanceHeap(int k) {
+ // TODO: benchmark threshold!
+ if(k > 1000) {
+ return new DoubleDistanceIntegerDBIDKNNHeap(k);
+ }
+ return new DoubleDistanceIntegerDBIDSortedKNNList(k);
+ }
+
+ @Override
public ByteBufferSerializer<DBID> getDBIDSerializer() {
return IntegerDBID.DYNAMIC_SERIALIZER;
}
diff --git a/src/de/lmu/ifi/dbs/elki/database/ids/integer/ArrayModifiableIntegerDBIDs.java b/src/de/lmu/ifi/dbs/elki/database/ids/integer/ArrayModifiableIntegerDBIDs.java
index dfff45b4..2fc94ddb 100644
--- a/src/de/lmu/ifi/dbs/elki/database/ids/integer/ArrayModifiableIntegerDBIDs.java
+++ b/src/de/lmu/ifi/dbs/elki/database/ids/integer/ArrayModifiableIntegerDBIDs.java
@@ -47,7 +47,7 @@ public class ArrayModifiableIntegerDBIDs implements ArrayModifiableDBIDs, Intege
/**
* Occupied size.
*/
- private int size = 0;
+ private int size;
/**
* Initial size.
@@ -57,11 +57,12 @@ public class ArrayModifiableIntegerDBIDs implements ArrayModifiableDBIDs, Intege
/**
* Constructor.
*
- * @param size Initial size
+ * @param isize Initial size
*/
- protected ArrayModifiableIntegerDBIDs(int size) {
+ protected ArrayModifiableIntegerDBIDs(int isize) {
super();
- this.store = new int[size];
+ this.store = new int[isize < 3 ? 3 : isize];
+ // default this.size = 0;
}
/**
@@ -70,6 +71,7 @@ public class ArrayModifiableIntegerDBIDs implements ArrayModifiableDBIDs, Intege
protected ArrayModifiableIntegerDBIDs() {
super();
this.store = new int[INITIAL_SIZE];
+ // default: this.size = 0;
}
/**
@@ -79,7 +81,15 @@ public class ArrayModifiableIntegerDBIDs implements ArrayModifiableDBIDs, Intege
*/
protected ArrayModifiableIntegerDBIDs(DBIDs existing) {
this(existing.size());
- this.addDBIDs(existing);
+ if (existing instanceof IntegerDBIDRange) {
+ IntegerDBIDRange range = (IntegerDBIDRange) existing;
+ for (int i = 0; i < range.len; i++) {
+ store[i] = range.start + i;
+ }
+ size = range.len;
+ } else {
+ this.addDBIDs(existing);
+ }
}
@Override
@@ -99,10 +109,9 @@ public class ArrayModifiableIntegerDBIDs implements ArrayModifiableDBIDs, Intege
@Override
public void assignVar(int index, DBIDVar var) {
- if(var instanceof IntegerDBIDVar) {
+ if (var instanceof IntegerDBIDVar) {
((IntegerDBIDVar) var).internalSetIndex(store[index]);
- }
- else {
+ } else {
// less efficient, involves object creation.
var.set(get(index));
}
@@ -114,23 +123,32 @@ public class ArrayModifiableIntegerDBIDs implements ArrayModifiableDBIDs, Intege
* @param minsize Desired size
*/
private void ensureSize(int minsize) {
- int asize = store.length;
- // Ensure a minimum size, to not run into an infinite loop below!
- if (asize < 2) {
- asize = 2;
- }
- while(asize < minsize) {
- asize = (asize >> 1) + asize;
+ if (minsize <= store.length) {
+ return;
}
- if(asize > store.length) {
- store = Arrays.copyOf(store, asize);
+ int asize = store.length;
+ while (asize < minsize) {
+ asize = (asize >>> 1) + asize;
}
+ final int[] prev = store;
+ store = new int[asize];
+ System.arraycopy(prev, 0, store, 0, size);
+ }
+
+ /**
+ * Grow array by 50%.
+ */
+ private void grow() {
+ final int newsize = store.length + (store.length >>> 1);
+ final int[] prev = store;
+ store = new int[newsize];
+ System.arraycopy(prev, 0, store, 0, size);
}
@Override
public boolean addDBIDs(DBIDs ids) {
ensureSize(size + ids.size());
- for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
store[size] = iter.internalGetIndex();
++size;
}
@@ -140,11 +158,11 @@ public class ArrayModifiableIntegerDBIDs implements ArrayModifiableDBIDs, Intege
@Override
public boolean removeDBIDs(DBIDs ids) {
boolean success = false;
- for(DBIDIter id = ids.iter(); id.valid(); id.advance()) {
+ for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
int rm = id.internalGetIndex();
// TODO: when sorted, use binary search!
- for(int i = 0; i < size; i++) {
- if(store[i] == rm) {
+ for (int i = 0; i < size; i++) {
+ if (store[i] == rm) {
--size;
store[i] = store[size];
success = true;
@@ -157,8 +175,8 @@ public class ArrayModifiableIntegerDBIDs implements ArrayModifiableDBIDs, Intege
@Override
public boolean add(DBIDRef e) {
- if(size == store.length) {
- ensureSize(size + 1);
+ if (size == store.length) {
+ grow();
}
store[size] = e.internalGetIndex();
++size;
@@ -169,8 +187,8 @@ public class ArrayModifiableIntegerDBIDs implements ArrayModifiableDBIDs, Intege
public boolean remove(DBIDRef o) {
int rm = o.internalGetIndex();
// TODO: when sorted, use binary search!
- for(int i = 0; i < size; i++) {
- if(store[i] == rm) {
+ for (int i = 0; i < size; i++) {
+ if (store[i] == rm) {
--size;
store[i] = store[size];
return true;
@@ -190,7 +208,7 @@ public class ArrayModifiableIntegerDBIDs implements ArrayModifiableDBIDs, Intege
public DBID remove(int index) {
DBID ret = new IntegerDBID(store[index]);
--size;
- if(size > 0) {
+ if (size > 0) {
store[index] = store[size];
}
return ret;
@@ -210,8 +228,8 @@ public class ArrayModifiableIntegerDBIDs implements ArrayModifiableDBIDs, Intege
public boolean contains(DBIDRef o) {
// TODO: recognize sorted arrays, then use binary search?
int oid = o.internalGetIndex();
- for(int i = 0; i < size; i++) {
- if(store[i] == oid) {
+ for (int i = 0; i < size; i++) {
+ if (store[i] == oid) {
return true;
}
}
@@ -241,7 +259,12 @@ public class ArrayModifiableIntegerDBIDs implements ArrayModifiableDBIDs, Intege
}
@Override
- public IntegerDBIDArrayMIter iter() {
+ public Slice slice(int begin, int end) {
+ return new Slice(begin, end);
+ }
+
+ @Override
+ public Itr iter() {
return new Itr();
}
@@ -303,4 +326,131 @@ public class ArrayModifiableIntegerDBIDs implements ArrayModifiableDBIDs, Intege
return Integer.toString(internalGetIndex()) + "@" + pos;
}
}
+
+ /**
+ * Slice of an array.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ private class Slice implements IntegerArrayDBIDs {
+ /**
+ * Slice positions.
+ */
+ final int begin, end;
+
+ /**
+ * Constructor.
+ *
+ * @param begin Begin, inclusive
+ * @param end End, exclusive
+ */
+ public Slice(int begin, int end) {
+ super();
+ this.begin = begin;
+ this.end = end;
+ }
+
+ @Override
+ public int size() {
+ return end - begin;
+ }
+
+ @Override
+ public boolean contains(DBIDRef o) {
+ // TODO: recognize sorted arrays, then use binary search?
+ int oid = o.internalGetIndex();
+ for (int i = begin; i < end; i++) {
+ if (store[i] == oid) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return begin == end;
+ }
+
+ @Override
+ public DBID get(int i) {
+ return ArrayModifiableIntegerDBIDs.this.get(begin + i);
+ }
+
+ @Override
+ public void assignVar(int index, DBIDVar var) {
+ ArrayModifiableIntegerDBIDs.this.assignVar(begin + index, var);
+ }
+
+ @Override
+ public int binarySearch(DBIDRef key) {
+ return Arrays.binarySearch(store, begin, end, key.internalGetIndex()) - begin;
+ }
+
+ @Override
+ public SliceItr iter() {
+ return new SliceItr();
+ }
+
+ @Override
+ public Slice slice(int begin, int end) {
+ return new Slice(begin + begin, begin + end);
+ }
+
+ /**
+ * Iterator class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ private class SliceItr implements IntegerDBIDArrayIter {
+ /**
+ * Iterator position.
+ */
+ int pos = begin;
+
+ @Override
+ public int internalGetIndex() {
+ return store[pos];
+ }
+
+ @Override
+ public boolean valid() {
+ return pos < end && pos >= begin;
+ }
+
+ @Override
+ public void advance() {
+ ++pos;
+ }
+
+ @Override
+ public int getOffset() {
+ return pos;
+ }
+
+ @Override
+ public void advance(int count) {
+ pos += count;
+ }
+
+ @Override
+ public void retract() {
+ --pos;
+ }
+
+ @Override
+ public void seek(int off) {
+ pos = off;
+ }
+
+ @Override
+ public String toString() {
+ return Integer.toString(internalGetIndex()) + "@" + pos;
+ }
+ }
+ }
}
diff --git a/src/de/lmu/ifi/dbs/elki/database/ids/integer/ArrayStaticIntegerDBIDs.java b/src/de/lmu/ifi/dbs/elki/database/ids/integer/ArrayStaticIntegerDBIDs.java
index 4b4b5a42..fcb426ac 100644
--- a/src/de/lmu/ifi/dbs/elki/database/ids/integer/ArrayStaticIntegerDBIDs.java
+++ b/src/de/lmu/ifi/dbs/elki/database/ids/integer/ArrayStaticIntegerDBIDs.java
@@ -36,14 +36,12 @@ import de.lmu.ifi.dbs.elki.logging.LoggingUtil;
* Static (no modifications allowed) set of Database Object IDs.
*
* @author Erich Schubert
- *
- * @apiviz.has IntegerDBID
*/
public class ArrayStaticIntegerDBIDs implements IntegerArrayStaticDBIDs {
/**
* The actual storage.
*/
- protected int[] ids;
+ protected int[] store;
/**
* Constructor.
@@ -52,12 +50,58 @@ public class ArrayStaticIntegerDBIDs implements IntegerArrayStaticDBIDs {
*/
public ArrayStaticIntegerDBIDs(int... ids) {
super();
- this.ids = ids;
+ this.store = ids;
+ }
+
+ @Override
+ public int size() {
+ return store.length;
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return store.length == 0;
+ }
+
+ @Override
+ public boolean contains(DBIDRef o) {
+ final int oid = DBIDUtil.asInteger(o);
+ for (int i = 0; i < store.length; i++) {
+ if (store[i] == oid) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ @Override
+ public DBID get(int i) {
+ return DBIDFactory.FACTORY.importInteger(store[i]);
+ }
+
+ @Override
+ public void assignVar(int i, DBIDVar var) {
+ if (var instanceof IntegerDBIDVar) {
+ ((IntegerDBIDVar) var).internalSetIndex(store[i]);
+ } else {
+ // Much less efficient:
+ var.set(get(i));
+ }
+ }
+
+ @Override
+ public int binarySearch(DBIDRef key) {
+ return Arrays.binarySearch(store, DBIDUtil.asInteger(key));
}
@Override
- public IntegerDBIDArrayIter iter() {
- return new DBIDItr();
+ public Itr iter() {
+ return new Itr();
+ }
+
+ @Override
+ public Slice slice(int begin, int end) {
+ return new Slice(begin, end);
}
/**
@@ -67,7 +111,7 @@ public class ArrayStaticIntegerDBIDs implements IntegerArrayStaticDBIDs {
*
* @apiviz.exclude
*/
- protected class DBIDItr implements IntegerDBIDArrayIter {
+ protected class Itr implements IntegerDBIDArrayIter {
/**
* Position within array.
*/
@@ -75,7 +119,7 @@ public class ArrayStaticIntegerDBIDs implements IntegerArrayStaticDBIDs {
@Override
public boolean valid() {
- return pos < ids.length && pos >= 0;
+ return pos < store.length && pos >= 0;
}
@Override
@@ -105,12 +149,12 @@ public class ArrayStaticIntegerDBIDs implements IntegerArrayStaticDBIDs {
@Override
public int internalGetIndex() {
- return ids[pos];
+ return store[pos];
}
@Override
public boolean equals(Object other) {
- if(other instanceof DBID) {
+ if (other instanceof DBID) {
LoggingUtil.warning("Programming error detected: DBIDItr.equals(DBID). Use sameDBID()!", new Throwable());
}
return super.equals(other);
@@ -122,44 +166,130 @@ public class ArrayStaticIntegerDBIDs implements IntegerArrayStaticDBIDs {
}
}
- @Override
- public int size() {
- return ids.length;
- }
+ /**
+ * Slice of an array.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ private class Slice implements IntegerArrayDBIDs {
+ /**
+ * Slice positions.
+ */
+ final int begin, end;
- @Override
- public boolean isEmpty() {
- return ids.length == 0;
- }
+ /**
+ * Constructor.
+ *
+ * @param begin Begin, inclusive
+ * @param end End, exclusive
+ */
+ public Slice(int begin, int end) {
+ super();
+ this.begin = begin;
+ this.end = end;
+ }
- @Override
- public boolean contains(DBIDRef o) {
- final int oid = DBIDUtil.asInteger(o);
- for(int i = 0; i < ids.length; i++) {
- if(ids[i] == oid) {
- return true;
+ @Override
+ public int size() {
+ return end - begin;
+ }
+
+ @Override
+ public boolean contains(DBIDRef o) {
+ // TODO: recognize sorted arrays, then use binary search?
+ int oid = o.internalGetIndex();
+ for (int i = begin; i < end; i++) {
+ if (store[i] == oid) {
+ return true;
+ }
}
+ return false;
}
- return false;
- }
- @Override
- public DBID get(int i) {
- return DBIDFactory.FACTORY.importInteger(ids[i]);
- }
+ @Override
+ public boolean isEmpty() {
+ return begin == end;
+ }
- @Override
- public void assignVar(int i, DBIDVar var) {
- if (var instanceof IntegerDBIDVar) {
- ((IntegerDBIDVar)var).internalSetIndex(ids[i]);
- } else {
- // Much less efficient:
- var.set(get(i));
+ @Override
+ public DBID get(int i) {
+ return ArrayStaticIntegerDBIDs.this.get(begin + i);
}
- }
- @Override
- public int binarySearch(DBIDRef key) {
- return Arrays.binarySearch(ids, DBIDUtil.asInteger(key));
+ @Override
+ public void assignVar(int index, DBIDVar var) {
+ ArrayStaticIntegerDBIDs.this.assignVar(begin + index, var);
+ }
+
+ @Override
+ public int binarySearch(DBIDRef key) {
+ return Arrays.binarySearch(store, begin, end, key.internalGetIndex()) - begin;
+ }
+
+ @Override
+ public SliceItr iter() {
+ return new SliceItr();
+ }
+
+ @Override
+ public Slice slice(int begin, int end) {
+ return new Slice(begin + begin, begin + end);
+ }
+
+ /**
+ * Iterator class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ private class SliceItr implements IntegerDBIDArrayIter {
+ /**
+ * Iterator position.
+ */
+ int pos = begin;
+
+ @Override
+ public int internalGetIndex() {
+ return store[pos];
+ }
+
+ @Override
+ public boolean valid() {
+ return pos < end && pos >= begin;
+ }
+
+ @Override
+ public void advance() {
+ ++pos;
+ }
+
+ @Override
+ public int getOffset() {
+ return pos;
+ }
+
+ @Override
+ public void advance(int count) {
+ pos += count;
+ }
+
+ @Override
+ public void retract() {
+ --pos;
+ }
+
+ @Override
+ public void seek(int off) {
+ pos = begin + off;
+ }
+
+ @Override
+ public String toString() {
+ return Integer.toString(internalGetIndex()) + "@" + pos;
+ }
+ }
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/database/ids/integer/DoubleDistanceIntegerDBIDKNNHeap.java b/src/de/lmu/ifi/dbs/elki/database/ids/integer/DoubleDistanceIntegerDBIDKNNHeap.java
index 6c88c2d8..96babaa3 100644
--- a/src/de/lmu/ifi/dbs/elki/database/ids/integer/DoubleDistanceIntegerDBIDKNNHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/database/ids/integer/DoubleDistanceIntegerDBIDKNNHeap.java
@@ -90,7 +90,7 @@ public class DoubleDistanceIntegerDBIDKNNHeap implements DoubleDistanceKNNHeap {
@Override
@Deprecated
public DoubleDistance getKNNDistance() {
- if (heap.size() < k) {
+ if(heap.size() < k) {
return DoubleDistance.INFINITE_DISTANCE;
}
return new DoubleDistance(kdist);
@@ -103,41 +103,92 @@ public class DoubleDistanceIntegerDBIDKNNHeap implements DoubleDistanceKNNHeap {
@Override
@Deprecated
- public void add(DoubleDistance distance, DBIDRef id) {
- add(distance.doubleValue(), id);
+ public void insert(DoubleDistance distance, DBIDRef id) {
+ final double dist = distance.doubleValue();
+ final int iid = id.internalGetIndex();
+ if(heap.size() < k) {
+ heap.add(dist, iid);
+ if(heap.size() >= k) {
+ kdist = heap.peekKey();
+ }
+ return;
+ }
+ // Tied with top:
+ if(dist >= kdist) {
+ if(dist == kdist) {
+ addToTies(iid);
+ }
+ return;
+ }
+ // Old top element: (kdist, previd)
+ updateHeap(dist, iid);
}
@Override
@Deprecated
- public void add(Double distance, DBIDRef id) {
- add(distance.doubleValue(), id);
+ public void insert(Double distance, DBIDRef id) {
+ final double dist = distance.doubleValue();
+ final int iid = id.internalGetIndex();
+ if(heap.size() < k) {
+ heap.add(dist, iid);
+ if(heap.size() >= k) {
+ kdist = heap.peekKey();
+ }
+ return;
+ }
+ // Tied with top:
+ if(dist >= kdist) {
+ if(dist == kdist) {
+ addToTies(iid);
+ }
+ return;
+ }
+ // Old top element: (kdist, previd)
+ updateHeap(dist, iid);
}
@Override
- public final void add(final double distance, final DBIDRef id) {
- if (distance > kdist) {
- return;
- }
+ public final double insert(final double distance, final DBIDRef id) {
final int iid = id.internalGetIndex();
- if (heap.size() < k) {
+ if(heap.size() < k) {
heap.add(distance, iid);
- if (heap.size() >= k) {
+ if(heap.size() >= k) {
kdist = heap.peekKey();
}
- return;
+ return kdist;
}
// Tied with top:
- if (distance >= kdist) {
- addToTies(iid);
- return;
+ if(distance >= kdist) {
+ if(distance == kdist) {
+ addToTies(iid);
+ }
+ return kdist;
}
// Old top element: (kdist, previd)
updateHeap(distance, iid);
+ return kdist;
}
@Override
- public void add(DoubleDistanceDBIDPair e) {
- add(e.doubleDistance(), e);
+ public void insert(final DoubleDistanceDBIDPair e) {
+ final double distance = e.doubleDistance();
+ final int iid = e.internalGetIndex();
+ if(heap.size() < k) {
+ heap.add(distance, iid);
+ if(heap.size() >= k) {
+ kdist = heap.peekKey();
+ }
+ return;
+ }
+ // Tied with top:
+ if(distance >= kdist) {
+ if(distance == kdist) {
+ addToTies(iid);
+ }
+ return;
+ }
+ // Old top element: (kdist, previd)
+ updateHeap(distance, iid);
}
/**
@@ -152,9 +203,10 @@ public class DoubleDistanceIntegerDBIDKNNHeap implements DoubleDistanceKNNHeap {
heap.replaceTopElement(distance, iid);
kdist = heap.peekKey();
// If the kdist improved, zap ties.
- if (kdist < prevdist) {
+ if(kdist < prevdist) {
numties = 0;
- } else {
+ }
+ else {
addToTies(previd);
}
}
@@ -165,7 +217,7 @@ public class DoubleDistanceIntegerDBIDKNNHeap implements DoubleDistanceKNNHeap {
* @param id Id to add
*/
private final void addToTies(int id) {
- if (ties.length == numties) {
+ if(ties.length == numties) {
ties = Arrays.copyOf(ties, (ties.length << 1) + 1); // grow.
}
ties[numties] = id;
@@ -175,10 +227,11 @@ public class DoubleDistanceIntegerDBIDKNNHeap implements DoubleDistanceKNNHeap {
@Override
public DoubleDistanceIntegerDBIDPair poll() {
final DoubleDistanceIntegerDBIDPair ret;
- if (numties > 0) {
+ if(numties > 0) {
ret = new DoubleDistanceIntegerDBIDPair(kdist, ties[numties - 1]);
--numties;
- } else {
+ }
+ else {
ret = new DoubleDistanceIntegerDBIDPair(heap.peekKey(), heap.peekValue());
heap.poll();
}
@@ -189,16 +242,17 @@ public class DoubleDistanceIntegerDBIDKNNHeap implements DoubleDistanceKNNHeap {
* Pop the topmost element.
*/
protected void pop() {
- if (numties > 0) {
+ if(numties > 0) {
--numties;
- } else {
+ }
+ else {
heap.poll();
}
}
@Override
public DoubleDistanceIntegerDBIDPair peek() {
- if (numties > 0) {
+ if(numties > 0) {
return new DoubleDistanceIntegerDBIDPair(kdist, ties[numties - 1]);
}
return new DoubleDistanceIntegerDBIDPair(heap.peekKey(), heap.peekValue());
@@ -222,7 +276,21 @@ public class DoubleDistanceIntegerDBIDKNNHeap implements DoubleDistanceKNNHeap {
@Override
public DoubleDistanceIntegerDBIDKNNList toKNNList() {
- return new DoubleDistanceIntegerDBIDKNNList(this);
+ final int hsize = heap.size();
+ DoubleDistanceIntegerDBIDKNNList ret = new DoubleDistanceIntegerDBIDKNNList(k, hsize + numties);
+ // Add ties:
+ for(int i = 0; i < numties; i++) {
+ ret.dists[hsize + i] = kdist;
+ ret.ids[hsize + i] = ties[i];
+ }
+ for(int j = hsize - 1; j >= 0; j--) {
+ ret.dists[j] = heap.peekKey();
+ ret.ids[j] = heap.peekValue();
+ heap.poll();
+ }
+ ret.size = hsize + numties;
+ ret.sort();
+ return ret;
}
/**
@@ -231,9 +299,10 @@ public class DoubleDistanceIntegerDBIDKNNHeap implements DoubleDistanceKNNHeap {
* @return distance
*/
protected double peekDistance() {
- if (numties > 0) {
+ if(numties > 0) {
return kdist;
- } else {
+ }
+ else {
return heap.peekKey();
}
}
@@ -244,7 +313,7 @@ public class DoubleDistanceIntegerDBIDKNNHeap implements DoubleDistanceKNNHeap {
* @return internal id
*/
protected int peekInternalDBID() {
- if (numties > 0) {
+ if(numties > 0) {
return ties[numties - 1];
}
return heap.peekValue();
diff --git a/src/de/lmu/ifi/dbs/elki/database/ids/integer/DoubleDistanceIntegerDBIDKNNList.java b/src/de/lmu/ifi/dbs/elki/database/ids/integer/DoubleDistanceIntegerDBIDKNNList.java
index a74497e8..f6515d88 100644
--- a/src/de/lmu/ifi/dbs/elki/database/ids/integer/DoubleDistanceIntegerDBIDKNNList.java
+++ b/src/de/lmu/ifi/dbs/elki/database/ids/integer/DoubleDistanceIntegerDBIDKNNList.java
@@ -22,47 +22,21 @@ package de.lmu.ifi.dbs.elki.database.ids.integer;
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.Arrays;
-
-import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDPair;
import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter;
import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList;
-import de.lmu.ifi.dbs.elki.database.ids.distance.ModifiableDoubleDistanceDBIDList;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
/**
- * Class to store double distance, integer DBID results.
+ * kNN list, but without automatic sorting. Use with care, as others may expect
+ * the results to be sorted!
*
* @author Erich Schubert
- *
- * @apiviz.uses DoubleIntegerArrayQuickSort
*/
-public class DoubleDistanceIntegerDBIDKNNList implements ModifiableDoubleDistanceDBIDList, DoubleDistanceKNNList, IntegerDBIDs {
- /**
- * Initial size allocation.
- */
- private static final int INITIAL_SIZE = 21;
-
+public class DoubleDistanceIntegerDBIDKNNList extends DoubleDistanceIntegerDBIDList implements DoubleDistanceKNNList {
/**
* The k value this list was generated for.
*/
- int k;
-
- /**
- * The size
- */
- int size;
-
- /**
- * Distance values
- */
- double[] dists;
-
- /**
- * DBIDs
- */
- int[] ids;
+ final int k;
/**
* Constructor.
@@ -70,8 +44,6 @@ public class DoubleDistanceIntegerDBIDKNNList implements ModifiableDoubleDistanc
public DoubleDistanceIntegerDBIDKNNList() {
super();
this.k = -1;
- this.dists = new double[INITIAL_SIZE];
- this.ids = new int[INITIAL_SIZE];
}
/**
@@ -80,75 +52,20 @@ public class DoubleDistanceIntegerDBIDKNNList implements ModifiableDoubleDistanc
* @param k K parameter
* @param size Actual size
*/
- public DoubleDistanceIntegerDBIDKNNList(int k, int size) {
- super();
+ public DoubleDistanceIntegerDBIDKNNList(final int k, int size) {
+ super(size);
this.k = k;
- if (size > 0) {
- this.dists = new double[size];
- this.ids = new int[size];
- } else {
- this.dists = new double[INITIAL_SIZE];
- this.ids = new int[INITIAL_SIZE];
- }
- }
-
- /**
- * Constructor from heap.
- *
- * @param heap KNN heap.
- */
- public DoubleDistanceIntegerDBIDKNNList(DoubleDistanceIntegerDBIDKNNHeap heap) {
- super();
- this.k = heap.getK();
- this.size = heap.size();
- this.dists = new double[size];
- this.ids = new int[size];
- for (int i = size - 1; i >= 0; i--) {
- dists[i] = heap.peekDistance();
- ids[i] = heap.peekInternalDBID();
- heap.pop();
- }
- }
-
- @Override
- public DoubleDistanceIntegerDBIDListIter iter() {
- return new Itr();
- }
-
- @Override
- public boolean contains(DBIDRef o) {
- final int q = o.internalGetIndex();
- for (int i = 0; i < size; i++) {
- if (q == ids[i]) {
- return true;
- }
- }
- return false;
- }
-
- @Override
- public boolean isEmpty() {
- return size == 0;
- }
-
- @Override
- public int size() {
- return size;
}
@Override
public int getK() {
- if (k <= 0) {
- return size - 1;
- }
return k;
}
- @Override
- public DoubleDistanceIntegerDBIDPair get(int index) {
- return new DoubleDistanceIntegerDBIDPair(dists[index], ids[index]);
- }
-
+ /**
+ * @deprecated Since you know this is a double distance heap, use
+ * {@link #doubleKNNDistance()}
+ */
@Override
@Deprecated
public DoubleDistance getKNNDistance() {
@@ -157,65 +74,7 @@ public class DoubleDistanceIntegerDBIDKNNList implements ModifiableDoubleDistanc
@Override
public double doubleKNNDistance() {
- if (k <= 0) {
- return dists[size - 1];
- }
- if (size < k) {
- return Double.POSITIVE_INFINITY;
- }
- return dists[k - 1];
- }
-
- /**
- * Add an entry, consisting of distance and internal index.
- *
- * @param dist Distance
- * @param id Internal index
- */
- protected void add(double dist, int id) {
- if (size == dists.length) {
- final int newlength = (dists.length << 1) + 1;
- dists = Arrays.copyOf(dists, newlength);
- ids = Arrays.copyOf(ids, newlength);
- }
- dists[size] = dist;
- ids[size] = id;
- ++size;
- }
-
- @Override
- @Deprecated
- public void add(DoubleDistance dist, DBIDRef id) {
- add(dist.doubleValue(), id);
- }
-
- @Override
- public void add(double dist, DBIDRef id) {
- add(dist, id.internalGetIndex());
- }
-
- @Override
- public void add(DoubleDistanceDBIDPair pair) {
- add(pair.doubleDistance(), pair.internalGetIndex());
- }
-
- @Override
- public void sort() {
- DoubleIntegerArrayQuickSort.sort(dists, ids, 0, size);
- }
-
- /**
- * Reverse the list.
- */
- protected void reverse() {
- for (int i = 0, j = size - 1; i < j; i++, j--) {
- double tmpd = dists[j];
- dists[j] = dists[i];
- dists[i] = tmpd;
- int tmpi = ids[j];
- ids[j] = ids[i];
- ids[i] = tmpi;
- }
+ return (size >= k) ? dists[k - 1] : Double.POSITIVE_INFINITY;
}
@Override
@@ -232,67 +91,4 @@ public class DoubleDistanceIntegerDBIDKNNList implements ModifiableDoubleDistanc
buf.append(']');
return buf.toString();
}
-
- /**
- * List iterator.
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
- */
- private class Itr implements DoubleDistanceIntegerDBIDListIter {
- int offset = 0;
-
- @Override
- public boolean valid() {
- return offset < size;
- }
-
- @Override
- public void advance() {
- ++offset;
- }
-
- @Override
- public int getOffset() {
- return offset;
- }
-
- @Override
- public void advance(int count) {
- offset += count;
- }
-
- @Override
- public void retract() {
- offset--;
- }
-
- @Override
- public void seek(int off) {
- offset = off;
- }
-
- @Override
- public int internalGetIndex() {
- return ids[offset];
- }
-
- @Override
- public double doubleDistance() {
- return dists[offset];
- }
-
- @Override
- public DoubleDistanceDBIDPair getDistancePair() {
- return new DoubleDistanceIntegerDBIDPair(dists[offset], ids[offset]);
- }
-
- @Override
- @Deprecated
- public DoubleDistance getDistance() {
- return new DoubleDistance(dists[offset]);
- }
-
- }
}
diff --git a/src/de/lmu/ifi/dbs/elki/database/ids/integer/DoubleDistanceIntegerDBIDKNNListHeap.java b/src/de/lmu/ifi/dbs/elki/database/ids/integer/DoubleDistanceIntegerDBIDList.java
index ffc2266e..0db0204c 100644
--- a/src/de/lmu/ifi/dbs/elki/database/ids/integer/DoubleDistanceIntegerDBIDKNNListHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/database/ids/integer/DoubleDistanceIntegerDBIDList.java
@@ -22,13 +22,11 @@ package de.lmu.ifi.dbs.elki.database.ids.integer;
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.Arrays;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter;
import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDPair;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNHeap;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList;
+import de.lmu.ifi.dbs.elki.database.ids.distance.ModifiableDoubleDistanceDBIDList;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
/**
@@ -38,11 +36,11 @@ import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
*
* @apiviz.uses DoubleIntegerArrayQuickSort
*/
-public class DoubleDistanceIntegerDBIDKNNListHeap implements DoubleDistanceKNNHeap, DoubleDistanceKNNList, IntegerDBIDs {
+public class DoubleDistanceIntegerDBIDList implements ModifiableDoubleDistanceDBIDList, IntegerDBIDs {
/**
- * The k value this list was generated for.
+ * Initial size allocation.
*/
- int k;
+ private static final int INITIAL_SIZE = 21;
/**
* The size
@@ -60,28 +58,44 @@ public class DoubleDistanceIntegerDBIDKNNListHeap implements DoubleDistanceKNNHe
int[] ids;
/**
+ * Empty.
+ */
+ private static final double[] EMPTY_DISTS = new double[0];
+
+ /**
+ * Empty.
+ */
+ private static final int[] EMPTY_IDS = new int[0];
+
+ /**
+ * Constructor.
+ */
+ public DoubleDistanceIntegerDBIDList() {
+ dists = EMPTY_DISTS;
+ ids = EMPTY_IDS;
+ }
+
+ /**
* Constructor.
*
- * @param k K parameter
+ * @param size Initial size
*/
- public DoubleDistanceIntegerDBIDKNNListHeap(int k) {
- super();
- this.k = k;
- this.size = 0;
- this.dists = new double[k + 1];
- this.ids = new int[k + 1];
+ public DoubleDistanceIntegerDBIDList(int size) {
+ this.dists = new double[size];
+ this.ids = new int[size];
+ // This is default anyway: this.size = 0;
}
@Override
- public DoubleDistanceIntegerDBIDListIter iter() {
+ public Itr iter() {
return new Itr();
}
@Override
public boolean contains(DBIDRef o) {
final int q = o.internalGetIndex();
- for(int i = 0; i < size; i++) {
- if(q == ids[i]) {
+ for (int i = 0; i < size; i++) {
+ if (q == ids[i]) {
return true;
}
}
@@ -99,151 +113,123 @@ public class DoubleDistanceIntegerDBIDKNNListHeap implements DoubleDistanceKNNHe
}
@Override
- public int getK() {
- return k;
- }
-
- @Override
public DoubleDistanceIntegerDBIDPair get(int index) {
return new DoubleDistanceIntegerDBIDPair(dists[index], ids[index]);
}
- @Override
- @Deprecated
- public DoubleDistance getKNNDistance() {
- return new DoubleDistance(doubleKNNDistance());
- }
-
- @Override
- public double doubleKNNDistance() {
- if(size < k) {
- return Double.POSITIVE_INFINITY;
- }
- return dists[k - 1];
- }
-
/**
* Add an entry, consisting of distance and internal index.
*
* @param dist Distance
* @param id Internal index
*/
- protected void append(double dist, int id) {
- ensureSize(size + 1);
+ protected void addInternal(double dist, int id) {
+ if (size == dists.length) {
+ grow();
+ }
dists[size] = dist;
ids[size] = id;
++size;
}
/**
- * Add a new element to the heap/list.
- *
- * @param dist Distance
- * @param id Object ID
+ * Grow the data storage.
*/
- protected void add(double dist, int id) {
- if(size < k) {
- dists[size] = dist;
- ids[size] = id;
- ++size;
- if(size == k) {
- sort();
- }
- return;
- }
- if (dist > dists[size - 1]) {
+ protected void grow() {
+ if (dists == EMPTY_DISTS) {
+ dists = new double[INITIAL_SIZE];
+ ids = new int[INITIAL_SIZE];
return;
}
- // Ensure we have enough space.
- ensureSize(size + 1);
- // Insertion sort:
- int pos = size;
- while(pos > 0 && dists[pos - 1] > dist) {
- dists[pos] = dists[pos - 1];
- ids[pos] = ids[pos - 1];
- --pos;
- }
- dists[pos] = dist;
- ids[pos] = id;
- ++size;
- // Truncate if necessary:
- if(dists[k] > dists[k - 1]) {
- size = k;
- }
- }
-
- /**
- * Ensure we have enough space.
- *
- * @param size Desired size
- */
- private void ensureSize(int size) {
- if(size > dists.length) {
- final int newlength = Math.max(size, (dists.length << 1) + 1);
- dists = Arrays.copyOf(dists, newlength);
- ids = Arrays.copyOf(ids, newlength);
- }
+ final int len = dists.length;
+ final int newlength = len + (len >> 1);
+ double[] odists = dists;
+ dists = new double[newlength];
+ System.arraycopy(odists, 0, dists, 0, odists.length);
+ int[] oids = ids;
+ ids = new int[newlength];
+ System.arraycopy(oids, 0, ids, 0, oids.length);
}
@Override
@Deprecated
public void add(DoubleDistance dist, DBIDRef id) {
- add(dist.doubleValue(), id);
- }
-
- @Override
- @Deprecated
- public void add(Double dist, DBIDRef id) {
- add(dist.doubleValue(), id);
+ addInternal(dist.doubleValue(), id.internalGetIndex());
}
@Override
public void add(double dist, DBIDRef id) {
- add(dist, id.internalGetIndex());
+ addInternal(dist, id.internalGetIndex());
}
@Override
public void add(DoubleDistanceDBIDPair pair) {
- add(pair.doubleDistance(), pair.internalGetIndex());
- }
-
- /**
- * Sort the current contents of the list.
- */
- protected void sort() {
- DoubleIntegerArrayQuickSort.sort(dists, ids, 0, size);
+ addInternal(pair.doubleDistance(), pair.internalGetIndex());
}
@Override
public void clear() {
size = 0;
- Arrays.fill(dists, Double.NaN);
- Arrays.fill(ids, -1);
+ // TODO: put NaN/-1 everywhere, or don't care?
}
@Override
- public DoubleDistanceIntegerDBIDPair poll() {
- return new DoubleDistanceIntegerDBIDPair(dists[k], ids[k]);
+ public void sort() {
+ DoubleIntegerArrayQuickSort.sort(dists, ids, 0, size);
}
- @Override
- public DoubleDistanceIntegerDBIDPair peek() {
- return new DoubleDistanceIntegerDBIDPair(dists[k], ids[k]);
+ /**
+ * Reverse the list.
+ */
+ protected void reverse() {
+ for (int i = 0, j = size - 1; i < j; i++, j--) {
+ double tmpd = dists[j];
+ dists[j] = dists[i];
+ dists[i] = tmpd;
+ int tmpi = ids[j];
+ ids[j] = ids[i];
+ ids[i] = tmpi;
+ }
}
- @Override
- public DoubleDistanceKNNList toKNNList() {
- return this;
+ /**
+ * Truncate the list to the given size.
+ *
+ * @param newsize New size
+ */
+ public void truncate(int newsize) {
+ if (newsize < size) {
+ double[] odists = dists;
+ dists = new double[newsize];
+ System.arraycopy(odists, 0, dists, 0, newsize);
+ int[] oids = ids;
+ ids = new int[newsize];
+ System.arraycopy(oids, 0, ids, 0, newsize);
+ size = newsize;
+ }
+ }
+
+ /**
+ * Get the distance of the object at position pos.
+ *
+ * Usually, you should be using an iterator instead. This part of the API is
+ * not stable.
+ *
+ * @param pos Position
+ * @return Double distance.
+ */
+ public double getDoubleDistance(int pos) {
+ return dists[pos];
}
@Override
public String toString() {
StringBuilder buf = new StringBuilder();
- buf.append("kNNListHeap[");
- for(DoubleDistanceDBIDListIter iter = this.iter(); iter.valid();) {
+ buf.append("DistanceDBIDList[");
+ for (DoubleDistanceDBIDListIter iter = this.iter(); iter.valid();) {
buf.append(iter.doubleDistance()).append(':').append(iter.internalGetIndex());
iter.advance();
- if(iter.valid()) {
+ if (iter.valid()) {
buf.append(',');
}
}
@@ -259,8 +245,18 @@ public class DoubleDistanceIntegerDBIDKNNListHeap implements DoubleDistanceKNNHe
* @apiviz.exclude
*/
private class Itr implements DoubleDistanceIntegerDBIDListIter {
+ /**
+ * Current offset.
+ */
int offset = 0;
+ /**
+ * Constructor.
+ */
+ private Itr() {
+ super();
+ }
+
@Override
public boolean valid() {
return offset < size;
@@ -283,7 +279,7 @@ public class DoubleDistanceIntegerDBIDKNNListHeap implements DoubleDistanceKNNHe
@Override
public void retract() {
- offset--;
+ --offset;
}
@Override
diff --git a/src/de/lmu/ifi/dbs/elki/database/ids/integer/DoubleDistanceIntegerDBIDPairKNNListHeap.java b/src/de/lmu/ifi/dbs/elki/database/ids/integer/DoubleDistanceIntegerDBIDPairKNNListHeap.java
new file mode 100644
index 00000000..aa0a9739
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/database/ids/integer/DoubleDistanceIntegerDBIDPairKNNListHeap.java
@@ -0,0 +1,339 @@
+package de.lmu.ifi.dbs.elki.database.ids.integer;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDPair;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNHeap;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
+
+/**
+ * Finalized KNN List.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.composedOf DoubleDistanceDBIDPair
+ * @apiviz.has DoubleDistanceDBIDListIter
+ */
+public class DoubleDistanceIntegerDBIDPairKNNListHeap implements DoubleDistanceKNNList, DoubleDistanceKNNHeap {
+ /**
+ * The value of k this was materialized for.
+ */
+ private final int k;
+
+ /**
+ * The actual data array.
+ */
+ private DoubleDistanceIntegerDBIDPair[] data;
+
+ /**
+ * Current size
+ */
+ private int size;
+
+ /**
+ * Constructor.
+ *
+ * @param k K parameter
+ */
+ public DoubleDistanceIntegerDBIDPairKNNListHeap(int k) {
+ super();
+ this.data = new DoubleDistanceIntegerDBIDPair[k + 5];
+ this.k = k;
+ this.size = 0;
+ }
+
+ @Override
+ public void clear() {
+ for(int i = 0; i < size; i++) {
+ data[i] = null; // discard
+ }
+ size = 0;
+ }
+
+ @Override
+ public double insert(double distance, DBIDRef id) {
+ final int kminus1 = k - 1;
+ if(size < k || distance <= data[kminus1].doubleDistance()) {
+ // Ensure we have enough space.
+ if(size > data.length) {
+ grow();
+ }
+ insertionSort(new DoubleDistanceIntegerDBIDPair(distance, id.internalGetIndex()));
+ // Truncate if necessary:
+ if(size > k && data[k].doubleDistance() > data[kminus1].doubleDistance()) {
+ truncate();
+ }
+ }
+ return (size < k) ? Double.POSITIVE_INFINITY : get(kminus1).doubleDistance();
+ }
+
+ private void truncate() {
+ for(int i = k; i < size; i++) {
+ data[i] = null; // discard
+ }
+ size = k;
+ }
+
+ @Override
+ @Deprecated
+ public void insert(Double distance, DBIDRef id) {
+ final int kminus1 = k - 1;
+ if(size < k || distance.doubleValue() <= data[kminus1].doubleDistance()) {
+ // Ensure we have enough space.
+ if(size > data.length) {
+ grow();
+ }
+ insertionSort(new DoubleDistanceIntegerDBIDPair(distance.doubleValue(), id.internalGetIndex()));
+ // Truncate if necessary:
+ if(size > k && data[k].doubleDistance() > data[kminus1].doubleDistance()) {
+ truncate();
+ }
+ }
+ }
+
+ @Override
+ @Deprecated
+ public void insert(DoubleDistance dist, DBIDRef id) {
+ final int kminus1 = k - 1;
+ if(size < k || dist.doubleValue() <= data[kminus1].doubleDistance()) {
+ // Ensure we have enough space.
+ if(size > data.length) {
+ grow();
+ }
+ insertionSort(new DoubleDistanceIntegerDBIDPair(dist.doubleValue(), id.internalGetIndex()));
+ // Truncate if necessary:
+ if(size > k && data[k].doubleDistance() > data[kminus1].doubleDistance()) {
+ truncate();
+ }
+ }
+ }
+
+ @Override
+ public void insert(DoubleDistanceDBIDPair e) {
+ final int kminus1 = k - 1;
+ final double dist = e.doubleDistance();
+ if(size < k || dist <= data[kminus1].doubleDistance()) {
+ // Ensure we have enough space.
+ if(size > data.length) {
+ grow();
+ }
+ if(e instanceof DoubleDistanceIntegerDBIDPair) {
+ insertionSort((DoubleDistanceIntegerDBIDPair) e);
+ }
+ else {
+ insertionSort(new DoubleDistanceIntegerDBIDPair(dist, e.internalGetIndex()));
+ }
+ // Truncate if necessary:
+ if(size > k && data[k].doubleDistance() > data[kminus1].doubleDistance()) {
+ truncate();
+ }
+ }
+ }
+
+ /**
+ * Perform insertion sort.
+ *
+ * @param obj Object to insert
+ */
+ private void insertionSort(DoubleDistanceIntegerDBIDPair obj) {
+ // Insertion sort:
+ int pos = size;
+ while(pos > 0) {
+ final int prev = pos - 1;
+ DoubleDistanceIntegerDBIDPair pobj = data[prev];
+ if(pobj.doubleDistance() <= obj.doubleDistance()) {
+ break;
+ }
+ data[pos] = pobj;
+ pos = prev;
+ }
+ data[pos] = obj;
+ ++size;
+ }
+
+ private void grow() {
+ final DoubleDistanceIntegerDBIDPair[] old = data;
+ data = new DoubleDistanceIntegerDBIDPair[data.length + (data.length >> 1)];
+ System.arraycopy(old, 0, data, 0, old.length);
+ }
+
+ @Override
+ public DoubleDistanceIntegerDBIDPair poll() {
+ assert (size > 0);
+ return data[size--];
+ }
+
+ @Override
+ public DoubleDistanceIntegerDBIDPair peek() {
+ assert (size > 0);
+ return data[size - 1];
+ }
+
+ @Override
+ public DoubleDistanceKNNList toKNNList() {
+ return this;
+ }
+
+ @Override
+ public int getK() {
+ return k;
+ }
+
+ @Override
+ @Deprecated
+ public DoubleDistance getKNNDistance() {
+ if(size < k) {
+ return DoubleDistance.INFINITE_DISTANCE;
+ }
+ return get(k - 1).getDistance();
+ }
+
+ @Override
+ public double doubleKNNDistance() {
+ if(size < k) {
+ return Double.POSITIVE_INFINITY;
+ }
+ return get(k - 1).doubleDistance();
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder buf = new StringBuilder();
+ buf.append("kNNList[");
+ for(DoubleDistanceDBIDListIter iter = this.iter(); iter.valid();) {
+ buf.append(iter.doubleDistance()).append(':').append(DBIDUtil.toString(iter));
+ iter.advance();
+ if(iter.valid()) {
+ buf.append(',');
+ }
+ }
+ buf.append(']');
+ return buf.toString();
+ }
+
+ @Override
+ public DoubleDistanceIntegerDBIDPair get(int index) {
+ return data[index];
+ }
+
+ @Override
+ public DoubleDistanceIntegerDBIDListIter iter() {
+ return new Itr();
+ }
+
+ @Override
+ public int size() {
+ return size;
+ }
+
+ @Override
+ public boolean contains(DBIDRef o) {
+ for(DBIDIter iter = iter(); iter.valid(); iter.advance()) {
+ if(DBIDUtil.equal(iter, o)) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return size == 0;
+ }
+
+ /**
+ * Iterator.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ private class Itr implements DoubleDistanceIntegerDBIDListIter {
+ /**
+ * Cursor position.
+ */
+ private int pos = 0;
+
+ @Override
+ public int internalGetIndex() {
+ return get(pos).internalGetIndex();
+ }
+
+ @Override
+ public boolean valid() {
+ return pos < size;
+ }
+
+ @Override
+ public void advance() {
+ pos++;
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * @deprecated use {@link #doubleDistance}!
+ */
+ @Override
+ @Deprecated
+ public DoubleDistance getDistance() {
+ return get(pos).getDistance();
+ }
+
+ @Override
+ public double doubleDistance() {
+ return get(pos).doubleDistance();
+ }
+
+ @Override
+ public DoubleDistanceIntegerDBIDPair getDistancePair() {
+ return get(pos);
+ }
+
+ @Override
+ public int getOffset() {
+ return pos;
+ }
+
+ @Override
+ public void advance(int count) {
+ pos += count;
+ }
+
+ @Override
+ public void retract() {
+ --pos;
+ }
+
+ @Override
+ public void seek(int off) {
+ pos = off;
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/database/ids/integer/DoubleDistanceIntegerDBIDPairList.java b/src/de/lmu/ifi/dbs/elki/database/ids/integer/DoubleDistanceIntegerDBIDPairList.java
new file mode 100644
index 00000000..5b33d3f9
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/database/ids/integer/DoubleDistanceIntegerDBIDPairList.java
@@ -0,0 +1,234 @@
+package de.lmu.ifi.dbs.elki.database.ids.integer;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import java.util.Arrays;
+
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDPair;
+import de.lmu.ifi.dbs.elki.database.ids.distance.ModifiableDoubleDistanceDBIDList;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultUtil;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
+
+/**
+ * Class to store double distance, integer DBID results.
+ *
+ * @author Erich Schubert
+ */
+public class DoubleDistanceIntegerDBIDPairList implements ModifiableDoubleDistanceDBIDList, IntegerDBIDs {
+ /**
+ * The size
+ */
+ int size;
+
+ /**
+ * Distance values
+ */
+ DoubleDistanceIntegerDBIDPair[] data;
+
+ /**
+ * Constructor.
+ */
+ public DoubleDistanceIntegerDBIDPairList() {
+ super();
+ this.data = new DoubleDistanceIntegerDBIDPair[21];
+ }
+
+ /**
+ * Constructor.
+ *
+ * @param size Initial size
+ */
+ public DoubleDistanceIntegerDBIDPairList(int size) {
+ super();
+ if (size > 0) {
+ size = 21;
+ }
+ this.data = new DoubleDistanceIntegerDBIDPair[size];
+ }
+
+ @Override
+ public DoubleDistanceIntegerDBIDListIter iter() {
+ return new Itr();
+ }
+
+ @Override
+ public boolean contains(DBIDRef o) {
+ final int q = o.internalGetIndex();
+ for (int i = 0; i < size; i++) {
+ if (q == data[i].internalGetIndex()) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return size == 0;
+ }
+
+ @Override
+ public int size() {
+ return size;
+ }
+
+ @Override
+ public DoubleDistanceIntegerDBIDPair get(int index) {
+ return data[index];
+ }
+
+ /**
+ * Add an entry, consisting of distance and internal index.
+ *
+ * @param pair entry
+ */
+ protected void addInternal(DoubleDistanceIntegerDBIDPair pair) {
+ if (size == data.length) {
+ DoubleDistanceIntegerDBIDPair[] old = data;
+ data = new DoubleDistanceIntegerDBIDPair[(data.length << 1) + 1];
+ System.arraycopy(old, 0, data, 0, old.length);
+ }
+ data[size++] = pair;
+ }
+
+ @Override
+ @Deprecated
+ public void add(DoubleDistance dist, DBIDRef id) {
+ add(dist.doubleValue(), id);
+ }
+
+ @Override
+ public void add(double dist, DBIDRef id) {
+ addInternal(new DoubleDistanceIntegerDBIDPair(dist, id.internalGetIndex()));
+ }
+
+ @Override
+ public void add(DoubleDistanceDBIDPair pair) {
+ if (pair instanceof DoubleDistanceIntegerDBIDPair) {
+ addInternal((DoubleDistanceIntegerDBIDPair) pair);
+ } else {
+ addInternal(new DoubleDistanceIntegerDBIDPair(pair.doubleDistance(), pair.internalGetIndex()));
+ }
+ }
+
+ @Override
+ public void clear() {
+ Arrays.fill(data, null);
+ size = 0;
+ }
+
+ @Override
+ public void sort() {
+ Arrays.sort(data, 0, size, DistanceDBIDResultUtil.distanceComparator());
+ }
+
+ /**
+ * Reverse the list.
+ */
+ protected void reverse() {
+ for (int i = 0, j = size - 1; i < j; i++, j--) {
+ DoubleDistanceIntegerDBIDPair tmpd = data[j];
+ data[j] = data[i];
+ data[i] = tmpd;
+ }
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder buf = new StringBuilder();
+ buf.append("kNNList[");
+ for (DoubleDistanceDBIDListIter iter = this.iter(); iter.valid();) {
+ buf.append(iter.doubleDistance()).append(':').append(iter.internalGetIndex());
+ iter.advance();
+ if (iter.valid()) {
+ buf.append(',');
+ }
+ }
+ buf.append(']');
+ return buf.toString();
+ }
+
+ /**
+ * List iterator.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ private class Itr implements DoubleDistanceIntegerDBIDListIter {
+ int offset = 0;
+
+ @Override
+ public boolean valid() {
+ return offset < size;
+ }
+
+ @Override
+ public void advance() {
+ ++offset;
+ }
+
+ @Override
+ public int getOffset() {
+ return offset;
+ }
+
+ @Override
+ public void advance(int count) {
+ offset += count;
+ }
+
+ @Override
+ public void retract() {
+ offset--;
+ }
+
+ @Override
+ public void seek(int off) {
+ offset = off;
+ }
+
+ @Override
+ public int internalGetIndex() {
+ return data[offset].internalGetIndex();
+ }
+
+ @Override
+ public double doubleDistance() {
+ return data[offset].doubleDistance();
+ }
+
+ @Override
+ public DoubleDistanceDBIDPair getDistancePair() {
+ return data[offset];
+ }
+
+ @Override
+ @Deprecated
+ public DoubleDistance getDistance() {
+ return new DoubleDistance(data[offset].doubleDistance());
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/database/ids/integer/DoubleDistanceIntegerDBIDSortedKNNList.java b/src/de/lmu/ifi/dbs/elki/database/ids/integer/DoubleDistanceIntegerDBIDSortedKNNList.java
new file mode 100644
index 00000000..c4c60bc0
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/database/ids/integer/DoubleDistanceIntegerDBIDSortedKNNList.java
@@ -0,0 +1,157 @@
+package de.lmu.ifi.dbs.elki.database.ids.integer;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDPair;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNHeap;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
+
+/**
+ * Track the k nearest neighbors, with insertion sort to ensure the correct
+ * order.
+ *
+ * @author Erich Schubert
+ */
+public class DoubleDistanceIntegerDBIDSortedKNNList extends DoubleDistanceIntegerDBIDKNNList implements DoubleDistanceKNNHeap {
+ /**
+ * Constructor.
+ *
+ * @param k K parameter
+ */
+ public DoubleDistanceIntegerDBIDSortedKNNList(int k) {
+ super(k, k + 11);
+ }
+
+ /**
+ * Add a new element to the heap/list.
+ *
+ * @param dist Distance
+ * @param id Object ID
+ */
+ @Override
+ protected final void addInternal(final double dist, final int id) {
+ if(size >= k && dist > dists[k - 1]) {
+ return;
+ }
+ insertionSort(dist, id);
+ }
+
+ /**
+ * Insertion sort a single object.
+ *
+ * @param dist New distance
+ * @param id New id
+ */
+ private void insertionSort(final double dist, final int id) {
+ // Ensure we have enough space.
+ if(size == dists.length) {
+ grow();
+ }
+ int pos = size;
+ while(pos > 0) {
+ final int pre = pos - 1;
+ final double predist = dists[pre];
+ if(predist <= dist) {
+ break;
+ }
+ dists[pos] = predist;
+ ids[pos] = ids[pre];
+ pos = pre;
+ }
+ dists[pos] = dist;
+ ids[pos] = id;
+ ++size;
+ if(size > k && dists[k] > dists[k - 1]) {
+ size = k; // truncate
+ }
+ return;
+ }
+
+ @Override
+ public double insert(double dist, DBIDRef id) {
+ final int kminus1 = k - 1;
+ final int iid = id.internalGetIndex();
+ if(size >= k && dist > dists[kminus1]) {
+ return (size >= k) ? dists[kminus1] : Double.POSITIVE_INFINITY;
+ }
+ insertionSort(dist, iid);
+ return (size >= k) ? dists[kminus1] : Double.POSITIVE_INFINITY;
+ }
+
+ @Override
+ public void add(double dist, DBIDRef id) {
+ addInternal(dist, id.internalGetIndex());
+ }
+
+ @Override
+ @Deprecated
+ public void insert(Double dist, DBIDRef id) {
+ addInternal(dist.doubleValue(), id.internalGetIndex());
+ }
+
+ @Override
+ public void insert(DoubleDistanceDBIDPair e) {
+ addInternal(e.doubleDistance(), e.internalGetIndex());
+ }
+
+ @Override
+ @Deprecated
+ public void insert(DoubleDistance dist, DBIDRef id) {
+ addInternal(dist.doubleValue(), id.internalGetIndex());
+ }
+
+ @Override
+ public DoubleDistanceIntegerDBIDPair poll() {
+ final int last = size - 1;
+ return new DoubleDistanceIntegerDBIDPair(dists[last], ids[last]);
+ }
+
+ @Override
+ public DoubleDistanceIntegerDBIDPair peek() {
+ final int last = size - 1;
+ return new DoubleDistanceIntegerDBIDPair(dists[last], ids[last]);
+ }
+
+ @Override
+ public DoubleDistanceKNNList toKNNList() {
+ return this;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder buf = new StringBuilder();
+ buf.append("kNNListHeap[");
+ for(DoubleDistanceDBIDListIter iter = this.iter(); iter.valid();) {
+ buf.append(iter.doubleDistance()).append(':').append(iter.internalGetIndex());
+ iter.advance();
+ if(iter.valid()) {
+ buf.append(',');
+ }
+ }
+ buf.append(']');
+ return buf.toString();
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerArrayDBIDs.java b/src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerArrayDBIDs.java
index 61a12b3f..286bedf9 100644
--- a/src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerArrayDBIDs.java
+++ b/src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerArrayDBIDs.java
@@ -33,4 +33,7 @@ import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
public interface IntegerArrayDBIDs extends IntegerDBIDs, ArrayDBIDs {
@Override
IntegerDBIDArrayIter iter();
+
+ @Override
+ IntegerArrayDBIDs slice(int begin, int end);
}
diff --git a/src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerDBID.java b/src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerDBID.java
index 40243695..6b7ec5ac 100644
--- a/src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerDBID.java
+++ b/src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerDBID.java
@@ -25,9 +25,11 @@ package de.lmu.ifi.dbs.elki.database.ids.integer;
import java.nio.ByteBuffer;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDVar;
import de.lmu.ifi.dbs.elki.logging.LoggingUtil;
import de.lmu.ifi.dbs.elki.persistent.ByteArrayUtil;
@@ -47,7 +49,6 @@ import de.lmu.ifi.dbs.elki.persistent.FixedSizeByteBufferSerializer;
*
* @author Erich Schubert
*
- * @apiviz.landmark
* @apiviz.composedOf DynamicSerializer
* @apiviz.composedOf StaticSerializer
*/
@@ -88,6 +89,16 @@ final class IntegerDBID implements DBID, IntegerDBIDRef {
}
@Override
+ public int size() {
+ return 1;
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return false;
+ }
+
+ @Override
public String toString() {
return Integer.toString(id);
}
@@ -120,8 +131,8 @@ final class IntegerDBID implements DBID, IntegerDBIDRef {
}
@Override
- public DBIDArrayIter iter() {
- return new DBIDItr();
+ public Itr iter() {
+ return new Itr();
}
@Override
@@ -146,16 +157,20 @@ final class IntegerDBID implements DBID, IntegerDBIDRef {
}
@Override
- public int size() {
- return 1;
- }
-
- @Override
public int binarySearch(DBIDRef key) {
final int other = key.internalGetIndex();
return (other == id) ? 0 : (other < id) ? -1 : -2;
}
+ @Override
+ public ArrayDBIDs slice(int begin, int end) {
+ if (begin == 0 && end == 1) {
+ return this;
+ } else {
+ return DBIDUtil.EMPTYDBIDS;
+ }
+ }
+
/**
* Pseudo iterator for DBIDs interface.
*
@@ -163,7 +178,7 @@ final class IntegerDBID implements DBID, IntegerDBIDRef {
*
* @apiviz.exclude
*/
- protected class DBIDItr implements DBIDArrayIter, IntegerDBIDRef {
+ protected class Itr implements DBIDArrayIter, IntegerDBIDRef {
/**
* Iterator position: We use an integer so we can support retract().
*/
@@ -224,11 +239,6 @@ final class IntegerDBID implements DBID, IntegerDBIDRef {
}
}
- @Override
- public boolean isEmpty() {
- return false;
- }
-
/**
* Dynamic sized serializer, using varint.
*
diff --git a/src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerDBIDArrayQuickSort.java b/src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerDBIDArrayQuickSort.java
index 90a97609..d6cadf60 100644
--- a/src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerDBIDArrayQuickSort.java
+++ b/src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerDBIDArrayQuickSort.java
@@ -42,7 +42,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
*
* @author Erich Schubert
*
- * @apiviz.uses TroveArrayModifiableDBIDs
+ * @apiviz.uses IntegerArrayDBIDs
*/
@Reference(authors = "Vladimir Yaroslavskiy", title = "Dual-Pivot Quicksort", booktitle = "http://iaroslavski.narod.ru/quicksort/", url = "http://iaroslavski.narod.ru/quicksort/")
class IntegerDBIDArrayQuickSort {
diff --git a/src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerDBIDIter.java b/src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerDBIDIter.java
index cc241475..721e6e4e 100644
--- a/src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerDBIDIter.java
+++ b/src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerDBIDIter.java
@@ -28,6 +28,8 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
* Iterator for integer DBIDs.
*
* @author Erich Schubert
+ *
+ * @apiviz.landmark
*/
public interface IntegerDBIDIter extends IntegerDBIDRef, DBIDIter {
// Empty
diff --git a/src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerDBIDPair.java b/src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerDBIDPair.java
index 1b1ab154..12e9b685 100644
--- a/src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerDBIDPair.java
+++ b/src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerDBIDPair.java
@@ -29,8 +29,6 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDPair;
* DBID pair using two ints for storage.
*
* @author Erich Schubert
- *
- * @apiviz.has IntegerDBID
*/
public class IntegerDBIDPair implements DBIDPair {
/**
diff --git a/src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerDBIDRange.java b/src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerDBIDRange.java
index 3ceb163d..e418db5f 100644
--- a/src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerDBIDRange.java
+++ b/src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerDBIDRange.java
@@ -23,22 +23,21 @@ package de.lmu.ifi.dbs.elki.database.ids.integer;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDFactory;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRange;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDVar;
+import de.lmu.ifi.dbs.elki.database.ids.SetDBIDs;
/**
* Representing a DBID range allocation.
*
* @author Erich Schubert
- *
- * @apiviz.has IntegerDBID
*/
-class IntegerDBIDRange implements DBIDRange {
+final class IntegerDBIDRange implements IntegerDBIDs, DBIDRange, SetDBIDs {
/**
* Start value.
*/
@@ -72,8 +71,77 @@ class IntegerDBIDRange implements DBIDRange {
}
@Override
- public DBIDArrayIter iter() {
- return new DBIDItr(start, len);
+ public boolean contains(DBIDRef o) {
+ int oid = o.internalGetIndex();
+ if (oid < start) {
+ return false;
+ }
+ if (oid >= start + len) {
+ return false;
+ }
+ return true;
+ }
+
+ @Override
+ public DBID get(int i) {
+ if (i > len || i < 0) {
+ throw new ArrayIndexOutOfBoundsException();
+ }
+ return DBIDFactory.FACTORY.importInteger(start + i);
+ }
+
+ /**
+ * For storage array offsets.
+ *
+ * @param dbid ID reference
+ * @return array offset
+ */
+ @Override
+ public int getOffset(DBIDRef dbid) {
+ return dbid.internalGetIndex() - start;
+ }
+
+ @Override
+ public void assignVar(int index, DBIDVar var) {
+ if (var instanceof IntegerDBIDVar) {
+ ((IntegerDBIDVar) var).internalSetIndex(start + index);
+ } else {
+ // Much less efficient:
+ var.set(get(index));
+ }
+ }
+
+ @Override
+ public int binarySearch(DBIDRef key) {
+ int keyid = DBIDUtil.asInteger(key);
+ if (keyid < start) {
+ return -1;
+ }
+ final int off = keyid - start;
+ if (off < len) {
+ return off;
+ }
+ return -(len + 1);
+ }
+
+ @Override
+ public String toString() {
+ return "[" + start + " to " + (start + len - 1) + "]";
+ }
+
+ @Override
+ public final int mapDBIDToOffset(DBIDRef dbid) {
+ return dbid.internalGetIndex() - start;
+ }
+
+ @Override
+ public ArrayDBIDs slice(int begin, int end) {
+ return new IntegerDBIDRange(begin + start, end - begin);
+ }
+
+ @Override
+ public Itr iter() {
+ return new Itr(start, len);
}
/**
@@ -83,30 +151,29 @@ class IntegerDBIDRange implements DBIDRange {
*
* @apiviz.exclude
*/
- protected static class DBIDItr implements DBIDArrayIter, IntegerDBIDRef {
+ private final static class Itr implements IntegerDBIDArrayIter {
/**
* Current position.
*/
- int pos = 0;
-
+ private int pos;
+
/**
* Interval length.
*/
- final int len;
+ final private int len;
/**
* Interval start.
*/
- final int start;
-
+ final private int start;
+
/**
* Constructor.
- *
+ *
* @param start Interval start
* @param len Interval length
*/
- DBIDItr(int start, int len) {
- super();
+ public Itr(final int start, final int len) {
this.start = start;
this.len = len;
}
@@ -118,7 +185,7 @@ class IntegerDBIDRange implements DBIDRange {
@Override
public void advance() {
- pos++;
+ ++pos;
}
@Override
@@ -128,7 +195,7 @@ class IntegerDBIDRange implements DBIDRange {
@Override
public void retract() {
- pos--;
+ --pos;
}
@Override
@@ -156,68 +223,4 @@ class IntegerDBIDRange implements DBIDRange {
return Integer.toString(internalGetIndex());
}
}
-
- @Override
- public boolean contains(DBIDRef o) {
- int oid = DBIDUtil.asInteger(o);
- if(oid < start) {
- return false;
- }
- if(oid >= start + len) {
- return false;
- }
- return true;
- }
-
- @Override
- public DBID get(int i) {
- if(i > len || i < 0) {
- throw new ArrayIndexOutOfBoundsException();
- }
- return DBIDFactory.FACTORY.importInteger(start + i);
- }
-
- /**
- * For storage array offsets.
- *
- * @param dbid ID reference
- * @return array offset
- */
- @Override
- public int getOffset(DBIDRef dbid) {
- return dbid.internalGetIndex() - start;
- }
-
- @Override
- public void assignVar(int index, DBIDVar var) {
- if (var instanceof IntegerDBIDVar) {
- ((IntegerDBIDVar)var).internalSetIndex(start + index);
- } else {
- // Much less efficient:
- var.set(get(index));
- }
- }
-
- @Override
- public int binarySearch(DBIDRef key) {
- int keyid = DBIDUtil.asInteger(key);
- if(keyid < start) {
- return -1;
- }
- final int off = keyid - start;
- if(off < len) {
- return off;
- }
- return -(len + 1);
- }
-
- @Override
- public String toString() {
- return "[" + start + " to " + (start + len - 1) + "]";
- }
-
- @Override
- public int mapDBIDToOffset(DBIDRef dbid) {
- return dbid.internalGetIndex() - start;
- }
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerDBIDRef.java b/src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerDBIDRef.java
index dc63b117..0e1e82a9 100644
--- a/src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerDBIDRef.java
+++ b/src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerDBIDRef.java
@@ -29,6 +29,8 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
* DBID reference that references an integer value.
*
* @author Erich Schubert
+ *
+ * @apiviz.landmark
*/
interface IntegerDBIDRef extends DBIDRef {
/**
diff --git a/src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerDBIDVar.java b/src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerDBIDVar.java
index 57d400df..9120c0d7 100644
--- a/src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerDBIDVar.java
+++ b/src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerDBIDVar.java
@@ -23,8 +23,10 @@ package de.lmu.ifi.dbs.elki.database.ids.integer;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDVar;
import de.lmu.ifi.dbs.elki.logging.LoggingUtil;
@@ -87,13 +89,13 @@ class IntegerDBIDVar implements DBIDVar, IntegerDBIDs {
}
@Override
- public IntegerDBIDArrayIter iter() {
- return new DBIDItr();
+ public int size() {
+ return 1;
}
@Override
- public int size() {
- return 1;
+ public boolean isEmpty() {
+ return false;
}
@Override
@@ -107,6 +109,35 @@ class IntegerDBIDVar implements DBIDVar, IntegerDBIDs {
return id == o.internalGetIndex();
}
+ @Override
+ public void assignVar(int i, DBIDVar var) {
+ if (var instanceof IntegerDBIDVar) {
+ ((IntegerDBIDVar) var).internalSetIndex(i);
+ } else {
+ // Much less efficient:
+ var.set(get(i));
+ }
+ }
+
+ @Override
+ public ArrayDBIDs slice(int begin, int end) {
+ if (begin == 0 && end == 1) {
+ return this;
+ } else {
+ return DBIDUtil.EMPTYDBIDS;
+ }
+ }
+
+ @Override
+ public String toString() {
+ return Integer.toString(id);
+ }
+
+ @Override
+ public Itr iter() {
+ return new Itr();
+ }
+
/**
* Pseudo iterator for DBIDs interface.
*
@@ -114,53 +145,53 @@ class IntegerDBIDVar implements DBIDVar, IntegerDBIDs {
*
* @apiviz.exclude
*/
- protected class DBIDItr implements IntegerDBIDArrayIter, IntegerDBIDRef {
+ protected class Itr implements IntegerDBIDArrayIter, IntegerDBIDRef {
/**
* Iterator position: We use an integer so we can support retract().
*/
int pos = 0;
-
+
@Override
public void advance() {
pos++;
}
-
+
@Override
public void advance(int count) {
pos += count;
}
-
+
@Override
public void retract() {
pos--;
}
-
+
@Override
public void seek(int off) {
pos = off;
}
-
+
@Override
public int getOffset() {
return pos;
}
-
+
@Override
public int internalGetIndex() {
return IntegerDBIDVar.this.id;
}
-
+
@Override
public boolean valid() {
return (pos == 0);
}
-
+
@Override
public int hashCode() {
// Override, because we also are overriding equals.
return super.hashCode();
}
-
+
@Override
public boolean equals(Object other) {
if (other instanceof DBID) {
@@ -168,30 +199,10 @@ class IntegerDBIDVar implements DBIDVar, IntegerDBIDs {
}
return super.equals(other);
}
-
+
@Override
public String toString() {
return Integer.toString(internalGetIndex());
}
}
-
- @Override
- public boolean isEmpty() {
- return false;
- }
-
- @Override
- public void assignVar(int i, DBIDVar var) {
- if (var instanceof IntegerDBIDVar) {
- ((IntegerDBIDVar) var).internalSetIndex(i);
- } else {
- // Much less efficient:
- var.set(get(i));
- }
- }
-
- @Override
- public String toString() {
- return Integer.toString(id);
- }
}
diff --git a/src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerDBIDs.java b/src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerDBIDs.java
index 4acf91e6..14faa72a 100644
--- a/src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerDBIDs.java
+++ b/src/de/lmu/ifi/dbs/elki/database/ids/integer/IntegerDBIDs.java
@@ -28,6 +28,8 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
* Integer DBID collection.
*
* @author Erich Schubert
+ *
+ * @apiviz.has IntegerDBIDIter
*/
public interface IntegerDBIDs extends DBIDs {
@Override
diff --git a/src/de/lmu/ifi/dbs/elki/database/ids/integer/TroveArrayDBIDs.java b/src/de/lmu/ifi/dbs/elki/database/ids/integer/TroveArrayDBIDs.java
deleted file mode 100644
index 6980176a..00000000
--- a/src/de/lmu/ifi/dbs/elki/database/ids/integer/TroveArrayDBIDs.java
+++ /dev/null
@@ -1,192 +0,0 @@
-package de.lmu.ifi.dbs.elki.database.ids.integer;
-
-/*
- This file is part of ELKI:
- Environment for Developing KDD-Applications Supported by Index-Structures
-
- Copyright (C) 2013
- Ludwig-Maximilians-Universität München
- Lehr- und Forschungseinheit für Datenbanksysteme
- ELKI Development Team
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-import gnu.trove.list.TIntList;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDVar;
-import de.lmu.ifi.dbs.elki.logging.LoggingUtil;
-
-/**
- * Abstract base class for GNU Trove array based lists.
- *
- * @author Erich Schubert
- *
- * @apiviz.has IntegerDBID
- * @apiviz.has DBIDItr
- */
-public abstract class TroveArrayDBIDs implements IntegerArrayDBIDs {
- /**
- * Get the array store.
- *
- * @return the store
- */
- protected abstract TIntList getStore();
-
- @Override
- public IntegerDBIDArrayMIter iter() {
- return new DBIDItr(getStore());
- }
-
- @Override
- public DBID get(int index) {
- return new IntegerDBID(getStore().get(index));
- }
-
- @Override
- public void assignVar(int index, DBIDVar var) {
- if (var instanceof IntegerDBIDVar) {
- ((IntegerDBIDVar)var).internalSetIndex(getStore().get(index));
- } else {
- // Much less efficient:
- var.set(get(index));
- }
- }
-
- @Override
- public int size() {
- return getStore().size();
- }
-
- @Override
- public boolean isEmpty() {
- return getStore().isEmpty();
- }
-
- @Override
- public boolean contains(DBIDRef o) {
- return getStore().contains(DBIDUtil.asInteger(o));
- }
-
- @Override
- public int binarySearch(DBIDRef key) {
- return getStore().binarySearch(DBIDUtil.asInteger(key));
- }
-
- @Override
- public String toString() {
- StringBuilder buf = new StringBuilder();
- buf.append('[');
- for(DBIDIter iter = iter(); iter.valid(); iter.advance()) {
- if(buf.length() > 1) {
- buf.append(", ");
- }
- buf.append(((IntegerDBIDRef) iter).internalGetIndex());
- }
- buf.append(']');
- return buf.toString();
- }
-
- /**
- * Iterate over a Trove IntList, ELKI/C-style.
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
- */
- protected static class DBIDItr implements IntegerDBIDArrayMIter {
- /**
- * Current position.
- */
- int pos = 0;
-
- /**
- * The actual store we use.
- */
- TIntList store;
-
- /**
- * Constructor.
- *
- * @param store The actual trove store
- */
- public DBIDItr(TIntList store) {
- super();
- this.store = store;
- }
-
- @Override
- public boolean valid() {
- return pos < store.size() && pos >= 0;
- }
-
- @Override
- public void advance() {
- pos++;
- }
-
- @Override
- public void advance(int count) {
- pos += count;
- }
-
- @Override
- public void retract() {
- pos--;
- }
-
- @Override
- public void seek(int off) {
- pos = off;
- }
-
- @Override
- public int getOffset() {
- return pos;
- }
-
- @Override
- public int internalGetIndex() {
- return store.get(pos);
- }
-
- @Override
- public void remove() {
- store.removeAt(pos);
- pos--;
- }
-
- @Override
- public int hashCode() {
- // Since we add a warning to 'equals', we also override hashCode.
- return super.hashCode();
- }
-
- @Override
- public boolean equals(Object other) {
- if(other instanceof DBID) {
- LoggingUtil.warning("Programming error detected: DBIDItr.equals(DBID). Use DBIDUtil.equal(iter, id)!", new Throwable());
- }
- return super.equals(other);
- }
-
- @Override
- public String toString() {
- return Integer.toString(internalGetIndex());
- }
- }
-} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/database/ids/integer/TroveArrayModifiableDBIDs.java b/src/de/lmu/ifi/dbs/elki/database/ids/integer/TroveArrayModifiableDBIDs.java
deleted file mode 100644
index 41191b10..00000000
--- a/src/de/lmu/ifi/dbs/elki/database/ids/integer/TroveArrayModifiableDBIDs.java
+++ /dev/null
@@ -1,155 +0,0 @@
-package de.lmu.ifi.dbs.elki.database.ids.integer;
-
-/*
- This file is part of ELKI:
- Environment for Developing KDD-Applications Supported by Index-Structures
-
- Copyright (C) 2013
- Ludwig-Maximilians-Universität München
- Lehr- und Forschungseinheit für Datenbanksysteme
- ELKI Development Team
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-import gnu.trove.list.array.TIntArrayList;
-
-import java.util.Comparator;
-
-import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-
-/**
- * Class using a GNU Trove int array list as storage.
- *
- * @author Erich Schubert
- */
-class TroveArrayModifiableDBIDs extends TroveArrayDBIDs implements ArrayModifiableDBIDs {
- /**
- * The actual trove array list
- */
- private TIntArrayList store;
-
- /**
- * Constructor.
- *
- * @param size Initial size
- */
- protected TroveArrayModifiableDBIDs(int size) {
- super();
- this.store = new TIntArrayList(size);
- }
-
- /**
- * Constructor.
- */
- protected TroveArrayModifiableDBIDs() {
- super();
- this.store = new TIntArrayList();
- }
-
- /**
- * Constructor.
- *
- * @param existing Existing ids
- */
- protected TroveArrayModifiableDBIDs(DBIDs existing) {
- this(existing.size());
- this.addDBIDs(existing);
- }
-
- @Override
- protected TIntArrayList getStore() {
- return store;
- }
-
- @Override
- public boolean addDBIDs(DBIDs ids) {
- boolean success = false;
- store.ensureCapacity(ids.size());
- for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
- success |= store.add(DBIDUtil.asInteger(iter));
- }
- return success;
- }
-
- @Override
- public boolean removeDBIDs(DBIDs ids) {
- boolean success = false;
- for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
- success |= store.remove(DBIDUtil.asInteger(id));
- }
- return success;
- }
-
- @Override
- public boolean add(DBIDRef e) {
- return store.add(DBIDUtil.asInteger(e));
- }
-
- @Override
- public boolean remove(DBIDRef o) {
- return store.remove(DBIDUtil.asInteger(o));
- }
-
- @Override
- public DBID set(int index, DBIDRef element) {
- int prev = store.set(index, DBIDUtil.asInteger(element));
- return new IntegerDBID(prev);
- }
-
- @Override
- public DBID remove(int index) {
- return new IntegerDBID(store.removeAt(index));
- }
-
- @Override
- public void clear() {
- store.clear();
- }
-
- @Override
- public void sort() {
- store.sort();
- }
-
- @Override
- public void sort(Comparator<? super DBIDRef> comparator) {
- // TODO: we no longer produce a lot of DBIDs anymore, but it would be even
- // cooler if we could access store._data directly...
- int[] data = store.toArray();
- IntegerDBIDArrayQuickSort.sort(data, comparator);
- store.clear();
- store.add(data);
- }
-
- @Override
- public void sort(int start, int end, Comparator<? super DBIDRef> comparator) {
- // TODO: we no longer produce a lot of DBIDs anymore, but it would be even
- // cooler if we could access store._data directly...
- int[] data = store.toArray();
- IntegerDBIDArrayQuickSort.sort(data, start, end, comparator);
- store.clear();
- store.add(data);
- }
-
- @Override
- public void swap(int a, int b) {
- store.set(a, store.set(b, store.get(a)));
- }
-}
diff --git a/src/de/lmu/ifi/dbs/elki/database/ids/integer/TroveHashSetModifiableDBIDs.java b/src/de/lmu/ifi/dbs/elki/database/ids/integer/TroveHashSetModifiableDBIDs.java
index 9ebdccea..35276606 100644
--- a/src/de/lmu/ifi/dbs/elki/database/ids/integer/TroveHashSetModifiableDBIDs.java
+++ b/src/de/lmu/ifi/dbs/elki/database/ids/integer/TroveHashSetModifiableDBIDs.java
@@ -1,16 +1,5 @@
package de.lmu.ifi.dbs.elki.database.ids.integer;
-import gnu.trove.impl.hash.THashPrimitiveIterator;
-import gnu.trove.impl.hash.TIntHash;
-import gnu.trove.set.hash.TIntHashSet;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDMIter;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter;
-
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
@@ -34,13 +23,23 @@ import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import gnu.trove.impl.hash.THashPrimitiveIterator;
+import gnu.trove.impl.hash.TIntHash;
+import gnu.trove.set.hash.TIntHashSet;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDMIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter;
+
/**
* Implementation using GNU Trove Int Hash Sets.
*
* @author Erich Schubert
*
- * @apiviz.has IntegerDBID
- * @apiviz.has DBIDItr
+ * @apiviz.has Itr
*/
class TroveHashSetModifiableDBIDs implements HashSetModifiableDBIDs, IntegerDBIDs {
/**
@@ -77,15 +76,15 @@ class TroveHashSetModifiableDBIDs implements HashSetModifiableDBIDs, IntegerDBID
}
@Override
- public IntegerDBIDMIter iter() {
- return new DBIDItr(store);
+ public Itr iter() {
+ return new Itr(store);
}
@Override
public boolean addDBIDs(DBIDs ids) {
store.ensureCapacity(ids.size());
boolean success = false;
- for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
success |= store.add(DBIDUtil.asInteger(iter));
}
return success;
@@ -94,7 +93,7 @@ class TroveHashSetModifiableDBIDs implements HashSetModifiableDBIDs, IntegerDBID
@Override
public boolean removeDBIDs(DBIDs ids) {
boolean success = false;
- for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
+ for(DBIDIter id = ids.iter(); id.valid(); id.advance()) {
success |= store.remove(DBIDUtil.asInteger(id));
}
return success;
@@ -113,8 +112,8 @@ class TroveHashSetModifiableDBIDs implements HashSetModifiableDBIDs, IntegerDBID
@Override
public boolean retainAll(DBIDs set) {
boolean modified = false;
- for (DBIDMIter it = iter(); it.valid(); it.advance()) {
- if (!set.contains(it)) {
+ for(DBIDMIter it = iter(); it.valid(); it.advance()) {
+ if(!set.contains(it)) {
it.remove();
modified = true;
}
@@ -146,8 +145,8 @@ class TroveHashSetModifiableDBIDs implements HashSetModifiableDBIDs, IntegerDBID
public String toString() {
StringBuilder buf = new StringBuilder();
buf.append('[');
- for (DBIDIter iter = iter(); iter.valid(); iter.advance()) {
- if (buf.length() > 1) {
+ for(DBIDIter iter = iter(); iter.valid(); iter.advance()) {
+ if(buf.length() > 1) {
buf.append(", ");
}
buf.append(iter.toString());
@@ -163,7 +162,7 @@ class TroveHashSetModifiableDBIDs implements HashSetModifiableDBIDs, IntegerDBID
*
* @apiviz.exclude
*/
- protected static class DBIDItr implements IntegerDBIDMIter {
+ protected static class Itr implements IntegerDBIDMIter {
/**
* The actual iterator. We don't have multi inheritance.
*/
@@ -174,7 +173,7 @@ class TroveHashSetModifiableDBIDs implements HashSetModifiableDBIDs, IntegerDBID
*
* @param hash Trove hash
*/
- public DBIDItr(TIntHash hash) {
+ public Itr(TIntHash hash) {
super();
this.it = new TIntHashItr(hash);
}
diff --git a/src/de/lmu/ifi/dbs/elki/database/ids/integer/UnmodifiableIntegerArrayDBIDs.java b/src/de/lmu/ifi/dbs/elki/database/ids/integer/UnmodifiableIntegerArrayDBIDs.java
index ba30f54f..d1c37ab9 100644
--- a/src/de/lmu/ifi/dbs/elki/database/ids/integer/UnmodifiableIntegerArrayDBIDs.java
+++ b/src/de/lmu/ifi/dbs/elki/database/ids/integer/UnmodifiableIntegerArrayDBIDs.java
@@ -33,7 +33,7 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDVar;
*
* @author Erich Schubert
*
- * @apiviz.uses TroveArrayDBIDs
+ * @apiviz.uses IntegerArrayDBIDs
* @apiviz.has UnmodifiableDBIDIter
*/
public class UnmodifiableIntegerArrayDBIDs implements IntegerArrayStaticDBIDs {
@@ -96,6 +96,11 @@ public class UnmodifiableIntegerArrayDBIDs implements IntegerArrayStaticDBIDs {
return inner.binarySearch(key);
}
+ @Override
+ public IntegerArrayDBIDs slice(int begin, int end) {
+ return new UnmodifiableIntegerArrayDBIDs(inner.slice(begin, end));
+ }
+
/**
* Make an existing DBIDMIter unmodifiable.
*
@@ -151,7 +156,7 @@ public class UnmodifiableIntegerArrayDBIDs implements IntegerArrayStaticDBIDs {
public int internalGetIndex() {
return it.internalGetIndex();
}
-
+
@Override
public String toString() {
return it.toString();
diff --git a/src/de/lmu/ifi/dbs/elki/database/query/distance/PrimitiveDistanceSimilarityQuery.java b/src/de/lmu/ifi/dbs/elki/database/query/distance/PrimitiveDistanceSimilarityQuery.java
index 5ca62a73..52f79f06 100644
--- a/src/de/lmu/ifi/dbs/elki/database/query/distance/PrimitiveDistanceSimilarityQuery.java
+++ b/src/de/lmu/ifi/dbs/elki/database/query/distance/PrimitiveDistanceSimilarityQuery.java
@@ -35,7 +35,7 @@ import de.lmu.ifi.dbs.elki.distance.similarityfunction.PrimitiveSimilarityFuncti
*
* @author Erich Schubert
*
- * @apiviz.uses de.lmu.ifi.dbs.elki.distance.similarityfunction.PrimitiveSimilarityFunction
+ * @apiviz.uses PrimitiveSimilarityFunction
*
* @param <O> Object type
* @param <D> Distance type
@@ -83,4 +83,9 @@ public class PrimitiveDistanceSimilarityQuery<O, D extends Distance<D>> extends
public D similarity(O o1, O o2) {
return this.similarityFunction.similarity(o1, o2);
}
-} \ No newline at end of file
+
+ @Override
+ public PrimitiveSimilarityFunction<? super O, D> getSimilarityFunction() {
+ return similarityFunction;
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/database/query/knn/DoubleOptimizedDistanceKNNQuery.java b/src/de/lmu/ifi/dbs/elki/database/query/knn/DoubleOptimizedDistanceKNNQuery.java
new file mode 100644
index 00000000..4391b745
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/database/query/knn/DoubleOptimizedDistanceKNNQuery.java
@@ -0,0 +1,91 @@
+package de.lmu.ifi.dbs.elki.database.query.knn;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.database.ids.DBIDFactory;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNHeap;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList;
+import de.lmu.ifi.dbs.elki.database.query.LinearScanQuery;
+import de.lmu.ifi.dbs.elki.database.query.distance.PrimitiveDistanceQuery;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
+
+/**
+ * Optimized linear scan query for {@link PrimitiveDoubleDistanceFunction}s.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.uses PrimitiveDoubleDistanceFunction
+ *
+ * @param <O> Object type
+ */
+public class DoubleOptimizedDistanceKNNQuery<O> extends AbstractDistanceKNNQuery<O, DoubleDistance> implements LinearScanQuery {
+ /**
+ * Raw distance function.
+ */
+ PrimitiveDoubleDistanceFunction<O> rawdist;
+
+ /**
+ * Constructor.newDoubleDistanceHeap
+ *
+ * @param distanceQuery Distance function to use
+ */
+ @SuppressWarnings("unchecked")
+ public DoubleOptimizedDistanceKNNQuery(PrimitiveDistanceQuery<O, DoubleDistance> distanceQuery) {
+ super(distanceQuery);
+ if(!(distanceQuery.getDistanceFunction() instanceof PrimitiveDoubleDistanceFunction)) {
+ throw new UnsupportedOperationException("DoubleOptimizedKNNQuery instantiated for non-PrimitiveDoubleDistanceFunction!");
+ }
+ rawdist = (PrimitiveDoubleDistanceFunction<O>) distanceQuery.getDistanceFunction();
+ }
+
+ @Override
+ public DoubleDistanceKNNList getKNNForDBID(DBIDRef id, int k) {
+ final Relation<? extends O> relation = this.relation;
+ DoubleDistanceKNNHeap heap = DBIDFactory.FACTORY.newDoubleDistanceHeap(k);
+ linearScan(relation, relation.iterDBIDs(), rawdist, relation.get(id), heap);
+ return heap.toKNNList();
+ }
+
+ @Override
+ public DoubleDistanceKNNList getKNNForObject(O obj, int k) {
+ DoubleDistanceKNNHeap heap = DBIDFactory.FACTORY.newDoubleDistanceHeap(k);
+ linearScan(relation, relation.iterDBIDs(), rawdist, obj, heap);
+ return heap.toKNNList();
+ }
+
+ private static <O> void linearScan(Relation<? extends O> relation, DBIDIter iter, PrimitiveDoubleDistanceFunction<? super O> rawdist, final O obj, DoubleDistanceKNNHeap heap) {
+ double kdist = Double.POSITIVE_INFINITY;
+ while(iter.valid()) {
+ final double dist = rawdist.doubleDistance(obj, relation.get(iter));
+ if(dist <= kdist) {
+ kdist = heap.insert(dist, iter);
+ }
+ iter.advance();
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/database/query/knn/DoubleOptimizedKNNQuery.java b/src/de/lmu/ifi/dbs/elki/database/query/knn/DoubleOptimizedKNNQuery.java
deleted file mode 100644
index b18db8f0..00000000
--- a/src/de/lmu/ifi/dbs/elki/database/query/knn/DoubleOptimizedKNNQuery.java
+++ /dev/null
@@ -1,309 +0,0 @@
-package de.lmu.ifi.dbs.elki.database.query.knn;
-
-/*
- This file is part of ELKI:
- Environment for Developing KDD-Applications Supported by Index-Structures
-
- Copyright (C) 2013
- Ludwig-Maximilians-Universität München
- Lehr- und Forschungseinheit für Datenbanksysteme
- ELKI Development Team
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-import java.util.ArrayList;
-
-import de.lmu.ifi.dbs.elki.database.ids.DBIDFactory;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDPair;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDPair;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNHeap;
-import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
-import de.lmu.ifi.dbs.elki.database.ids.generic.DistanceDBIDPairKNNList;
-import de.lmu.ifi.dbs.elki.database.ids.generic.DoubleDistanceDBIDPairKNNHeap;
-import de.lmu.ifi.dbs.elki.database.ids.generic.DoubleDistanceDBIDPairKNNList;
-import de.lmu.ifi.dbs.elki.database.ids.integer.DoubleDistanceIntegerDBIDKNNList;
-import de.lmu.ifi.dbs.elki.database.ids.integer.DoubleDistanceIntegerDBIDKNNListHeap;
-import de.lmu.ifi.dbs.elki.database.query.distance.PrimitiveDistanceQuery;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultUtil;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TiedTopBoundedHeap;
-
-/**
- * Optimized linear scan query for {@link PrimitiveDoubleDistanceFunction}s.
- *
- * @author Erich Schubert
- *
- * @apiviz.uses PrimitiveDoubleDistanceFunction
- *
- * @param <O> Object type
- */
-public class DoubleOptimizedKNNQuery<O> extends LinearScanKNNQuery<O, DoubleDistance> {
- /**
- * Raw distance function.
- */
- PrimitiveDoubleDistanceFunction<O> rawdist;
-
- /**
- * Constructor.
- *
- * @param distanceQuery Distance function to use
- */
- @SuppressWarnings("unchecked")
- public DoubleOptimizedKNNQuery(PrimitiveDistanceQuery<O, DoubleDistance> distanceQuery) {
- super(distanceQuery);
- if(!(distanceQuery.getDistanceFunction() instanceof PrimitiveDoubleDistanceFunction)) {
- throw new UnsupportedOperationException("DoubleOptimizedKNNQuery instantiated for non-PrimitiveDoubleDistanceFunction!");
- }
- rawdist = (PrimitiveDoubleDistanceFunction<O>) distanceQuery.getDistanceFunction();
- }
-
- @Override
- public KNNList<DoubleDistance> getKNNForDBID(DBIDRef id, int k) {
- return getKNNForObject(relation.get(id), k);
- }
-
- @Override
- public KNNList<DoubleDistance> getKNNForObject(O obj, int k) {
- return getKNNForObjectBenchmarked(obj, k);
- }
-
- /**
- * This is the straightforward implementation using the optimized heap.
- *
- * @param obj Query object
- * @param k Desired number of neighbors
- * @return kNN result
- */
- KNNList<DoubleDistance> getKNNForObjectKNNHeap(O obj, int k) {
- // Optimization for double distances.
- final DoubleDistanceKNNHeap heap = (DoubleDistanceKNNHeap) DBIDUtil.newHeap(DoubleDistance.FACTORY, k);
- for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
- heap.add(rawdist.doubleDistance(obj, relation.get(iter)), iter);
- }
- return heap.toKNNList();
- }
-
- /**
- * This is the cleaner, supposedly faster implementation.
- *
- * @param obj Query object
- * @param k Desired number of neighbors
- * @return kNN result
- */
- KNNList<DoubleDistance> getKNNForObjectClean(O obj, int k) {
- // Optimization for double distances.
- final TiedTopBoundedHeap<DoubleDistanceDBIDPair> heap = new TiedTopBoundedHeap<>(k, DoubleDistanceDBIDPairKNNHeap.COMPARATOR);
- final DBIDIter iter = relation.iterDBIDs();
-
- // First k elements don't need checking.
- double max = 0.;
- for(int i = 0; i < k && iter.valid(); i++, iter.advance()) {
- final double doubleDistance = rawdist.doubleDistance(obj, relation.get(iter));
- heap.add(DBIDFactory.FACTORY.newDistancePair(doubleDistance, iter));
- max = Math.max(max, doubleDistance);
- }
- // Remaining elements
- for(; iter.valid(); iter.advance()) {
- final double doubleDistance = rawdist.doubleDistance(obj, relation.get(iter));
- if(doubleDistance <= max) {
- heap.add(DBIDFactory.FACTORY.newDistancePair(doubleDistance, iter));
- }
- if(doubleDistance < max) {
- max = heap.peek().doubleDistance();
- }
- }
- return new DoubleDistanceDBIDPairKNNList(heap, k);
- }
-
- /**
- * It does not make sense, but this version is faster in our larger
- * benchmarks. Apparently, some JIT optimization kicks in better.
- *
- * @param obj Query object
- * @param k Desired number of neighbors
- * @return kNN result
- */
- KNNList<DoubleDistance> getKNNForObjectBenchmarked(O obj, int k) {
- // THIS SHOULD BE SLOWER THAN THE VERSION ABOVE, BUT ISN'T!
- final TiedTopBoundedHeap<DistanceDBIDPair<DoubleDistance>> heap = new TiedTopBoundedHeap<>(k, DistanceDBIDResultUtil.BY_REVERSE_DISTANCE);
- final DBIDIter iter = relation.iterDBIDs();
- // First k elements don't need checking.
- double max = 0.;
- for(int i = 0; i < k && iter.valid(); i++, iter.advance()) {
- final double doubleDistance = rawdist.doubleDistance(obj, relation.get(iter));
- heap.add(DBIDFactory.FACTORY.newDistancePair(new DoubleDistance(doubleDistance), iter));
- max = Math.max(max, doubleDistance);
- }
- // Remaining elements
- for(; iter.valid(); iter.advance()) {
- final double doubleDistance = rawdist.doubleDistance(obj, relation.get(iter));
- if(doubleDistance <= max) {
- heap.add(DBIDFactory.FACTORY.newDistancePair(new DoubleDistance(doubleDistance), iter));
- }
- if(doubleDistance < max) {
- max = heap.peek().getDistance().doubleValue();
- }
- }
- return new DistanceDBIDPairKNNList<>(heap, k);
- }
-
- /**
- * Another attempt at getting a faster knn heap.
- *
- * @param obj Query object
- * @param k Desired number of neighbors
- * @return kNN result
- */
- KNNList<DoubleDistance> getKNNForObjectBenchmarked2(O obj, int k) {
- final Heap<DoubleDistanceDBIDPair> heap = new Heap<>(k, DistanceDBIDResultUtil.BY_REVERSE_DISTANCE);
- final ArrayList<DoubleDistanceDBIDPair> ties = new ArrayList<>();
- final DBIDIter iter = relation.iterDBIDs();
- // First k elements don't need checking.
- double max = 0.;
- for(int i = 0; i < k && iter.valid(); i++, iter.advance()) {
- final double doubleDistance = rawdist.distance(obj, relation.get(iter)).doubleValue();
- heap.add(DBIDFactory.FACTORY.newDistancePair(doubleDistance, iter));
- max = Math.max(max, doubleDistance);
- }
- // Remaining elements
- for(; iter.valid(); iter.advance()) {
- final double doubleDistance = rawdist.distance(obj, relation.get(iter)).doubleValue();
- if(doubleDistance <= max) {
- if(doubleDistance < max) {
- DoubleDistanceDBIDPair prev = heap.replaceTopElement(DBIDFactory.FACTORY.newDistancePair(doubleDistance, iter));
- double newkdist = heap.peek().doubleDistance();
- if(newkdist < max) {
- max = newkdist;
- ties.clear();
- }
- else {
- ties.add(prev);
- }
- }
- else {
- ties.add(DBIDFactory.FACTORY.newDistancePair(doubleDistance, iter));
- }
- }
- }
-
- DoubleDistanceIntegerDBIDKNNList ret = new DoubleDistanceIntegerDBIDKNNList(k, k + ties.size());
- for(DoubleDistanceDBIDPair pair : ties) {
- ret.add(pair);
- }
- while(!heap.isEmpty()) {
- ret.add(heap.poll());
- }
- ret.sort(); // Actually, reverse.
- return ret;
- }
-
- /**
- * Next attempt at exploiting the JIT fully.
- *
- * @param obj Query object
- * @param k Desired number of neighbors
- * @return kNN result
- */
- KNNList<DoubleDistance> getKNNForObjectBenchmarked3(O obj, int k) {
- DoubleDistanceKNNHeap heap = new DoubleDistanceIntegerDBIDKNNListHeap(k);
- final DBIDIter iter = relation.iterDBIDs();
- // First k elements don't need checking.
- double max = 0.;
- for(int i = 0; i < k && iter.valid(); i++, iter.advance()) {
- final double doubleDistance = rawdist.doubleDistance(obj, relation.get(iter));
- heap.add(doubleDistance, iter);
- max = Math.max(max, doubleDistance);
- }
- // Remaining elements
- for(; iter.valid(); iter.advance()) {
- final double doubleDistance = rawdist.doubleDistance(obj, relation.get(iter));
- if(doubleDistance <= max) {
- heap.add(doubleDistance, iter);
- }
- if(doubleDistance < max) {
- max = heap.peek().doubleDistance();
- }
- }
- return heap.toKNNList();
- }
-
- /**
- * Next attempt at exploiting the JIT fully.
- *
- * @param obj Query object
- * @param k Desired number of neighbors
- * @return kNN result
- */
- KNNList<DoubleDistance> getKNNForObjectBenchmarked4(O obj, int k) {
- DoubleDistanceKNNHeap heap = new DoubleDistanceIntegerDBIDKNNListHeap(k);
- final DBIDIter iter = relation.iterDBIDs();
- // First k elements don't need checking.
- for(int i = 0; i < k && iter.valid(); i++, iter.advance()) {
- final double doubleDistance = rawdist.doubleDistance(obj, relation.get(iter));
- heap.add(doubleDistance, iter);
- }
- double max = heap.doubleKNNDistance();
- // Remaining elements
- for(; iter.valid(); iter.advance()) {
- final double doubleDistance = rawdist.doubleDistance(obj, relation.get(iter));
- if(doubleDistance <= max) {
- heap.add(doubleDistance, iter);
- max = heap.doubleKNNDistance();
- }
- }
- return heap.toKNNList();
- }
-
- /**
- * Next attempt at exploiting the JIT fully.
- *
- * @param obj Query object
- * @param k Desired number of neighbors
- * @return kNN result
- */
- KNNList<DoubleDistance> getKNNForObjectBenchmarked5(O obj, int k) {
- DoubleDistanceKNNHeap heap = new DoubleDistanceIntegerDBIDKNNListHeap(k);
- double max = Double.POSITIVE_INFINITY;
- for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
- final double distance = rawdist.doubleDistance(obj, relation.get(iter));
- if(distance <= max) {
- heap.add(distance, iter);
- max = heap.doubleKNNDistance();
- }
- }
- return heap.toKNNList();
- }
-
- /**
- * Next attempt at exploiting the JIT fully.
- *
- * @param obj Query object
- * @param k Desired number of neighbors
- * @return kNN result
- */
- KNNList<DoubleDistance> getKNNForObjectBenchmarked6(O obj, int k) {
- DoubleDistanceKNNHeap heap = new DoubleDistanceIntegerDBIDKNNListHeap(k);
- for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
- final double distance = rawdist.doubleDistance(obj, relation.get(iter));
- heap.add(distance, iter);
- }
- return heap.toKNNList();
- }
-}
diff --git a/src/de/lmu/ifi/dbs/elki/database/query/knn/LinearScanKNNQuery.java b/src/de/lmu/ifi/dbs/elki/database/query/knn/LinearScanDistanceKNNQuery.java
index 485330ea..395db289 100644
--- a/src/de/lmu/ifi/dbs/elki/database/query/knn/LinearScanKNNQuery.java
+++ b/src/de/lmu/ifi/dbs/elki/database/query/knn/LinearScanDistanceKNNQuery.java
@@ -45,13 +45,13 @@ import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
* @apiviz.landmark
* @apiviz.has DistanceQuery
*/
-public class LinearScanKNNQuery<O, D extends Distance<D>> extends AbstractDistanceKNNQuery<O, D> implements LinearScanQuery {
+public class LinearScanDistanceKNNQuery<O, D extends Distance<D>> extends AbstractDistanceKNNQuery<O, D> implements LinearScanQuery {
/**
* Constructor.
*
* @param distanceQuery Distance function to use
*/
- public LinearScanKNNQuery(DistanceQuery<O, D> distanceQuery) {
+ public LinearScanDistanceKNNQuery(DistanceQuery<O, D> distanceQuery) {
super(distanceQuery);
}
@@ -63,11 +63,11 @@ public class LinearScanKNNQuery<O, D extends Distance<D>> extends AbstractDistan
*/
private void linearScanBatchKNN(ArrayDBIDs ids, List<KNNHeap<D>> heaps) {
// The distance is computed on database IDs
- for (DBIDIter iter = relation.getDBIDs().iter(); iter.valid(); iter.advance()) {
+ for(DBIDIter iter = relation.getDBIDs().iter(); iter.valid(); iter.advance()) {
int index = 0;
- for (DBIDIter iter2 = ids.iter(); iter2.valid(); iter2.advance()) {
+ for(DBIDIter iter2 = ids.iter(); iter2.valid(); iter2.advance()) {
KNNHeap<D> heap = heaps.get(index);
- heap.add(distanceQuery.distance(iter2, iter), iter);
+ heap.insert(distanceQuery.distance(iter2, iter), iter);
index++;
}
}
@@ -75,27 +75,18 @@ public class LinearScanKNNQuery<O, D extends Distance<D>> extends AbstractDistan
@Override
public KNNList<D> getKNNForDBID(DBIDRef id, int k) {
+ if(PrimitiveDistanceQuery.class.isInstance(distanceQuery)) {
+ // This should have yielded a LinearScanPrimitiveDistanceKNNQuery class!
+ return getKNNForObject(relation.get(id), k);
+ }
KNNHeap<D> heap = DBIDUtil.newHeap(distanceQuery.getDistanceFactory(), k);
D max = distanceQuery.getDistanceFactory().infiniteDistance();
- if (PrimitiveDistanceQuery.class.isInstance(distanceQuery)) {
- O obj = relation.get(id);
- for (DBIDIter iter = relation.getDBIDs().iter(); iter.valid(); iter.advance()) {
- final D dist = distanceQuery.distance(obj, relation.get(iter));
- if (max.compareTo(dist) > 0) {
- heap.add(dist, iter);
- if (heap.size() >= k) {
- max = heap.getKNNDistance();
- }
- }
- }
- } else {
- for (DBIDIter iter = relation.getDBIDs().iter(); iter.valid(); iter.advance()) {
- final D dist = distanceQuery.distance(id, iter);
- if (max.compareTo(dist) > 0) {
- heap.add(dist, iter);
- if (heap.size() >= k) {
- max = heap.getKNNDistance();
- }
+ for(DBIDIter iter = relation.getDBIDs().iter(); iter.valid(); iter.advance()) {
+ final D dist = distanceQuery.distance(id, iter);
+ if(max.compareTo(dist) > 0) {
+ heap.insert(dist, iter);
+ if(heap.size() >= k) {
+ max = heap.getKNNDistance();
}
}
}
@@ -106,13 +97,13 @@ public class LinearScanKNNQuery<O, D extends Distance<D>> extends AbstractDistan
public List<KNNList<D>> getKNNForBulkDBIDs(ArrayDBIDs ids, int k) {
final int size = ids.size();
final List<KNNHeap<D>> heaps = new ArrayList<>(size);
- for (int i = 0; i < size; i++) {
+ for(int i = 0; i < size; i++) {
heaps.add(DBIDUtil.newHeap(distanceQuery.getDistanceFactory(), k));
}
linearScanBatchKNN(ids, heaps);
// Serialize heaps
List<KNNList<D>> result = new ArrayList<>(size);
- for (KNNHeap<D> heap : heaps) {
+ for(KNNHeap<D> heap : heaps) {
result.add(heap.toKNNList());
}
return result;
@@ -121,8 +112,8 @@ public class LinearScanKNNQuery<O, D extends Distance<D>> extends AbstractDistan
@Override
public KNNList<D> getKNNForObject(O obj, int k) {
KNNHeap<D> heap = DBIDUtil.newHeap(distanceQuery.getDistanceFactory(), k);
- for (DBIDIter iter = relation.getDBIDs().iter(); iter.valid(); iter.advance()) {
- heap.add(distanceQuery.distance(obj, iter), iter);
+ for(DBIDIter iter = relation.getDBIDs().iter(); iter.valid(); iter.advance()) {
+ heap.insert(distanceQuery.distance(obj, iter), iter);
}
return heap.toKNNList();
}
diff --git a/src/de/lmu/ifi/dbs/elki/database/query/knn/LinearScanPrimitiveDistanceKNNQuery.java b/src/de/lmu/ifi/dbs/elki/database/query/knn/LinearScanPrimitiveDistanceKNNQuery.java
index 3f3fbc3f..59a6d6e3 100644
--- a/src/de/lmu/ifi/dbs/elki/database/query/knn/LinearScanPrimitiveDistanceKNNQuery.java
+++ b/src/de/lmu/ifi/dbs/elki/database/query/knn/LinearScanPrimitiveDistanceKNNQuery.java
@@ -32,6 +32,7 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.distance.KNNHeap;
import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
+import de.lmu.ifi.dbs.elki.database.query.LinearScanQuery;
import de.lmu.ifi.dbs.elki.database.query.distance.PrimitiveDistanceQuery;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
@@ -45,7 +46,7 @@ import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
*
* @apiviz.uses PrimitiveDistanceQuery
*/
-public class LinearScanPrimitiveDistanceKNNQuery<O, D extends Distance<D>> extends LinearScanKNNQuery<O, D> {
+public class LinearScanPrimitiveDistanceKNNQuery<O, D extends Distance<D>> extends AbstractDistanceKNNQuery<O, D> implements LinearScanQuery {
/**
* Constructor.
*
@@ -68,14 +69,28 @@ public class LinearScanPrimitiveDistanceKNNQuery<O, D extends Distance<D>> exten
O candidate = relation.get(iter);
for(int index = 0; index < size; index++) {
O object = objs.get(index);
- heaps.get(index).add(distanceQuery.distance(object, candidate), iter);
+ heaps.get(index).insert(distanceQuery.distance(object, candidate), iter);
}
}
}
@Override
public KNNList<D> getKNNForDBID(DBIDRef id, int k) {
- return getKNNForObject(relation.get(id), k);
+ final O obj = relation.get(id);
+ KNNHeap<D> heap = DBIDUtil.newHeap(distanceQuery.getDistanceFactory(), k);
+ for(DBIDIter iter = relation.getDBIDs().iter(); iter.valid(); iter.advance()) {
+ heap.insert(distanceQuery.distance(obj, iter), iter);
+ }
+ return heap.toKNNList();
+ }
+
+ @Override
+ public KNNList<D> getKNNForObject(O obj, int k) {
+ KNNHeap<D> heap = DBIDUtil.newHeap(distanceQuery.getDistanceFactory(), k);
+ for(DBIDIter iter = relation.getDBIDs().iter(); iter.valid(); iter.advance()) {
+ heap.insert(distanceQuery.distance(obj, iter), iter);
+ }
+ return heap.toKNNList();
}
@Override
diff --git a/src/de/lmu/ifi/dbs/elki/database/query/range/AbstractDistanceRangeQuery.java b/src/de/lmu/ifi/dbs/elki/database/query/range/AbstractDistanceRangeQuery.java
index c348653b..531fa09d 100644
--- a/src/de/lmu/ifi/dbs/elki/database/query/range/AbstractDistanceRangeQuery.java
+++ b/src/de/lmu/ifi/dbs/elki/database/query/range/AbstractDistanceRangeQuery.java
@@ -61,4 +61,9 @@ public abstract class AbstractDistanceRangeQuery<O, D extends Distance<D>> exten
@Override
abstract public DistanceDBIDList<D> getRangeForObject(O obj, D range);
+
+ @Override
+ public D getDistanceFactory() {
+ return distanceQuery.getDistanceFactory();
+ }
}
diff --git a/src/de/lmu/ifi/dbs/elki/database/query/range/DoubleOptimizedRangeQuery.java b/src/de/lmu/ifi/dbs/elki/database/query/range/DoubleOptimizedDistanceRangeQuery.java
index 209e8950..90b867e3 100644
--- a/src/de/lmu/ifi/dbs/elki/database/query/range/DoubleOptimizedRangeQuery.java
+++ b/src/de/lmu/ifi/dbs/elki/database/query/range/DoubleOptimizedDistanceRangeQuery.java
@@ -27,9 +27,10 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList;
import de.lmu.ifi.dbs.elki.database.ids.distance.ModifiableDoubleDistanceDBIDList;
-import de.lmu.ifi.dbs.elki.database.ids.integer.DoubleDistanceIntegerDBIDKNNList;
+import de.lmu.ifi.dbs.elki.database.ids.integer.DoubleDistanceIntegerDBIDList;
import de.lmu.ifi.dbs.elki.database.query.LinearScanQuery;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
@@ -42,11 +43,11 @@ import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
*
* @param <O> Database object type
*/
-public class DoubleOptimizedRangeQuery<O> extends LinearScanRangeQuery<O, DoubleDistance> implements LinearScanQuery {
+public class DoubleOptimizedDistanceRangeQuery<O> extends AbstractDistanceRangeQuery<O, DoubleDistance> implements LinearScanQuery {
/**
* Raw distance function.
*/
- PrimitiveDoubleDistanceFunction<O> rawdist;
+ PrimitiveDoubleDistanceFunction<? super O> rawdist;
/**
* Constructor.
@@ -54,9 +55,9 @@ public class DoubleOptimizedRangeQuery<O> extends LinearScanRangeQuery<O, Double
* @param distanceQuery Distance function to use
*/
@SuppressWarnings("unchecked")
- public DoubleOptimizedRangeQuery(DistanceQuery<O, DoubleDistance> distanceQuery) {
+ public DoubleOptimizedDistanceRangeQuery(DistanceQuery<O, DoubleDistance> distanceQuery) {
super(distanceQuery);
- if (!(distanceQuery.getDistanceFunction() instanceof PrimitiveDoubleDistanceFunction)) {
+ if(!(distanceQuery.getDistanceFunction() instanceof PrimitiveDoubleDistanceFunction)) {
throw new UnsupportedOperationException("DoubleOptimizedRangeQuery instantiated for non-PrimitiveDoubleDistanceFunction!");
}
rawdist = (PrimitiveDoubleDistanceFunction<O>) distanceQuery.getDistanceFunction();
@@ -64,32 +65,28 @@ public class DoubleOptimizedRangeQuery<O> extends LinearScanRangeQuery<O, Double
@Override
public DistanceDBIDList<DoubleDistance> getRangeForDBID(DBIDRef id, DoubleDistance range) {
- double epsilon = range.doubleValue();
-
- O qo = relation.get(id);
- ModifiableDoubleDistanceDBIDList result = new DoubleDistanceIntegerDBIDKNNList();
- for (DBIDIter iter = relation.getDBIDs().iter(); iter.valid(); iter.advance()) {
- double doubleDistance = rawdist.doubleDistance(qo, relation.get(iter));
- if (doubleDistance <= epsilon) {
- result.add(doubleDistance, iter);
- }
- }
+ final Relation<? extends O> relation = this.relation;
+ DoubleDistanceIntegerDBIDList result = new DoubleDistanceIntegerDBIDList();
+ linearScan(relation, relation.iterDBIDs(), rawdist, relation.get(id), range.doubleValue(), result);
result.sort();
return result;
}
@Override
public DistanceDBIDList<DoubleDistance> getRangeForObject(O obj, DoubleDistance range) {
- double epsilon = range.doubleValue();
+ DoubleDistanceIntegerDBIDList result = new DoubleDistanceIntegerDBIDList();
+ linearScan(relation, relation.iterDBIDs(), rawdist, obj, range.doubleValue(), result);
+ result.sort();
+ return result;
+ }
- ModifiableDoubleDistanceDBIDList result = new DoubleDistanceIntegerDBIDKNNList();
- for (DBIDIter iter = relation.getDBIDs().iter(); iter.valid(); iter.advance()) {
- double doubleDistance = rawdist.doubleDistance(obj, relation.get(iter));
- if (doubleDistance <= epsilon) {
+ private static <O> void linearScan(Relation<? extends O> relation, DBIDIter iter, PrimitiveDoubleDistanceFunction<? super O> rawdist, O obj, double range, ModifiableDoubleDistanceDBIDList result) {
+ while(iter.valid()) {
+ final double doubleDistance = rawdist.doubleDistance(obj, relation.get(iter));
+ if(doubleDistance <= range) {
result.add(doubleDistance, iter);
}
+ iter.advance();
}
- result.sort();
- return result;
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/database/query/range/LinearScanRangeQuery.java b/src/de/lmu/ifi/dbs/elki/database/query/range/LinearScanDistanceRangeQuery.java
index c886c537..8830dd45 100644
--- a/src/de/lmu/ifi/dbs/elki/database/query/range/LinearScanRangeQuery.java
+++ b/src/de/lmu/ifi/dbs/elki/database/query/range/LinearScanDistanceRangeQuery.java
@@ -42,13 +42,13 @@ import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
* @param <O> Database object type
* @param <D> Distance type
*/
-public class LinearScanRangeQuery<O, D extends Distance<D>> extends AbstractDistanceRangeQuery<O, D> implements LinearScanQuery {
+public class LinearScanDistanceRangeQuery<O, D extends Distance<D>> extends AbstractDistanceRangeQuery<O, D> implements LinearScanQuery {
/**
* Constructor.
*
* @param distanceQuery Distance function to use
*/
- public LinearScanRangeQuery(DistanceQuery<O, D> distanceQuery) {
+ public LinearScanDistanceRangeQuery(DistanceQuery<O, D> distanceQuery) {
super(distanceQuery);
}
diff --git a/src/de/lmu/ifi/dbs/elki/database/query/range/LinearScanPrimitiveDistanceRangeQuery.java b/src/de/lmu/ifi/dbs/elki/database/query/range/LinearScanPrimitiveDistanceRangeQuery.java
index 3b18c282..ab4dd2be 100644
--- a/src/de/lmu/ifi/dbs/elki/database/query/range/LinearScanPrimitiveDistanceRangeQuery.java
+++ b/src/de/lmu/ifi/dbs/elki/database/query/range/LinearScanPrimitiveDistanceRangeQuery.java
@@ -23,8 +23,10 @@ package de.lmu.ifi.dbs.elki.database.query.range;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList;
+import de.lmu.ifi.dbs.elki.database.ids.generic.GenericDistanceDBIDList;
import de.lmu.ifi.dbs.elki.database.query.distance.PrimitiveDistanceQuery;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
@@ -41,7 +43,7 @@ import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
* @param <O> Database object type
* @param <D> Distance type
*/
-public class LinearScanPrimitiveDistanceRangeQuery<O, D extends Distance<D>> extends LinearScanRangeQuery<O, D> {
+public class LinearScanPrimitiveDistanceRangeQuery<O, D extends Distance<D>> extends AbstractDistanceRangeQuery<O, D> {
/**
* Constructor.
*
@@ -54,6 +56,28 @@ public class LinearScanPrimitiveDistanceRangeQuery<O, D extends Distance<D>> ext
@Override
public DistanceDBIDList<D> getRangeForDBID(DBIDRef id, D range) {
// Note: subtle optimization. Get "id" only once!
- return getRangeForObject(relation.get(id), range);
+ final O obj = relation.get(id);
+ GenericDistanceDBIDList<D> result = new GenericDistanceDBIDList<>();
+ for(DBIDIter iter = relation.getDBIDs().iter(); iter.valid(); iter.advance()) {
+ D currentDistance = distanceQuery.distance(obj, iter);
+ if(currentDistance.compareTo(range) <= 0) {
+ result.add(currentDistance, iter);
+ }
+ }
+ result.sort();
+ return result;
+ }
+
+ @Override
+ public DistanceDBIDList<D> getRangeForObject(O obj, D range) {
+ GenericDistanceDBIDList<D> result = new GenericDistanceDBIDList<>();
+ for(DBIDIter iter = relation.getDBIDs().iter(); iter.valid(); iter.advance()) {
+ D currentDistance = distanceQuery.distance(obj, iter);
+ if(currentDistance.compareTo(range) <= 0) {
+ result.add(currentDistance, iter);
+ }
+ }
+ result.sort();
+ return result;
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/database/query/range/RangeQuery.java b/src/de/lmu/ifi/dbs/elki/database/query/range/RangeQuery.java
index 0ec40dde..3f21c5f8 100644
--- a/src/de/lmu/ifi/dbs/elki/database/query/range/RangeQuery.java
+++ b/src/de/lmu/ifi/dbs/elki/database/query/range/RangeQuery.java
@@ -34,7 +34,7 @@ import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
* @author Erich Schubert
*
* @apiviz.landmark
- * @apiviz.uses DistanceDBIDResult oneway - - «create»
+ * @apiviz.uses DistanceDBIDList oneway - - «create»
*
* @param <O> Object type
* @param <D> Distance type
@@ -57,4 +57,11 @@ public interface RangeQuery<O, D extends Distance<D>> extends DatabaseQuery {
* @return neighbors
*/
public DistanceDBIDList<D> getRangeForObject(O obj, D range);
-} \ No newline at end of file
+
+ /**
+ * Get the distance factory for the given distance type.
+ *
+ * @return Distance factory.
+ */
+ public D getDistanceFactory();
+}
diff --git a/src/de/lmu/ifi/dbs/elki/database/query/similarity/PrimitiveSimilarityQuery.java b/src/de/lmu/ifi/dbs/elki/database/query/similarity/PrimitiveSimilarityQuery.java
index b98482a8..58020239 100644
--- a/src/de/lmu/ifi/dbs/elki/database/query/similarity/PrimitiveSimilarityQuery.java
+++ b/src/de/lmu/ifi/dbs/elki/database/query/similarity/PrimitiveSimilarityQuery.java
@@ -32,7 +32,7 @@ import de.lmu.ifi.dbs.elki.distance.similarityfunction.PrimitiveSimilarityFuncti
* Run a database query in a database context.
*
* @author Erich Schubert
- *
+ *
* @param <O> Database object type.
* @param <D> Distance result type.
*/
@@ -41,7 +41,7 @@ public class PrimitiveSimilarityQuery<O, D extends Distance<D>> extends Abstract
* The distance function we use.
*/
final protected PrimitiveSimilarityFunction<? super O, D> similarityFunction;
-
+
/**
* Constructor.
*
@@ -87,4 +87,9 @@ public class PrimitiveSimilarityQuery<O, D extends Distance<D>> extends Abstract
public D getDistanceFactory() {
return similarityFunction.getDistanceFactory();
}
-} \ No newline at end of file
+
+ @Override
+ public PrimitiveSimilarityFunction<? super O, D> getSimilarityFunction() {
+ return similarityFunction;
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/database/query/similarity/SimilarityQuery.java b/src/de/lmu/ifi/dbs/elki/database/query/similarity/SimilarityQuery.java
index 3e02edc1..80347fb7 100644
--- a/src/de/lmu/ifi/dbs/elki/database/query/similarity/SimilarityQuery.java
+++ b/src/de/lmu/ifi/dbs/elki/database/query/similarity/SimilarityQuery.java
@@ -27,6 +27,7 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.distance.similarityfunction.SimilarityFunction;
/**
* A similarity query serves as adapter layer for database and primitive
@@ -50,7 +51,7 @@ public interface SimilarityQuery<O, D extends Distance<?>> extends DatabaseQuery
* @return the similarity between the two objects specified by their object
* ids
*/
- public abstract D similarity(DBIDRef id1, DBIDRef id2);
+ D similarity(DBIDRef id1, DBIDRef id2);
/**
* Returns the similarity between the two objects specified by their object
@@ -61,7 +62,7 @@ public interface SimilarityQuery<O, D extends Distance<?>> extends DatabaseQuery
* @return the similarity between the two objects specified by their object
* ids
*/
- public abstract D similarity(O o1, DBIDRef id2);
+ D similarity(O o1, DBIDRef id2);
/**
* Returns the similarity between the two objects specified by their object
@@ -72,7 +73,7 @@ public interface SimilarityQuery<O, D extends Distance<?>> extends DatabaseQuery
* @return the similarity between the two objects specified by their object
* ids
*/
- public abstract D similarity(DBIDRef id1, O o2);
+ D similarity(DBIDRef id1, O o2);
/**
* Returns the similarity between the two objects specified by their object
@@ -83,19 +84,26 @@ public interface SimilarityQuery<O, D extends Distance<?>> extends DatabaseQuery
* @return the similarity between the two objects specified by their object
* ids
*/
- public abstract D similarity(O o1, O o2);
+ D similarity(O o1, O o2);
/**
* Method to get the distance functions factory.
*
* @return Factory for distance objects
*/
- public abstract D getDistanceFactory();
-
+ D getDistanceFactory();
+
/**
* Access the underlying data query.
*
* @return data query in use
*/
- public abstract Relation<? extends O> getRelation();
+ Relation<? extends O> getRelation();
+
+ /**
+ * Get the inner similarity function.
+ *
+ * @return Similarity function
+ */
+ SimilarityFunction<? super O, D> getSimilarityFunction();
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/database/relation/RelationUtil.java b/src/de/lmu/ifi/dbs/elki/database/relation/RelationUtil.java
index b6549d6c..662767b7 100644
--- a/src/de/lmu/ifi/dbs/elki/database/relation/RelationUtil.java
+++ b/src/de/lmu/ifi/dbs/elki/database/relation/RelationUtil.java
@@ -26,6 +26,8 @@ package de.lmu.ifi.dbs.elki.database.relation;
import de.lmu.ifi.dbs.elki.data.FeatureVector;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
/**
* Utility functions for handling database relation.
@@ -87,6 +89,32 @@ public final class RelationUtil {
}
/**
+ * <em>Copy</em> a relation into a double matrix.
+ *
+ * This is <em>not recommended</em> unless you need to modify the data
+ * temporarily.
+ *
+ * @param relation Relation
+ * @param ids IDs, with well-defined order (i.e. array)
+ * @return Data matrix
+ */
+ public static double[][] relationAsMatrix(final Relation<? extends NumberVector<?>> relation, ArrayDBIDs ids) {
+ final int rowdim = ids.size();
+ final int coldim = dimensionality(relation);
+ double[][] mat = new double[rowdim][coldim];
+ int r = 0;
+ for (DBIDArrayIter iter = ids.iter(); iter.valid(); iter.advance(), r++) {
+ NumberVector<?> vec = relation.get(iter);
+ double[] row = mat[r];
+ for (int c = 0; c < coldim; c++) {
+ row[c] = vec.doubleValue(c);
+ }
+ }
+ assert (r == rowdim);
+ return mat;
+ }
+
+ /**
* Get the column name or produce a generic label "Column XY".
*
* @param rel Relation
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/AbstractDatabaseConnection.java b/src/de/lmu/ifi/dbs/elki/datasource/AbstractDatabaseConnection.java
index e9d9933f..77cdb12c 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/AbstractDatabaseConnection.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/AbstractDatabaseConnection.java
@@ -53,22 +53,6 @@ public abstract class AbstractDatabaseConnection implements DatabaseConnection {
public static final String LABEL_CONCATENATION = " ";
/**
- * Filters to apply to the input data.
- * <p>
- * Key: {@code -dbc.filter}
- * </p>
- */
- public static final OptionID FILTERS_ID = new OptionID("dbc.filter", "The filters to apply to the input data.");
-
- /**
- * Parameter to specify the parser to provide a database.
- * <p>
- * Key: {@code -dbc.parser}
- * </p>
- */
- public static final OptionID PARSER_ID = new OptionID("dbc.parser", "Parser to provide the database.");
-
- /**
* The filters to invoke
*/
protected List<ObjectFilter> filters;
@@ -184,6 +168,22 @@ public abstract class AbstractDatabaseConnection implements DatabaseConnection {
*/
public abstract static class Parameterizer extends AbstractParameterizer {
/**
+ * Filters to apply to the input data.
+ * <p>
+ * Key: {@code -dbc.filter}
+ * </p>
+ */
+ public static final OptionID FILTERS_ID = new OptionID("dbc.filter", "The filters to apply to the input data.");
+
+ /**
+ * Parameter to specify the parser to provide a database.
+ * <p>
+ * Key: {@code -dbc.parser}
+ * </p>
+ */
+ public static final OptionID PARSER_ID = new OptionID("dbc.parser", "Parser to provide the database.");
+
+ /**
* Filters
*/
protected List<ObjectFilter> filters;
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/ConcatenateFilesDatabaseConnection.java b/src/de/lmu/ifi/dbs/elki/datasource/ConcatenateFilesDatabaseConnection.java
index c701efc9..22aadc08 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/ConcatenateFilesDatabaseConnection.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/ConcatenateFilesDatabaseConnection.java
@@ -162,7 +162,7 @@ public class ConcatenateFilesDatabaseConnection extends AbstractDatabaseConnecti
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- FileListParameter filesP = new FileListParameter(FileBasedDatabaseConnection.INPUT_ID, FilesType.INPUT_FILES);
+ FileListParameter filesP = new FileListParameter(FileBasedDatabaseConnection.Parameterizer.INPUT_ID, FilesType.INPUT_FILES);
if (config.grab(filesP)) {
files = filesP.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/FileBasedDatabaseConnection.java b/src/de/lmu/ifi/dbs/elki/datasource/FileBasedDatabaseConnection.java
index 193fc551..f2ced700 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/FileBasedDatabaseConnection.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/FileBasedDatabaseConnection.java
@@ -24,6 +24,7 @@ package de.lmu.ifi.dbs.elki.datasource;
*/
import java.io.BufferedInputStream;
+import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
@@ -32,8 +33,8 @@ import java.util.List;
import de.lmu.ifi.dbs.elki.datasource.filter.ObjectFilter;
import de.lmu.ifi.dbs.elki.datasource.parser.Parser;
import de.lmu.ifi.dbs.elki.utilities.FileUtil;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.WrongParameterValueException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.FileParameter;
@@ -41,19 +42,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.FileParameter;
* Provides a file based database connection based on the parser to be set.
*
* @author Arthur Zimek
- *
+ *
* @apiviz.landmark
*/
public class FileBasedDatabaseConnection extends InputStreamDatabaseConnection {
/**
- * Parameter that specifies the name of the input file to be parsed.
- * <p>
- * Key: {@code -dbc.in}
- * </p>
- */
- public static final OptionID INPUT_ID = new OptionID("dbc.in", "The name of the input file to be parsed.");
-
- /**
* Constructor.
*
* @param filters Filters, can be null
@@ -73,27 +66,37 @@ public class FileBasedDatabaseConnection extends InputStreamDatabaseConnection {
* @apiviz.exclude
*/
public static class Parameterizer extends InputStreamDatabaseConnection.Parameterizer {
- protected InputStream inputStream;
+ /**
+ * Parameter that specifies the name of the input file to be parsed.
+ * <p>
+ * Key: {@code -dbc.in}
+ * </p>
+ */
+ public static final OptionID INPUT_ID = new OptionID("dbc.in", "The name of the input file to be parsed.");
+
+ /**
+ * Input stream to process.
+ */
+ protected File infile;
@Override
protected void makeOptions(Parameterization config) {
// Add the input file first, for usability reasons.
final FileParameter inputParam = new FileParameter(INPUT_ID, FileParameter.FileType.INPUT_FILE);
if(config.grab(inputParam)) {
- try {
- inputStream = new BufferedInputStream(FileUtil.tryGzipInput(new FileInputStream(inputParam.getValue())));
- }
- catch(IOException e) {
- config.reportError(new WrongParameterValueException(inputParam, inputParam.getValue().getPath(), e));
- inputStream = null;
- }
+ infile = inputParam.getValue();
}
super.makeOptions(config);
}
@Override
protected FileBasedDatabaseConnection makeInstance() {
- return new FileBasedDatabaseConnection(filters, parser, inputStream);
+ try {
+ return new FileBasedDatabaseConnection(filters, parser, new BufferedInputStream(FileUtil.tryGzipInput(new FileInputStream(infile))));
+ }
+ catch(IOException e) {
+ throw new AbortException("Input file could not be opened.", e);
+ }
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/GeneratorXMLDatabaseConnection.java b/src/de/lmu/ifi/dbs/elki/datasource/GeneratorXMLDatabaseConnection.java
index dcda0765..57bfd6bc 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/GeneratorXMLDatabaseConnection.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/GeneratorXMLDatabaseConnection.java
@@ -55,6 +55,7 @@ import de.lmu.ifi.dbs.elki.math.statistics.distribution.GammaDistribution;
import de.lmu.ifi.dbs.elki.math.statistics.distribution.HaltonUniformDistribution;
import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution;
import de.lmu.ifi.dbs.elki.math.statistics.distribution.UniformDistribution;
+import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.exceptions.UnableToComplyException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
@@ -341,7 +342,7 @@ public class GeneratorXMLDatabaseConnection implements DatabaseConnection {
String dcostr = ((Element) cur).getAttribute(ATTR_DENSITY);
if (dcostr != null && dcostr.length() > 0) {
- overweight = Double.parseDouble(dcostr);
+ overweight = FormatUtil.parseDouble(dcostr);
}
if (size < 0) {
@@ -394,11 +395,11 @@ public class GeneratorXMLDatabaseConnection implements DatabaseConnection {
String minstr = ((Element) cur).getAttribute(ATTR_MIN);
if (minstr != null && minstr.length() > 0) {
- min = Double.parseDouble(minstr);
+ min = FormatUtil.parseDouble(minstr);
}
String maxstr = ((Element) cur).getAttribute(ATTR_MAX);
if (maxstr != null && maxstr.length() > 0) {
- max = Double.parseDouble(maxstr);
+ max = FormatUtil.parseDouble(maxstr);
}
// *** new uniform generator
@@ -428,11 +429,11 @@ public class GeneratorXMLDatabaseConnection implements DatabaseConnection {
double stddev = 1.0;
String meanstr = ((Element) cur).getAttribute(ATTR_MEAN);
if (meanstr != null && meanstr.length() > 0) {
- mean = Double.parseDouble(meanstr);
+ mean = FormatUtil.parseDouble(meanstr);
}
String stddevstr = ((Element) cur).getAttribute(ATTR_STDDEV);
if (stddevstr != null && stddevstr.length() > 0) {
- stddev = Double.parseDouble(stddevstr);
+ stddev = FormatUtil.parseDouble(stddevstr);
}
// *** New normal distribution generator
@@ -462,11 +463,11 @@ public class GeneratorXMLDatabaseConnection implements DatabaseConnection {
double theta = 1.0;
String kstr = ((Element) cur).getAttribute(ATTR_K);
if (kstr != null && kstr.length() > 0) {
- k = Double.parseDouble(kstr);
+ k = FormatUtil.parseDouble(kstr);
}
String thetastr = ((Element) cur).getAttribute(ATTR_THETA);
if (thetastr != null && thetastr.length() > 0) {
- theta = Double.parseDouble(thetastr);
+ theta = FormatUtil.parseDouble(thetastr);
}
// *** New normal distribution generator
@@ -497,11 +498,11 @@ public class GeneratorXMLDatabaseConnection implements DatabaseConnection {
String minstr = ((Element) cur).getAttribute(ATTR_MIN);
if (minstr != null && minstr.length() > 0) {
- min = Double.parseDouble(minstr);
+ min = FormatUtil.parseDouble(minstr);
}
String maxstr = ((Element) cur).getAttribute(ATTR_MAX);
if (maxstr != null && maxstr.length() > 0) {
- max = Double.parseDouble(maxstr);
+ max = FormatUtil.parseDouble(maxstr);
}
// *** new uniform generator
@@ -541,7 +542,7 @@ public class GeneratorXMLDatabaseConnection implements DatabaseConnection {
}
String anstr = ((Element) cur).getAttribute(ATTR_ANGLE);
if (anstr != null && anstr.length() > 0) {
- angle = Double.parseDouble(anstr);
+ angle = FormatUtil.parseDouble(anstr);
}
if (axis1 <= 0 || axis1 > cluster.getDim()) {
throw new UnableToComplyException("Invalid axis1 number given in specification file.");
@@ -709,7 +710,7 @@ public class GeneratorXMLDatabaseConnection implements DatabaseConnection {
double[] d = new double[entries.length];
for (int i = 0; i < entries.length; i++) {
try {
- d[i] = Double.parseDouble(entries[i]);
+ d[i] = FormatUtil.parseDouble(entries[i]);
} catch (NumberFormatException e) {
throw new UnableToComplyException("Could not parse vector.");
}
@@ -749,7 +750,7 @@ public class GeneratorXMLDatabaseConnection implements DatabaseConnection {
specfile = cfgparam.getValue();
}
// Cluster size scaling
- final DoubleParameter scalepar = new DoubleParameter(SIZE_SCALE_ID, Double.valueOf(1.0));
+ final DoubleParameter scalepar = new DoubleParameter(SIZE_SCALE_ID, 1.);
if (config.grab(scalepar)) {
sizescale = scalepar.getValue().doubleValue();
}
@@ -757,7 +758,7 @@ public class GeneratorXMLDatabaseConnection implements DatabaseConnection {
final RandomParameter rndP = new RandomParameter(RANDOMSEED_ID);
if (config.grab(rndP)) {
// TODO: use RandomFactory in cluster
- clusterRandom = rndP.getValue().getRandom();
+ clusterRandom = rndP.getValue().getSingleThreadedRandom();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/LabelJoinDatabaseConnection.java b/src/de/lmu/ifi/dbs/elki/datasource/LabelJoinDatabaseConnection.java
index 375afa14..fc28b989 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/LabelJoinDatabaseConnection.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/LabelJoinDatabaseConnection.java
@@ -108,7 +108,9 @@ public class LabelJoinDatabaseConnection extends AbstractDatabaseConnection impl
LOG.warning("Duplicate label encountered: " + data + " in rows " + old + " and " + i);
}
} else if (data instanceof LabelList) {
- for (String lbl : (LabelList) data) {
+ final LabelList ll = (LabelList) data;
+ for(int j = 0; j < ll.size(); j++) {
+ String lbl = ll.get(j);
int old = labelmap.put(lbl, i);
if (old != -1) {
LOG.warning("Duplicate label encountered: " + lbl + " in rows " + old + " and " + i);
@@ -166,8 +168,9 @@ public class LabelJoinDatabaseConnection extends AbstractDatabaseConnection impl
if (data instanceof String) {
row = labelmap.get(data);
} else if (data instanceof LabelList) {
- for (String lbl : (LabelList) data) {
- row = labelmap.get(lbl);
+ final LabelList ll = (LabelList) data;
+ for(int j = 0; j < ll.size(); j++) {
+ row = labelmap.get(ll.get(j));
if (row >= 0) {
break;
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/RandomDoubleVectorDatabaseConnection.java b/src/de/lmu/ifi/dbs/elki/datasource/RandomDoubleVectorDatabaseConnection.java
index 84891a86..2f3a943b 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/RandomDoubleVectorDatabaseConnection.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/RandomDoubleVectorDatabaseConnection.java
@@ -86,7 +86,7 @@ public class RandomDoubleVectorDatabaseConnection extends AbstractDatabaseConnec
List<DoubleVector> vectors = new ArrayList<>(size);
// Setup random generator
- final Random rand = rnd.getRandom();
+ final Random rand = rnd.getSingleThreadedRandom();
// Produce random vectors
for(int i = 0; i < size; i++) {
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/bundle/MultipleObjectsBundle.java b/src/de/lmu/ifi/dbs/elki/datasource/bundle/MultipleObjectsBundle.java
index 02a24996..37b517dd 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/bundle/MultipleObjectsBundle.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/bundle/MultipleObjectsBundle.java
@@ -36,7 +36,7 @@ import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
* with multiple representations outside of any index structure.
*
* @author Erich Schubert
- *
+ *
* @apiviz.landmark
*/
public class MultipleObjectsBundle implements ObjectBundle {
@@ -102,20 +102,12 @@ public class MultipleObjectsBundle implements ObjectBundle {
@Override
public Object data(int onum, int rnum) {
- if(rnum < 0 || rnum >= meta.size()) {
- throw new ArrayIndexOutOfBoundsException();
- }
return columns.get(rnum).get(onum);
}
@Override
public int dataLength() {
- try {
- return columns.get(0).size();
- }
- catch(IndexOutOfBoundsException e) {
- return 0;
- }
+ return (columns.size() == 0) ? 0 : columns.get(0).size();
}
/**
@@ -220,7 +212,7 @@ public class MultipleObjectsBundle implements ObjectBundle {
boolean stop = false;
while(!stop) {
BundleStreamSource.Event ev = source.nextEvent();
- switch(ev) {
+ switch(ev){
case END_OF_STREAM:
stop = true;
break;
@@ -237,7 +229,7 @@ public class MultipleObjectsBundle implements ObjectBundle {
}
continue;
case NEXT_OBJECT:
- for (int i = 0; i < bundle.metaLength(); i++) {
+ for(int i = 0; i < bundle.metaLength(); i++) {
@SuppressWarnings("unchecked")
final List<Object> col = (List<Object>) bundle.columns.get(i);
col.add(source.data(i));
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/ByLabelFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/ByLabelFilter.java
index 2109761a..66707da6 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/ByLabelFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/ByLabelFilter.java
@@ -112,8 +112,9 @@ public class ByLabelFilter extends AbstractStreamFilter {
Object l = source.data(lblcol);
if(l instanceof LabelList) {
boolean good = false;
- for(String label : (LabelList) l) {
- if(pattern.matcher(label).matches()) {
+ final LabelList ll = (LabelList) l;
+ for(int i = 0; i < ll.size(); i++) {
+ if(pattern.matcher(ll.get(i)).matches()) {
good = true;
break;
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/ClassLabelFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/ClassLabelFilter.java
index e8dc69c3..020dcb31 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/ClassLabelFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/ClassLabelFilter.java
@@ -34,7 +34,6 @@ import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
@@ -49,23 +48,6 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
*/
public class ClassLabelFilter implements ObjectFilter {
/**
- * Optional parameter that specifies the index of the label to be used as
- * class label, must be an integer equal to or greater than 0.
- * <p>
- * Key: {@code -dbc.classLabelIndex}
- * </p>
- */
- public static final OptionID CLASS_LABEL_INDEX_ID = new OptionID("dbc.classLabelIndex", "The index of the label to be used as class label.");
-
- /**
- * Parameter to specify the class of occurring class labels.
- * <p>
- * Key: {@code -dbc.classLabelClass}
- * </p>
- */
- public static final OptionID CLASS_LABEL_CLASS_ID = new OptionID("dbc.classLabelClass", "Class label class to use.");
-
- /**
* The index of the label to be used as class label, null if no class label is
* specified.
*/
@@ -94,10 +76,10 @@ public class ClassLabelFilter implements ObjectFilter {
// Find a labellist column
boolean done = false;
boolean keeplabelcol = false;
- for (int i = 0; i < objects.metaLength(); i++) {
+ for(int i = 0; i < objects.metaLength(); i++) {
SimpleTypeInformation<?> meta = objects.meta(i);
// Skip non-labellist columns - or if we already had a labellist
- if (done || !LabelList.class.equals(meta.getRestrictionClass())) {
+ if(done || !LabelList.class.equals(meta.getRestrictionClass())) {
bundle.appendColumn(meta, objects.getColumn(i));
continue;
}
@@ -107,28 +89,39 @@ public class ClassLabelFilter implements ObjectFilter {
List<ClassLabel> clscol = new ArrayList<>(objects.dataLength());
List<LabelList> lblcol = new ArrayList<>(objects.dataLength());
+ ArrayList<String> lbuf = new ArrayList<>();
// Split the column
- for (Object obj : objects.getColumn(i)) {
- if (obj != null) {
+ for(Object obj : objects.getColumn(i)) {
+ if(obj != null) {
LabelList ll = (LabelList) obj;
+ int off = (classLabelIndex >= 0) ? classLabelIndex : (ll.size() - classLabelIndex);
try {
- ClassLabel lbl = classLabelFactory.makeFromString(ll.remove(classLabelIndex));
+ ClassLabel lbl = classLabelFactory.makeFromString(ll.get(off));
clscol.add(lbl);
- } catch (Exception e) {
+ }
+ catch(Exception e) {
throw new AbortException("Cannot initialize class labels: " + e.getMessage(), e);
}
- lblcol.add(ll);
- if (ll.size() > 0) {
+ lbuf.clear();
+ for(int j = 0; j < ll.size(); j++) {
+ if(j == off) {
+ continue;
+ }
+ lbuf.add(ll.get(j));
+ }
+ lblcol.add(LabelList.make(lbuf));
+ if(lbuf.size() > 0) {
keeplabelcol = true;
}
- } else {
+ }
+ else {
clscol.add(null);
lblcol.add(null);
}
}
bundle.appendColumn(classLabelFactory.getTypeInformation(), clscol);
// Only add the label column when it's not empty.
- if (keeplabelcol) {
+ if(keeplabelcol) {
bundle.appendColumn(meta, lblcol);
}
}
@@ -144,6 +137,23 @@ public class ClassLabelFilter implements ObjectFilter {
*/
public static class Parameterizer extends AbstractParameterizer {
/**
+ * Optional parameter that specifies the index of the label to be used as
+ * class label, must be an integer equal to or greater than 0.
+ * <p>
+ * Key: {@code -dbc.classLabelIndex}
+ * </p>
+ */
+ public static final OptionID CLASS_LABEL_INDEX_ID = new OptionID("dbc.classLabelIndex", "The index of the label to be used as class label. The first label is 0, negative indexes are relative to the end.");
+
+ /**
+ * Parameter to specify the class of occurring class labels.
+ * <p>
+ * Key: {@code -dbc.classLabelClass}
+ * </p>
+ */
+ public static final OptionID CLASS_LABEL_CLASS_ID = new OptionID("dbc.classLabelClass", "Class label class to use.");
+
+ /**
* The index of the label to be used as class label, null if no class label
* is specified.
*/
@@ -159,12 +169,11 @@ public class ClassLabelFilter implements ObjectFilter {
super.makeOptions(config);
// parameter class label index
final IntParameter classLabelIndexParam = new IntParameter(CLASS_LABEL_INDEX_ID);
- classLabelIndexParam.addConstraint(new GreaterEqualConstraint(0));
final ObjectParameter<ClassLabel.Factory<?>> classlabelClassParam = new ObjectParameter<>(CLASS_LABEL_CLASS_ID, ClassLabel.Factory.class, SimpleClassLabel.Factory.class);
config.grab(classLabelIndexParam);
config.grab(classlabelClassParam);
- if (classLabelIndexParam.isDefined() && classlabelClassParam.isDefined()) {
+ if(classLabelIndexParam.isDefined() && classlabelClassParam.isDefined()) {
classLabelIndex = classLabelIndexParam.intValue();
classLabelFactory = classlabelClassParam.instantiateClass(config);
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/ClassLabelFromPatternFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/ClassLabelFromPatternFilter.java
index 97624ac8..517eb301 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/ClassLabelFromPatternFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/ClassLabelFromPatternFilter.java
@@ -129,8 +129,9 @@ public class ClassLabelFromPatternFilter extends AbstractStreamFilter {
continue;
}
if (o instanceof LabelList) {
- for (String l : (LabelList) o) {
- if (pattern.matcher(l).find()) {
+ final LabelList ll = (LabelList) o;
+ for(int j = 0; j < ll.size(); j++) {
+ if (pattern.matcher(ll.get(j)).find()) {
return positive;
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/NaNFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/DropNaNFilter.java
index 769f3009..fb9cf83e 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/NaNFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/DropNaNFilter.java
@@ -43,11 +43,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
*
* @author Erich Schubert
*/
-public class NaNFilter extends AbstractStreamFilter {
+public class DropNaNFilter extends AbstractStreamFilter {
/**
* Class logger
*/
- private static final Logging LOG = Logging.getLogger(NaNFilter.class);
+ private static final Logging LOG = Logging.getLogger(DropNaNFilter.class);
/**
* Columns to check.
@@ -57,7 +57,7 @@ public class NaNFilter extends AbstractStreamFilter {
/**
* Constructor.
*/
- public NaNFilter() {
+ public DropNaNFilter() {
super();
}
@@ -178,8 +178,8 @@ public class NaNFilter extends AbstractStreamFilter {
*/
public static class Parameterizer extends AbstractParameterizer {
@Override
- protected Object makeInstance() {
- return new NaNFilter();
+ protected DropNaNFilter makeInstance() {
+ return new DropNaNFilter();
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/ExternalIDFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/ExternalIDFilter.java
index 926ebe99..17538dc9 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/ExternalIDFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/ExternalIDFilter.java
@@ -33,7 +33,6 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -48,15 +47,6 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
// TODO: use a non-string class for external ids?
public class ExternalIDFilter implements ObjectFilter {
/**
- * Parameter that specifies the index of the label to be used as external Id,
- * must be an integer equal to or greater than 0.
- * <p>
- * Key: {@code -dbc.externalIdIndex}
- * </p>
- */
- public static final OptionID EXTERNALID_INDEX_ID = new OptionID("dbc.externalIdIndex", "The index of the label to be used as external Id.");
-
- /**
* The index of the label to be used as external Id.
*/
private final int externalIdIndex;
@@ -77,10 +67,10 @@ public class ExternalIDFilter implements ObjectFilter {
// Find a labellist column
boolean done = false;
boolean keeplabelcol = false;
- for (int i = 0; i < objects.metaLength(); i++) {
+ for(int i = 0; i < objects.metaLength(); i++) {
SimpleTypeInformation<?> meta = objects.meta(i);
// Skip non-labellist columns - or if we already had a labellist
- if (done || !LabelList.class.equals(meta.getRestrictionClass())) {
+ if(done || !LabelList.class.equals(meta.getRestrictionClass())) {
bundle.appendColumn(meta, objects.getColumn(i));
continue;
}
@@ -91,15 +81,25 @@ public class ExternalIDFilter implements ObjectFilter {
List<LabelList> lblcol = new ArrayList<>(objects.dataLength());
// Split the column
- for (Object obj : objects.getColumn(i)) {
- if (obj != null) {
+ ArrayList<String> lbuf = new ArrayList<>();
+ for(Object obj : objects.getColumn(i)) {
+ if(obj != null) {
LabelList ll = (LabelList) obj;
- eidcol.add(new ExternalID(ll.remove(externalIdIndex)));
- lblcol.add(ll);
- if (ll.size() > 0) {
+ int off = externalIdIndex >= 0 ? externalIdIndex : (ll.size() - externalIdIndex);
+ eidcol.add(new ExternalID(ll.get(off)));
+ lbuf.clear();
+ for(int j = 0; j < ll.size(); j++) {
+ if(j == off) {
+ continue;
+ }
+ lbuf.add(ll.get(j));
+ }
+ lblcol.add(LabelList.make(lbuf));
+ if(ll.size() > 0) {
keeplabelcol = true;
}
- } else {
+ }
+ else {
eidcol.add(null);
lblcol.add(null);
}
@@ -107,7 +107,7 @@ public class ExternalIDFilter implements ObjectFilter {
bundle.appendColumn(TypeUtil.EXTERNALID, eidcol);
// Only add the label column when it's not empty.
- if (keeplabelcol) {
+ if(keeplabelcol) {
bundle.appendColumn(meta, lblcol);
}
}
@@ -122,14 +122,22 @@ public class ExternalIDFilter implements ObjectFilter {
* @apiviz.exclude
*/
public static class Parameterizer extends AbstractParameterizer {
+ /**
+ * Parameter that specifies the index of the label to be used as external
+ * Id, starting at 0. Negative numbers are counted from the end.
+ * <p>
+ * Key: {@code -dbc.externalIdIndex}
+ * </p>
+ */
+ public static final OptionID EXTERNALID_INDEX_ID = new OptionID("dbc.externalIdIndex", "The index of the label to be used as external Id. The first label is 0; negative indexes are relative to the end.");
+
int externalIdIndex = -1;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
final IntParameter externalIdIndexParam = new IntParameter(EXTERNALID_INDEX_ID);
- externalIdIndexParam.addConstraint(new GreaterEqualConstraint(0));
- if (config.grab(externalIdIndexParam)) {
+ if(config.grab(externalIdIndexParam)) {
externalIdIndex = externalIdIndexParam.intValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/FixedDBIDsFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/FixedDBIDsFilter.java
index 7f09b905..ce02fc29 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/FixedDBIDsFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/FixedDBIDsFilter.java
@@ -42,14 +42,6 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
*/
public class FixedDBIDsFilter extends AbstractStreamFilter {
/**
- * Optional parameter to specify the first object ID to use.
- * <p>
- * Key: {@code -dbc.startid}
- * </p>
- */
- public static final OptionID IDSTART_ID = new OptionID("dbc.startid", "Object ID to start counting with");
-
- /**
* The filtered meta
*/
BundleMeta meta;
@@ -109,6 +101,13 @@ public class FixedDBIDsFilter extends AbstractStreamFilter {
* @apiviz.exclude
*/
public static class Parameterizer extends AbstractParameterizer {
+ /**
+ * Optional parameter to specify the first object ID to use.
+ * <p>
+ * Key: {@code -dbc.startid}
+ * </p>
+ */
+ public static final OptionID IDSTART_ID = new OptionID("dbc.startid", "Object ID to start counting with");
int startid = -1;
@Override
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/HistogramJitterFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/HistogramJitterFilter.java
index 37f8f8d9..453d294e 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/HistogramJitterFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/HistogramJitterFilter.java
@@ -31,7 +31,7 @@ import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
@@ -75,7 +75,7 @@ public class HistogramJitterFilter<V extends NumberVector<?>> extends AbstractVe
public HistogramJitterFilter(double jitter, RandomFactory rnd) {
super();
this.jitter = jitter;
- this.rnd = new ExponentialDistribution(1, rnd.getRandom());
+ this.rnd = new ExponentialDistribution(1, rnd.getSingleThreadedRandom());
}
@Override
@@ -83,7 +83,7 @@ public class HistogramJitterFilter<V extends NumberVector<?>> extends AbstractVe
final int dim = obj.getDimensionality();
// Compute the total sum.
double osum = 0;
- for (int i = 0; i < dim; i++) {
+ for(int i = 0; i < dim; i++) {
osum += obj.doubleValue(i);
}
// Actual maximum jitter amount:
@@ -91,13 +91,13 @@ public class HistogramJitterFilter<V extends NumberVector<?>> extends AbstractVe
// Generate jitter vector
double[] raw = new double[dim];
double jsum = 0; // Sum of jitter
- for (int i = 0; i < raw.length; i++) {
+ for(int i = 0; i < raw.length; i++) {
raw[i] = rnd.nextRandom() * maxjitter;
jsum += raw[i];
}
final double mix = jsum / osum;
// Combine the two vector
- for (int i = 0; i < raw.length; i++) {
+ for(int i = 0; i < raw.length; i++) {
raw[i] = raw[i] + (1 - mix) * obj.doubleValue(i);
}
return factory.newNumberVector(raw);
@@ -146,12 +146,12 @@ public class HistogramJitterFilter<V extends NumberVector<?>> extends AbstractVe
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
DoubleParameter jitterP = new DoubleParameter(JITTER_ID);
- jitterP.addConstraint(new GreaterEqualConstraint(Double.valueOf(0.0)));
- if (config.grab(jitterP)) {
+ jitterP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
+ if(config.grab(jitterP)) {
jitter = jitterP.getValue().doubleValue();
}
RandomParameter rndP = new RandomParameter(SEED_ID);
- if (config.grab(rndP)) {
+ if(config.grab(rndP)) {
rnd = rndP.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/RandomSamplingStreamFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/RandomSamplingStreamFilter.java
index 5c8d07d0..a7e44d4d 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/RandomSamplingStreamFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/RandomSamplingStreamFilter.java
@@ -29,8 +29,7 @@ import de.lmu.ifi.dbs.elki.datasource.bundle.BundleMeta;
import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
@@ -60,7 +59,7 @@ public class RandomSamplingStreamFilter extends AbstractStreamFilter {
public RandomSamplingStreamFilter(double prob, RandomFactory rnd) {
super();
this.prob = prob;
- this.random = rnd.getRandom();
+ this.random = rnd.getSingleThreadedRandom();
}
@Override
@@ -75,15 +74,15 @@ public class RandomSamplingStreamFilter extends AbstractStreamFilter {
@Override
public Event nextEvent() {
- while (true) {
+ while(true) {
Event ev = source.nextEvent();
- switch(ev) {
+ switch(ev){
case END_OF_STREAM:
return ev;
case META_CHANGED:
return ev;
case NEXT_OBJECT:
- if (random.nextDouble() < prob) {
+ if(random.nextDouble() < prob) {
return ev;
}
continue;
@@ -123,13 +122,13 @@ public class RandomSamplingStreamFilter extends AbstractStreamFilter {
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
DoubleParameter probP = new DoubleParameter(PROB_ID);
- probP.addConstraint(new GreaterEqualConstraint(0.0));
- probP.addConstraint(new LessEqualConstraint(1.0));
- if (config.grab(probP)) {
+ probP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
+ probP.addConstraint(CommonConstraints.LESS_EQUAL_ONE_DOUBLE);
+ if(config.grab(probP)) {
prob = probP.getValue().doubleValue();
}
RandomParameter rndP = new RandomParameter(SEED_ID);
- if (config.grab(rndP)) {
+ if(config.grab(rndP)) {
rnd = rndP.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/ReplaceNaNWithRandomFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/ReplaceNaNWithRandomFilter.java
new file mode 100644
index 00000000..9029d8ea
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/ReplaceNaNWithRandomFilter.java
@@ -0,0 +1,220 @@
+package de.lmu.ifi.dbs.elki.datasource.filter;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
+import de.lmu.ifi.dbs.elki.datasource.bundle.BundleMeta;
+import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * A filter to replace all NaN values.
+ *
+ * Note: currently, only dense vector columns are supported.
+ *
+ * TODO: add support for sparse vectors.
+ *
+ * @author Erich Schubert
+ */
+public class ReplaceNaNWithRandomFilter extends AbstractStreamFilter {
+ /**
+ * Class logger
+ */
+ private static final Logging LOG = Logging.getLogger(ReplaceNaNWithRandomFilter.class);
+
+ /**
+ * Columns to check.
+ */
+ private NumberVector.Factory<?, ?>[] densecols = null;
+
+ /**
+ * Distribution to generate replacement values with.
+ */
+ private Distribution dist;
+
+ /**
+ * Row cache.
+ */
+ private ArrayList<Object> rows = new ArrayList<>();
+
+ /**
+ * Constructor.
+ */
+ public ReplaceNaNWithRandomFilter(Distribution dist) {
+ super();
+ this.dist = dist;
+ }
+
+ @Override
+ public BundleMeta getMeta() {
+ return source.getMeta();
+ }
+
+ @Override
+ public Object data(int rnum) {
+ return rows.get(rnum);
+ }
+
+ @Override
+ public Event nextEvent() {
+ while (true) {
+ Event ev = source.nextEvent();
+ switch(ev) {
+ case END_OF_STREAM:
+ return ev;
+ case META_CHANGED:
+ updateMeta(source.getMeta());
+ return ev;
+ case NEXT_OBJECT:
+ if (densecols == null) {
+ updateMeta(source.getMeta());
+ }
+ rows.clear();
+ for (int j = 0; j < densecols.length; j++) {
+ Object o = source.data(j);
+ if (densecols[j] != null) {
+ NumberVector<?> v = (NumberVector<?>) o;
+ double[] ro = null; // replacement
+ if (v != null) {
+ for (int i = 0; i < v.getDimensionality(); i++) {
+ if (Double.isNaN(v.doubleValue(i))) {
+ if (ro == null) {
+ ro = v.getColumnVector().getArrayRef();
+ }
+ ro[i] = dist.nextRandom();
+ }
+ }
+ }
+ o = (ro != null) ? densecols[j].newNumberVector(ro) : o;
+ }
+ rows.add(o);
+ }
+ return ev;
+ }
+ }
+ }
+
+ /**
+ * Process an updated meta record.
+ *
+ * @param meta Meta record
+ */
+ private void updateMeta(BundleMeta meta) {
+ final int cols = meta.size();
+ densecols = new NumberVector.Factory<?, ?>[cols];
+ for (int i = 0; i < cols; i++) {
+ if (TypeUtil.SPARSE_VECTOR_VARIABLE_LENGTH.isAssignableFromType(meta.get(i))) {
+ throw new AbortException("Filtering sparse vectors is not yet supported by this filter. Please contribute.");
+ }
+ if (TypeUtil.FLOAT_VECTOR_FIELD.isAssignableFromType(meta.get(i))) {
+ VectorFieldTypeInformation<?> vmeta = (VectorFieldTypeInformation<?>) meta.get(i);
+ densecols[i] = (NumberVector.Factory<?, ?>) vmeta.getFactory();
+ continue;
+ }
+ if (TypeUtil.DOUBLE_VECTOR_FIELD.isAssignableFromType(meta.get(i))) {
+ VectorFieldTypeInformation<?> vmeta = (VectorFieldTypeInformation<?>) meta.get(i);
+ densecols[i] = (NumberVector.Factory<?, ?>) vmeta.getFactory();
+ continue;
+ }
+ }
+ }
+
+ @Override
+ public MultipleObjectsBundle filter(final MultipleObjectsBundle objects) {
+ if (LOG.isDebuggingFinest()) {
+ LOG.debugFinest("Replacing NaN values with random values.");
+ }
+
+ updateMeta(objects.meta());
+ MultipleObjectsBundle bundle = new MultipleObjectsBundle();
+ for (int j = 0; j < objects.metaLength(); j++) {
+ bundle.appendColumn(objects.meta(j), new ArrayList<>());
+ }
+ for (int i = 0; i < objects.dataLength(); i++) {
+ final Object[] row = objects.getRow(i);
+ for (int j = 0; j < densecols.length; j++) {
+ if (densecols[j] != null) {
+ NumberVector<?> v = (NumberVector<?>) row[j];
+ double[] ro = null; // replacement
+ if (v != null) {
+ for (int d = 0; d < v.getDimensionality(); d++) {
+ if (Double.isNaN(v.doubleValue(d))) {
+ if (ro == null) {
+ ro = v.getColumnVector().getArrayRef();
+ }
+ ro[d] = dist.nextRandom();
+ }
+ }
+ }
+ row[j] = (ro != null) ? densecols[j].newNumberVector(ro) : row[j];
+ }
+ }
+ bundle.appendSimple(row);
+ }
+ return bundle;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ /**
+ * Parameter to specify the distribution to sample replacement values from.
+ */
+ public static final OptionID REPLACEMENT_DISTRIBUTION = new OptionID("nanfilter.replacement", "Distribution to sample replacement values from.");
+
+ /**
+ * Distribution to generate replacement values with.
+ */
+ private Distribution dist;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ ObjectParameter<Distribution> distP = new ObjectParameter<>(REPLACEMENT_DISTRIBUTION, Distribution.class);
+ if (config.grab(distP)) {
+ dist = distP.instantiateClass(config);
+ }
+ }
+
+ @Override
+ protected ReplaceNaNWithRandomFilter makeInstance() {
+ return new ReplaceNaNWithRandomFilter(dist);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/ShuffleObjectsFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/ShuffleObjectsFilter.java
index b8bf968b..8afa8290 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/ShuffleObjectsFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/ShuffleObjectsFilter.java
@@ -76,7 +76,7 @@ public class ShuffleObjectsFilter implements ObjectFilter {
if (LOG.isDebugging()) {
LOG.debug("Shuffling the data set");
}
- final Random random = rnd.getRandom();
+ final Random random = rnd.getSingleThreadedRandom();
final int size = objects.dataLength();
final int[] offsets = new int[size];
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/SplitNumberVectorFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/SplitNumberVectorFilter.java
index 8146bd5b..6ac046ec 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/SplitNumberVectorFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/SplitNumberVectorFilter.java
@@ -35,8 +35,7 @@ import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ListEachConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntListParameter;
@@ -67,17 +66,17 @@ public class SplitNumberVectorFilter<V extends NumberVector<?>> implements Objec
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
- if (objects.dataLength() == 0) {
+ if(objects.dataLength() == 0) {
return objects;
}
MultipleObjectsBundle bundle = new MultipleObjectsBundle();
- for (int r = 0; r < objects.metaLength(); r++) {
+ for(int r = 0; r < objects.metaLength(); r++) {
@SuppressWarnings("unchecked")
SimpleTypeInformation<Object> type = (SimpleTypeInformation<Object>) objects.meta(r);
@SuppressWarnings("unchecked")
final List<Object> column = (List<Object>) objects.getColumn(r);
- if (!getInputTypeRestriction().isAssignableFromType(type)) {
+ if(!getInputTypeRestriction().isAssignableFromType(type)) {
bundle.appendColumn(type, column);
continue;
}
@@ -98,16 +97,16 @@ public class SplitNumberVectorFilter<V extends NumberVector<?>> implements Objec
int[] odims = new int[vtype.getDimensionality() - dims.length];
{
int i = 0;
- for (int d = 0; d < vtype.getDimensionality(); d++) {
+ for(int d = 0; d < vtype.getDimensionality(); d++) {
boolean found = false;
- for (int j = 0; j < dims.length; j++) {
- if (dims[j] == d) {
+ for(int j = 0; j < dims.length; j++) {
+ if(dims[j] == d) {
found = true;
break;
}
}
- if (!found) {
- if (i >= odims.length) {
+ if(!found) {
+ if(i >= odims.length) {
throw new AbortException("Dimensionalities not proper!");
}
odims[i] = d;
@@ -116,15 +115,15 @@ public class SplitNumberVectorFilter<V extends NumberVector<?>> implements Objec
}
}
// Splitting scan.
- for (int i = 0; i < objects.dataLength(); i++) {
+ for(int i = 0; i < objects.dataLength(); i++) {
@SuppressWarnings("unchecked")
final V obj = (V) column.get(i);
double[] part1 = new double[dims.length];
double[] part2 = new double[obj.getDimensionality() - dims.length];
- for (int d = 0; d < dims.length; d++) {
+ for(int d = 0; d < dims.length; d++) {
part1[d] = obj.doubleValue(dims[d]);
}
- for (int d = 0; d < odims.length; d++) {
+ for(int d = 0; d < odims.length; d++) {
part2[d] = obj.doubleValue(odims[d]);
}
col1.add(factory.newNumberVector(part1));
@@ -142,7 +141,7 @@ public class SplitNumberVectorFilter<V extends NumberVector<?>> implements Objec
private TypeInformation getInputTypeRestriction() {
// Find maximum dimension requested
int m = dims[0];
- for (int i = 1; i < dims.length; i++) {
+ for(int i = 1; i < dims.length; i++) {
m = Math.max(dims[i], m);
}
return new VectorFieldTypeInformation<>(NumberVector.class, m, Integer.MAX_VALUE);
@@ -170,11 +169,11 @@ public class SplitNumberVectorFilter<V extends NumberVector<?>> implements Objec
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
IntListParameter selectedAttributesP = new IntListParameter(SELECTED_ATTRIBUTES_ID);
- selectedAttributesP.addConstraint(new ListEachConstraint<Integer>(new GreaterEqualConstraint(0)));
- if (config.grab(selectedAttributesP)) {
+ selectedAttributesP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_INT_LIST);
+ if(config.grab(selectedAttributesP)) {
List<Integer> dimensionList = selectedAttributesP.getValue();
dims = new int[dimensionList.size()];
- for (int i = 0; i < dimensionList.size(); i++) {
+ for(int i = 0; i < dimensionList.size(); i++) {
dims[i] = dimensionList.get(i).intValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseCDFNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseCDFNormalization.java
index 8fd46336..dd86cc5a 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseCDFNormalization.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseCDFNormalization.java
@@ -123,7 +123,7 @@ public class AttributeWiseCDFNormalization<V extends NumberVector<?>> implements
// We iterate over dimensions, this kind of filter needs fast random
// access.
- Adapter<V> adapter = new Adapter<>();
+ Adapter adapter = new Adapter();
for (int d = 0; d < dim; d++) {
adapter.dim = d;
if (estimators.size() == 1) {
@@ -208,50 +208,56 @@ public class AttributeWiseCDFNormalization<V extends NumberVector<?>> implements
return result.toString();
}
- private static class Adapter<V extends NumberVector<?>> implements NumberArrayAdapter<Double, List<V>> {
+ /**
+ * Array adapter class for vectors.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ private static class Adapter implements NumberArrayAdapter<Double, List<? extends NumberVector<?>>> {
/**
* Dimension to process.
*/
-
int dim;
@Override
- public int size(List<V> array) {
+ public int size(List<? extends NumberVector<?>> array) {
return array.size();
}
@Override
- public Double get(List<V> array, int off) throws IndexOutOfBoundsException {
+ public Double get(List<? extends NumberVector<?>> array, int off) throws IndexOutOfBoundsException {
return getDouble(array, off);
}
@Override
- public double getDouble(List<V> array, int off) throws IndexOutOfBoundsException {
+ public double getDouble(List<? extends NumberVector<?>> array, int off) throws IndexOutOfBoundsException {
return array.get(off).doubleValue(dim);
}
@Override
- public float getFloat(List<V> array, int off) throws IndexOutOfBoundsException {
+ public float getFloat(List<? extends NumberVector<?>> array, int off) throws IndexOutOfBoundsException {
return array.get(off).floatValue(dim);
}
@Override
- public int getInteger(List<V> array, int off) throws IndexOutOfBoundsException {
+ public int getInteger(List<? extends NumberVector<?>> array, int off) throws IndexOutOfBoundsException {
return array.get(off).intValue(dim);
}
@Override
- public short getShort(List<V> array, int off) throws IndexOutOfBoundsException {
+ public short getShort(List<? extends NumberVector<?>> array, int off) throws IndexOutOfBoundsException {
return array.get(off).shortValue(dim);
}
@Override
- public long getLong(List<V> array, int off) throws IndexOutOfBoundsException {
+ public long getLong(List<? extends NumberVector<?>> array, int off) throws IndexOutOfBoundsException {
return array.get(off).longValue(dim);
}
@Override
- public byte getByte(List<V> array, int off) throws IndexOutOfBoundsException {
+ public byte getByte(List<? extends NumberVector<?>> array, int off) throws IndexOutOfBoundsException {
return array.get(off).byteValue(dim);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseMinMaxNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseMinMaxNormalization.java
index 31f72660..47b6db5f 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseMinMaxNormalization.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseMinMaxNormalization.java
@@ -23,8 +23,6 @@ package de.lmu.ifi.dbs.elki.datasource.filter.normalization;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.ArrayList;
-
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
@@ -38,8 +36,6 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.AllOrNoneMustBeS
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.EqualSizeGlobalConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleListParameter;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ListParameter;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Parameter;
/**
* Class to perform and undo a normalization on real vectors with respect to
@@ -97,24 +93,24 @@ public class AttributeWiseMinMaxNormalization<V extends NumberVector<?>> extends
@Override
protected void prepareProcessInstance(V featureVector) {
// First object? Then initialize.
- if (minima.length == 0 || maxima.length == 0) {
+ if(minima.length == 0 || maxima.length == 0) {
int dimensionality = featureVector.getDimensionality();
minima = new double[dimensionality];
maxima = new double[dimensionality];
- for (int i = 0; i < dimensionality; i++) {
+ for(int i = 0; i < dimensionality; i++) {
maxima[i] = -Double.MAX_VALUE;
minima[i] = Double.MAX_VALUE;
}
}
- if (minima.length != featureVector.getDimensionality()) {
+ if(minima.length != featureVector.getDimensionality()) {
throw new IllegalArgumentException("FeatureVectors differ in length.");
}
- for (int d = 0; d < featureVector.getDimensionality(); d++) {
+ for(int d = 0; d < featureVector.getDimensionality(); d++) {
final double val = featureVector.doubleValue(d);
- if (val > maxima[d]) {
+ if(val > maxima[d]) {
maxima[d] = val;
}
- if (val < minima[d]) {
+ if(val < minima[d]) {
minima[d] = val;
}
}
@@ -123,10 +119,10 @@ public class AttributeWiseMinMaxNormalization<V extends NumberVector<?>> extends
@Override
protected V filterSingleObject(V featureVector) {
double[] values = new double[featureVector.getDimensionality()];
- if (minima.length != featureVector.getDimensionality()) {
+ if(minima.length != featureVector.getDimensionality()) {
throw new IllegalArgumentException("FeatureVectors and given Minima/Maxima differ in length.");
}
- for (int d = 0; d < featureVector.getDimensionality(); d++) {
+ for(int d = 0; d < featureVector.getDimensionality(); d++) {
values[d] = (featureVector.doubleValue(d) - minima[d]) / factor(d);
}
return factory.newNumberVector(values);
@@ -134,13 +130,14 @@ public class AttributeWiseMinMaxNormalization<V extends NumberVector<?>> extends
@Override
public V restore(V featureVector) throws NonNumericFeaturesException {
- if (featureVector.getDimensionality() == maxima.length && featureVector.getDimensionality() == minima.length) {
+ if(featureVector.getDimensionality() == maxima.length && featureVector.getDimensionality() == minima.length) {
double[] values = new double[featureVector.getDimensionality()];
- for (int d = 0; d < featureVector.getDimensionality(); d++) {
+ for(int d = 0; d < featureVector.getDimensionality(); d++) {
values[d] = (featureVector.doubleValue(d) * (factor(d)) + minima[d]);
}
return factory.newNumberVector(values);
- } else {
+ }
+ else {
throw new NonNumericFeaturesException("Attributes cannot be resized: current dimensionality: " + featureVector.getDimensionality() + " former dimensionality: " + maxima.length);
}
}
@@ -166,10 +163,10 @@ public class AttributeWiseMinMaxNormalization<V extends NumberVector<?>> extends
int[] row = linearEquationSystem.getRowPermutations();
int[] col = linearEquationSystem.getColumnPermutations();
- for (int i = 0; i < coeff.length; i++) {
- for (int r = 0; r < coeff.length; r++) {
+ for(int i = 0; i < coeff.length; i++) {
+ for(int r = 0; r < coeff.length; r++) {
double sum = 0.0;
- for (int c = 0; c < coeff[0].length; c++) {
+ for(int c = 0; c < coeff[0].length; c++) {
sum += minima[c] * coeff[row[r]][col[c]] / factor(c);
coeff[row[r]][col[c]] = coeff[row[r]][col[c]] / factor(c);
}
@@ -224,23 +221,16 @@ public class AttributeWiseMinMaxNormalization<V extends NumberVector<?>> extends
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
DoubleListParameter minimaP = new DoubleListParameter(MINIMA_ID, true);
- if (config.grab(minimaP)) {
+ if(config.grab(minimaP)) {
minima = ArrayLikeUtil.toPrimitiveDoubleArray(minimaP.getValue());
}
DoubleListParameter maximaP = new DoubleListParameter(MAXIMA_ID, true);
- if (config.grab(maximaP)) {
+ if(config.grab(maximaP)) {
maxima = ArrayLikeUtil.toPrimitiveDoubleArray(maximaP.getValue());
}
- ArrayList<Parameter<?>> global_1 = new ArrayList<>();
- global_1.add(minimaP);
- global_1.add(maximaP);
- config.checkConstraint(new AllOrNoneMustBeSetGlobalConstraint(global_1));
-
- ArrayList<ListParameter<?>> global = new ArrayList<>();
- global.add(minimaP);
- global.add(maximaP);
- config.checkConstraint(new EqualSizeGlobalConstraint(global));
+ config.checkConstraint(new AllOrNoneMustBeSetGlobalConstraint(minimaP, maximaP));
+ config.checkConstraint(new EqualSizeGlobalConstraint(minimaP, maximaP));
}
@Override
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseVarianceNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseVarianceNormalization.java
index 072d1a68..a24cae25 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseVarianceNormalization.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseVarianceNormalization.java
@@ -23,8 +23,6 @@ package de.lmu.ifi.dbs.elki.datasource.filter.normalization;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.ArrayList;
-
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
@@ -40,8 +38,6 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.AllOrNoneMustBeS
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.EqualSizeGlobalConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleListParameter;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ListParameter;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Parameter;
/**
* Class to perform and undo a normalization on real vectors with respect to
@@ -186,6 +182,7 @@ public class AttributeWiseVarianceNormalization<V extends NumberVector<?>> exten
/**
* Restore a single dimension.
+ *
* @param d Dimension
* @param val Value
* @return Normalized value
@@ -280,15 +277,8 @@ public class AttributeWiseVarianceNormalization<V extends NumberVector<?>> exten
}
}
- ArrayList<Parameter<?>> global_1 = new ArrayList<>();
- global_1.add(meanP);
- global_1.add(stddevP);
- config.checkConstraint(new AllOrNoneMustBeSetGlobalConstraint(global_1));
-
- ArrayList<ListParameter<?>> global = new ArrayList<>();
- global.add(meanP);
- global.add(stddevP);
- config.checkConstraint(new EqualSizeGlobalConstraint(global));
+ config.checkConstraint(new AllOrNoneMustBeSetGlobalConstraint(meanP, stddevP));
+ config.checkConstraint(new EqualSizeGlobalConstraint(meanP, stddevP));
}
@Override
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/InverseDocumentFrequencyNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/InverseDocumentFrequencyNormalization.java
index 94bcb32f..21263890 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/InverseDocumentFrequencyNormalization.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/InverseDocumentFrequencyNormalization.java
@@ -26,9 +26,6 @@ package de.lmu.ifi.dbs.elki.datasource.filter.normalization;
import gnu.trove.iterator.TIntDoubleIterator;
import gnu.trove.map.TIntDoubleMap;
import gnu.trove.map.hash.TIntDoubleHashMap;
-
-import java.util.BitSet;
-
import de.lmu.ifi.dbs.elki.data.SparseNumberVector;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
@@ -78,10 +75,10 @@ public class InverseDocumentFrequencyNormalization<V extends SparseNumberVector<
@Override
protected void prepareProcessInstance(V featureVector) {
- BitSet b = featureVector.getNotNullMask();
- for(int i = b.nextSetBit(0); i >= 0; i = b.nextSetBit(i + 1)) {
- if(featureVector.doubleValue(i) >= 0.0) {
- idf.put(i, idf.get(i) + 1);
+ for(int it = featureVector.iter(); featureVector.iterValid(it); it = featureVector.iterAdvance(it)) {
+ if(featureVector.iterDoubleValue(it) >= 0.) {
+ final int dim = featureVector.iterDim(it);
+ idf.put(dim, idf.get(dim) + 1);
}
}
objcnt += 1;
@@ -100,20 +97,20 @@ public class InverseDocumentFrequencyNormalization<V extends SparseNumberVector<
@Override
protected V filterSingleObject(V featureVector) {
- BitSet b = featureVector.getNotNullMask();
TIntDoubleHashMap vals = new TIntDoubleHashMap();
- for(int i = b.nextSetBit(0); i >= 0; i = b.nextSetBit(i + 1)) {
- vals.put(i, (float) (featureVector.doubleValue(i) * idf.get(i)));
+ for(int it = featureVector.iter(); featureVector.iterValid(it); it = featureVector.iterAdvance(it)) {
+ final int dim = featureVector.iterDim(it);
+ vals.put(dim, featureVector.iterDoubleValue(it) * idf.get(dim));
}
return ((SparseNumberVector.Factory<V, ?>) factory).newNumberVector(vals, featureVector.getDimensionality());
}
@Override
public V restore(V featureVector) {
- BitSet b = featureVector.getNotNullMask();
TIntDoubleHashMap vals = new TIntDoubleHashMap();
- for(int i = b.nextSetBit(0); i >= 0; i = b.nextSetBit(i + 1)) {
- vals.put(i, (float) (featureVector.doubleValue(i) / idf.get(i)));
+ for(int it = featureVector.iter(); featureVector.iterValid(it); it = featureVector.iterAdvance(it)) {
+ final int dim = featureVector.iterDim(it);
+ vals.put(dim, featureVector.iterDoubleValue(it) / idf.get(dim));
}
return ((SparseNumberVector.Factory<V, ?>) factory).newNumberVector(vals, featureVector.getDimensionality());
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/TFIDFNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/TFIDFNormalization.java
index 5110d6fe..09b73aa4 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/TFIDFNormalization.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/TFIDFNormalization.java
@@ -24,9 +24,6 @@ package de.lmu.ifi.dbs.elki.datasource.filter.normalization;
*/
import gnu.trove.map.hash.TIntDoubleHashMap;
-
-import java.util.BitSet;
-
import de.lmu.ifi.dbs.elki.data.SparseNumberVector;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -58,17 +55,17 @@ public class TFIDFNormalization<V extends SparseNumberVector<?>> extends Inverse
@Override
protected V filterSingleObject(V featureVector) {
- BitSet b = featureVector.getNotNullMask();
double sum = 0.0;
- for(int i = b.nextSetBit(0); i >= 0; i = b.nextSetBit(i + 1)) {
- sum += featureVector.doubleValue(i);
+ for(int it = featureVector.iter(); featureVector.iterValid(it); it = featureVector.iterAdvance(it)) {
+ sum += featureVector.iterDoubleValue(it);
}
if(sum <= 0) {
sum = 1.0;
}
TIntDoubleHashMap vals = new TIntDoubleHashMap();
- for(int i = b.nextSetBit(0); i >= 0; i = b.nextSetBit(i + 1)) {
- vals.put(i, (float) (featureVector.doubleValue(i) / sum * idf.get(i)));
+ for(int it = featureVector.iter(); featureVector.iterValid(it); it = featureVector.iterAdvance(it)) {
+ final int dim = featureVector.iterDim(it);
+ vals.put(dim, featureVector.iterDoubleValue(it) / sum * idf.get(dim));
}
return ((SparseNumberVector.Factory<V, ?>) factory).newNumberVector(vals, featureVector.getDimensionality());
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/AbstractSupervisedProjectionVectorFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/AbstractSupervisedProjectionVectorFilter.java
index 742eb977..462db9eb 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/AbstractSupervisedProjectionVectorFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/AbstractSupervisedProjectionVectorFilter.java
@@ -46,7 +46,7 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayLikeUtil;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -62,7 +62,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
*/
public abstract class AbstractSupervisedProjectionVectorFilter<V extends NumberVector<?>> implements ObjectFilter {
/**
- * r: the dimension to which the data should be reduced
+ * The dimensionality to which the data should be reduced.
*/
protected int tdim;
@@ -79,23 +79,23 @@ public abstract class AbstractSupervisedProjectionVectorFilter<V extends NumberV
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
final int dataLength = objects.dataLength();
- if (dataLength == 0) {
+ if(dataLength == 0) {
return objects;
}
List<? extends ClassLabel> classcolumn = null;
// First of all, identify a class label column.
- for (int r = 0; r < objects.metaLength(); r++) {
+ for(int r = 0; r < objects.metaLength(); r++) {
SimpleTypeInformation<?> type = objects.meta(r);
List<?> column = objects.getColumn(r);
- if (TypeUtil.CLASSLABEL.isAssignableFromType(type)) {
+ if(TypeUtil.CLASSLABEL.isAssignableFromType(type)) {
@SuppressWarnings("unchecked")
final List<? extends ClassLabel> castcolumn = (List<? extends ClassLabel>) column;
classcolumn = castcolumn;
break;
}
}
- if (classcolumn == null) {
+ if(classcolumn == null) {
getLogger().warning("No class label column found (try " + ClassLabelFilter.class.getSimpleName() + ") -- cannot run " + this.getClass().getSimpleName());
return objects;
}
@@ -103,10 +103,10 @@ public abstract class AbstractSupervisedProjectionVectorFilter<V extends NumberV
boolean somesuccess = false;
MultipleObjectsBundle bundle = new MultipleObjectsBundle();
// Secondly, look for columns to train the projection on.
- for (int r = 0; r < objects.metaLength(); r++) {
+ for(int r = 0; r < objects.metaLength(); r++) {
SimpleTypeInformation<?> type = objects.meta(r);
List<?> column = objects.getColumn(r);
- if (!TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(type)) {
+ if(!TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(type)) {
bundle.appendColumn(type, column);
continue;
}
@@ -117,8 +117,8 @@ public abstract class AbstractSupervisedProjectionVectorFilter<V extends NumberV
NumberVector.Factory<V, ?> factory = (NumberVector.Factory<V, ?>) vtype.getFactory();
int dim = vtype.getDimensionality();
- if (tdim > dim) {
- if (getLogger().isVerbose()) {
+ if(tdim > dim) {
+ if(getLogger().isVerbose()) {
getLogger().verbose("Setting projection dimension to original dimension: projection dimension: " + tdim + " larger than original dimension: " + dim);
}
tdim = dim;
@@ -126,21 +126,22 @@ public abstract class AbstractSupervisedProjectionVectorFilter<V extends NumberV
try {
Matrix proj = computeProjectionMatrix(vectorcolumn, classcolumn, dim);
- for (int i = 0; i < dataLength; i++) {
+ for(int i = 0; i < dataLength; i++) {
final Vector pv = proj.times(vectorcolumn.get(i).getColumnVector());
V filteredObj = factory.newNumberVector(pv, ArrayLikeUtil.VECTORADAPTER);
vectorcolumn.set(i, filteredObj);
}
bundle.appendColumn(convertedType(type, factory), column);
somesuccess = true;
- } catch (Exception e) {
+ }
+ catch(Exception e) {
getLogger().error("Projection failed -- continuing with unprojected data!", e);
bundle.appendColumn(type, column);
continue;
}
}
- if (!somesuccess) {
+ if(!somesuccess) {
getLogger().warning("No vector field of fixed dimensionality found.");
return objects;
}
@@ -179,15 +180,15 @@ public abstract class AbstractSupervisedProjectionVectorFilter<V extends NumberV
* Partition the bundle based on the class label.
*
* @param classcolumn
- * @return
+ * @return Partitioned data set.
*/
protected <O> Map<O, TIntList> partition(List<? extends O> classcolumn) {
Map<O, TIntList> classes = new HashMap<>();
Iterator<? extends O> iter = classcolumn.iterator();
- for (int i = 0; iter.hasNext(); i++) {
+ for(int i = 0; iter.hasNext(); i++) {
O lbl = iter.next();
TIntList ids = classes.get(lbl);
- if (ids == null) {
+ if(ids == null) {
ids = new TIntArrayList();
classes.put(lbl, ids);
}
@@ -220,9 +221,9 @@ public abstract class AbstractSupervisedProjectionVectorFilter<V extends NumberV
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
IntParameter dimP = new IntParameter(P_ID, 2);
- dimP.addConstraint(new GreaterConstraint(0));
+ dimP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
- if (config.grab(dimP)) {
+ if(config.grab(dimP)) {
tdim = dimP.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorFeatureSelectionFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorFeatureSelectionFilter.java
index 720c88df..e6d0d15d 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorFeatureSelectionFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorFeatureSelectionFilter.java
@@ -35,8 +35,7 @@ import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
import de.lmu.ifi.dbs.elki.datasource.filter.AbstractVectorStreamConversionFilter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ListEachConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntListParameter;
@@ -150,11 +149,11 @@ public class NumberVectorFeatureSelectionFilter<V extends NumberVector<?>> exten
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
IntListParameter selectedAttributesP = new IntListParameter(SELECTED_ATTRIBUTES_ID);
- selectedAttributesP.addConstraint(new ListEachConstraint<Integer>(new GreaterEqualConstraint(0)));
- if (config.grab(selectedAttributesP)) {
+ selectedAttributesP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_INT_LIST);
+ if(config.grab(selectedAttributesP)) {
selectedAttributes = new BitSet();
List<Integer> dimensionList = selectedAttributesP.getValue();
- for (int d : dimensionList) {
+ for(int d : dimensionList) {
selectedAttributes.set(d);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorRandomFeatureSelectionFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorRandomFeatureSelectionFilter.java
index 9b1ddbff..4086270c 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorRandomFeatureSelectionFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorRandomFeatureSelectionFilter.java
@@ -36,7 +36,7 @@ import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.Util;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
@@ -106,7 +106,7 @@ public class NumberVectorRandomFeatureSelectionFilter<V extends NumberVector<?>>
*/
void initializeRandomAttributes(SimpleTypeInformation<V> in) {
int d = ((VectorFieldTypeInformation<V>) in).getDimensionality();
- selectedAttributes = Util.randomBitSet(k, d, rnd.getRandom());
+ selectedAttributes = Util.randomBitSet(k, d, rnd.getSingleThreadedRandom());
}
/**
@@ -156,12 +156,12 @@ public class NumberVectorRandomFeatureSelectionFilter<V extends NumberVector<?>>
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
IntParameter kP = new IntParameter(NUMBER_SELECTED_ATTRIBUTES_ID, 1);
- kP.addConstraint(new GreaterEqualConstraint(1));
- if (config.grab(kP)) {
+ kP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(kP)) {
k = kP.getValue().intValue();
}
RandomParameter rndP = new RandomParameter(SEED_ID);
- if (config.grab(rndP)) {
+ if(config.grab(rndP)) {
rnd = rndP.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/parser/AbstractParser.java b/src/de/lmu/ifi/dbs/elki/datasource/parser/AbstractParser.java
index 1e689638..e8201db1 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/parser/AbstractParser.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/parser/AbstractParser.java
@@ -23,15 +23,11 @@ package de.lmu.ifi.dbs.elki.datasource.parser;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.ArrayList;
-import java.util.List;
-import java.util.regex.Matcher;
import java.util.regex.Pattern;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.StringLengthConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.PatternParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.StringParameter;
@@ -41,6 +37,9 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.StringParameter;
* options.
*
* @author Arthur Zimek
+ * @author Erich Schubert
+ *
+ * @apiviz.composedOf Tokenizer
*/
public abstract class AbstractParser {
/**
@@ -51,7 +50,7 @@ public abstract class AbstractParser {
/**
* A quote pattern
*/
- public static final char QUOTE_CHAR = '\"';
+ public static final String QUOTE_CHARS = "\"'";
/**
* A pattern catching most numbers that can be parsed using
@@ -73,96 +72,38 @@ public abstract class AbstractParser {
public static final String ATTRIBUTE_CONCATENATION = " ";
/**
- * Stores the column separator pattern
- */
- private Pattern colSep = null;
-
- /**
- * Stores the quotation character
+ * Comment pattern.
*/
- protected char quoteChar = QUOTE_CHAR;
+ protected Pattern comment = null;
/**
- * Comment pattern.
+ * String tokenizer.
*/
- protected Pattern comment = null;
+ protected Tokenizer tokenizer;
/**
* Constructor.
*
* @param colSep Column separator
- * @param quoteChar Quote character
+ * @param quoteChars Quote character
* @param comment Comment pattern
*/
- public AbstractParser(Pattern colSep, char quoteChar, Pattern comment) {
+ public AbstractParser(Pattern colSep, String quoteChars, Pattern comment) {
super();
- this.colSep = colSep;
- this.quoteChar = quoteChar;
+ this.tokenizer = new Tokenizer(colSep, quoteChars);
this.comment = comment;
}
- /**
- * Tokenize a string. Works much like colSep.split() except it honors
- * quotation characters.
- *
- * @param input Input string
- * @return Tokenized string
- */
- protected List<String> tokenize(String input) {
- ArrayList<String> matchList = new ArrayList<>();
- Matcher m = colSep.matcher(input);
-
- int index = 0;
- boolean inquote = (input.length() > 0) && (input.charAt(0) == quoteChar);
- while (m.find()) {
- // Quoted code path vs. regular code path
- if (inquote && m.start() > 0) {
- // Closing quote found?
- if (m.start() > index + 1 && input.charAt(m.start() - 1) == quoteChar) {
- // Strip quote characters
- if (index + 1 < m.start() - 1) {
- matchList.add(input.substring(index + 1, m.start() - 1));
- }
- // Seek past
- index = m.end();
- // new quote?
- inquote = (index < input.length()) && (input.charAt(index) == quoteChar);
- }
- } else {
- // Add match before separator
- if (index < m.start()) {
- matchList.add(input.substring(index, m.start()));
- }
- // Seek past separator
- index = m.end();
- // new quote?
- inquote = (index < input.length()) && (input.charAt(index) == quoteChar);
+ public static int lengthWithoutLinefeed(String line) {
+ int length = line.length();
+ while(length > 0) {
+ char last = line.charAt(length - 1);
+ if(last != '\n' && last != '\r') {
+ break;
}
+ --length;
}
- // Nothing found - return original string.
- if (index == 0) {
- matchList.add(input);
- return matchList;
- }
- // Add tail after last separator.
- if (inquote) {
- if (input.charAt(input.length() - 1) == quoteChar) {
- if (index + 1 < input.length() - 1) {
- matchList.add(input.substring(index + 1, input.length() - 1));
- }
- } else {
- getLogger().warning("Invalid quoted line in input: no closing quote found in: " + input);
- if (index < input.length()) {
- matchList.add(input.substring(index, input.length()));
- }
- }
- } else {
- if (index < input.length()) {
- matchList.add(input.substring(index, input.length()));
- }
- }
- // Return
- return matchList;
+ return length;
}
/**
@@ -183,43 +124,6 @@ public abstract class AbstractParser {
}
/**
- * Utility function, which is a bit more robust wrt. parsing double values. In
- * particular: infinite values, and creates fewer objects.
- *
- * @param s String s
- * @return parsed value
- * @throws NumberFormatException
- */
- public static double parseDouble(String s) throws NumberFormatException {
- try {
- return Double.parseDouble(s);
- } catch (NumberFormatException e) {
- int len = s.length();
- if (len > 0) {
- int p = 0;
- char cur = s.charAt(p);
- boolean isNegative = cur == '-';
- if (isNegative && ++p < len) {
- cur = s.charAt(p);
- }
- if (cur == '∞') {
- return isNegative ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
- }
- if (len - p == 3 && "Inf".regionMatches(true, 0, s, p, 3)) {
- return isNegative ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
- }
- if (len - p == 8 && "Infinity".regionMatches(true, 0, s, p, 8)) {
- return isNegative ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
- }
- if (len == 3 && "NaN".equalsIgnoreCase(s)) {
- return Double.NaN;
- }
- }
- throw e;
- }
- }
-
- /**
* Parameterization class.
*
* @author Erich Schubert
@@ -235,9 +139,9 @@ public abstract class AbstractParser {
/**
* OptionID for the quote character parameter (defaults to a double
- * quotation mark as in {@link #QUOTE_CHAR}.
+ * quotation mark as in {@link AbstractParser#QUOTE_CHARS}.
*/
- public static final OptionID QUOTE_ID = new OptionID("parser.quote", "Quotation character. The default is to use a double quote.");
+ public static final OptionID QUOTE_ID = new OptionID("parser.quote", "Quotation characters. By default, both double and single ASCII quotes are accepted.");
/**
* Comment pattern.
@@ -252,7 +156,7 @@ public abstract class AbstractParser {
/**
* Stores the quotation character
*/
- protected char quoteChar = QUOTE_CHAR;
+ protected String quoteChars = QUOTE_CHARS;
/**
* Comment pattern.
@@ -263,16 +167,15 @@ public abstract class AbstractParser {
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
PatternParameter colParam = new PatternParameter(COLUMN_SEPARATOR_ID, DEFAULT_SEPARATOR);
- if (config.grab(colParam)) {
+ if(config.grab(colParam)) {
colSep = colParam.getValue();
}
- StringParameter quoteParam = new StringParameter(QUOTE_ID, String.valueOf(QUOTE_CHAR));
- quoteParam.addConstraint(new StringLengthConstraint(1, 1));
- if (config.grab(quoteParam)) {
- quoteChar = quoteParam.getValue().charAt(0);
+ StringParameter quoteParam = new StringParameter(QUOTE_ID, QUOTE_CHARS);
+ if(config.grab(quoteParam)) {
+ quoteChars = quoteParam.getValue();
}
PatternParameter commentP = new PatternParameter(COMMENT_ID, COMMENT_PATTERN);
- if (config.grab(commentP)) {
+ if(config.grab(commentP)) {
comment = commentP.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/parser/AbstractStreamingParser.java b/src/de/lmu/ifi/dbs/elki/datasource/parser/AbstractStreamingParser.java
index 53b4b6e8..a218638a 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/parser/AbstractStreamingParser.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/parser/AbstractStreamingParser.java
@@ -38,11 +38,11 @@ public abstract class AbstractStreamingParser extends AbstractParser implements
* Constructor.
*
* @param colSep Column separator pattern
- * @param quoteChar Quote character
+ * @param quoteChars Quote characters
* @param comment Comment pattern
*/
- public AbstractStreamingParser(Pattern colSep, char quoteChar, Pattern comment) {
- super(colSep, quoteChar, comment);
+ public AbstractStreamingParser(Pattern colSep, String quoteChars, Pattern comment) {
+ super(colSep, quoteChars, comment);
}
@Override
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/parser/ArffParser.java b/src/de/lmu/ifi/dbs/elki/datasource/parser/ArffParser.java
index 718963d1..50714b81 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/parser/ArffParser.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/parser/ArffParser.java
@@ -48,6 +48,7 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
@@ -125,6 +126,11 @@ public class ArffParser implements Parser {
Pattern magic_class;
/**
+ * (Reused) buffer for building label lists.
+ */
+ ArrayList<String> labels = new ArrayList<>();
+
+ /**
* Constructor.
*
* @param magic_eid Magic to recognize external IDs
@@ -230,7 +236,7 @@ public class ArffParser implements Parser {
nextToken(tokenizer);
if(tokenizer.ttype == StreamTokenizer.TT_WORD) {
if(TypeUtil.NUMBER_VECTOR_FIELD.equals(elkitypes[targ[dim]])) {
- map.put(dim, AbstractParser.parseDouble(tokenizer.sval));
+ map.put(dim, FormatUtil.parseDouble(tokenizer.sval));
}
else {
map.put(dim, tokenizer.sval);
@@ -270,7 +276,7 @@ public class ArffParser implements Parser {
}
else if(TypeUtil.LABELLIST.equals(elkitypes[out])) {
// Build a label list out of successive labels
- LabelList ll = new LabelList(1);
+ labels.clear();
for(TIntObjectIterator<Object> iter = map.iterator(); iter.hasNext();) {
iter.advance();
int i = iter.key();
@@ -281,12 +287,12 @@ public class ArffParser implements Parser {
break;
}
String v = (String) iter.value();
- if(ll.size() < i - s) {
+ if(labels.size() < i - s) {
LOG.warning("Sparse consecutive labels are currently not correctly supported.");
}
- ll.add(v);
+ labels.add(v);
}
- data[out] = ll;
+ data[out] = LabelList.make(labels);
}
else if(TypeUtil.EXTERNALID.equals(elkitypes[out])) {
String val = (String) map.get(s);
@@ -327,7 +333,7 @@ public class ArffParser implements Parser {
}
else if(tokenizer.ttype == StreamTokenizer.TT_WORD) {
try {
- cur[k] = AbstractParser.parseDouble(tokenizer.sval);
+ cur[k] = FormatUtil.parseDouble(tokenizer.sval);
}
catch(NumberFormatException e) {
throw new AbortException("Expected number value, got: " + tokenizer.sval);
@@ -342,15 +348,15 @@ public class ArffParser implements Parser {
}
else if(TypeUtil.LABELLIST.equals(etyp[out])) {
// Build a label list out of successive labels
- LabelList ll = new LabelList(dimsize[out]);
+ labels.clear();
for(int k = 0; k < dimsize[out]; k++) {
if(tokenizer.ttype != StreamTokenizer.TT_WORD) {
throw new AbortException("Expected word token, got: " + tokenizer.toString());
}
- ll.add(tokenizer.sval);
+ labels.add(tokenizer.sval);
nextToken(tokenizer);
}
- data[out] = ll;
+ data[out] = LabelList.make(labels);
}
else if(TypeUtil.EXTERNALID.equals(etyp[out])) {
if(tokenizer.ttype != StreamTokenizer.TT_WORD) {
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/parser/BitVectorLabelParser.java b/src/de/lmu/ifi/dbs/elki/datasource/parser/BitVectorLabelParser.java
index 07019040..26bc38af 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/parser/BitVectorLabelParser.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/parser/BitVectorLabelParser.java
@@ -28,10 +28,10 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
+import java.util.BitSet;
import java.util.List;
import java.util.regex.Pattern;
-import de.lmu.ifi.dbs.elki.data.Bit;
import de.lmu.ifi.dbs.elki.data.BitVector;
import de.lmu.ifi.dbs.elki.data.LabelList;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
@@ -64,11 +64,11 @@ public class BitVectorLabelParser extends AbstractParser implements Parser {
* Constructor.
*
* @param colSep Column separator
- * @param quoteChar Quotation character
+ * @param quoteChars Quotation character
* @param comment Comment pattern
*/
- public BitVectorLabelParser(Pattern colSep, char quoteChar, Pattern comment) {
- super(colSep, quoteChar, comment);
+ public BitVectorLabelParser(Pattern colSep, String quoteChars, Pattern comment) {
+ super(colSep, quoteChars, comment);
}
@Override
@@ -78,38 +78,40 @@ public class BitVectorLabelParser extends AbstractParser implements Parser {
int dimensionality = -1;
List<BitVector> vectors = new ArrayList<>();
List<LabelList> labels = new ArrayList<>();
+ ArrayList<String> ll = new ArrayList<>();
try {
- for (String line; (line = reader.readLine()) != null; lineNumber++) {
+ for(String line; (line = reader.readLine()) != null; lineNumber++) {
// Skip empty lines and comments
- if (line.length() <= 0 || (comment != null && comment.matcher(line).matches())) {
+ if(line.length() <= 0 || (comment != null && comment.matcher(line).matches())) {
continue;
}
- List<String> entries = tokenize(line);
- // FIXME: use more efficient storage right away?
- List<Bit> attributes = new ArrayList<>();
- LabelList ll = null;
- for (String entry : entries) {
+ BitSet bitSet = new BitSet();
+ ll.clear();
+ int i = 0;
+ for(tokenizer.initialize(line, 0, lengthWithoutLinefeed(line)); tokenizer.valid(); tokenizer.advance()) {
try {
- Bit attribute = Bit.valueOf(entry);
- attributes.add(attribute);
- } catch (NumberFormatException e) {
- if (ll == null) {
- ll = new LabelList(1);
+ if(tokenizer.getLongBase10() > 0) {
+ bitSet.set(i);
}
- ll.add(entry);
+ ++i;
+ }
+ catch(NumberFormatException e) {
+ ll.add(tokenizer.getSubstring());
}
}
- if (dimensionality < 0) {
- dimensionality = attributes.size();
- } else if (dimensionality != attributes.size()) {
+ if(dimensionality < 0) {
+ dimensionality = i;
+ }
+ else if(dimensionality != i) {
throw new IllegalArgumentException("Differing dimensionality in line " + lineNumber + ".");
}
- vectors.add(new BitVector(attributes.toArray(new Bit[attributes.size()])));
- labels.add(ll);
+ vectors.add(new BitVector(bitSet, dimensionality));
+ labels.add(LabelList.make(ll));
}
- } catch (IOException e) {
+ }
+ catch(IOException e) {
throw new IllegalArgumentException("Error while parsing line " + lineNumber + ".");
}
return MultipleObjectsBundle.makeSimple(getTypeInformation(dimensionality), vectors, TypeUtil.LABELLIST, labels);
@@ -134,7 +136,7 @@ public class BitVectorLabelParser extends AbstractParser implements Parser {
public static class Parameterizer extends AbstractParser.Parameterizer {
@Override
protected BitVectorLabelParser makeInstance() {
- return new BitVectorLabelParser(colSep, quoteChar, comment);
+ return new BitVectorLabelParser(colSep, quoteChars, comment);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/parser/CategorialDataAsNumberVectorParser.java b/src/de/lmu/ifi/dbs/elki/datasource/parser/CategorialDataAsNumberVectorParser.java
new file mode 100644
index 00000000..3dd49470
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/datasource/parser/CategorialDataAsNumberVectorParser.java
@@ -0,0 +1,161 @@
+package de.lmu.ifi.dbs.elki.datasource.parser;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import gnu.trove.map.hash.TObjectIntHashMap;
+
+import java.util.BitSet;
+import java.util.regex.Pattern;
+
+import de.lmu.ifi.dbs.elki.data.LabelList;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayLikeUtil;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
+
+/**
+ * A very simple parser for categorial data, which will then be encoded as
+ * numbers. This is closely modeled after the number vector parser.
+ *
+ * TODO: specify handling for numerical values.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.landmark
+ * @apiviz.has NumberVector
+ *
+ * @param <V> the type of NumberVector used
+ */
+@Description("This parser expects data in roughly the same format as the NumberVectorLabelParser,\n"//
+ + "except that it will enumerate all unique strings to always produce numerical values.\n"//
+ + "This way, it can for example handle files that contain lines like 'y,n,y,y,n,y,n'.")
+public class CategorialDataAsNumberVectorParser<V extends NumberVector<?>> extends NumberVectorLabelParser<V> {
+ /**
+ * Logging class.
+ */
+ private static final Logging LOG = Logging.getLogger(CategorialDataAsNumberVectorParser.class);
+
+ /**
+ * For String unification.
+ */
+ TObjectIntHashMap<String> unique = new TObjectIntHashMap<>();
+
+ /**
+ * Base for enumerating unique values.
+ */
+ int ustart = Math.max(unique.getNoEntryValue() + 1, 1);
+
+ /**
+ * Pattern for NaN values.
+ */
+ Pattern nanpattern = Pattern.compile("\\?");
+
+ /**
+ * Constructor with defaults.
+ *
+ * @param factory Vector factory
+ */
+ public CategorialDataAsNumberVectorParser(NumberVector.Factory<V, ?> factory) {
+ this(Pattern.compile(DEFAULT_SEPARATOR), QUOTE_CHARS, Pattern.compile(COMMENT_PATTERN), null, factory);
+ }
+
+ /**
+ * Constructor.
+ *
+ * @param colSep Column separator
+ * @param quoteChars Quote character
+ * @param comment Comment pattern
+ * @param labelIndices Column indexes that are numeric.
+ * @param factory Vector factory
+ */
+ public CategorialDataAsNumberVectorParser(Pattern colSep, String quoteChars, Pattern comment, BitSet labelIndices, NumberVector.Factory<V, ?> factory) {
+ super(colSep, quoteChars, comment, labelIndices, factory);
+ }
+
+ @Override
+ public Event nextEvent() {
+ Event e = super.nextEvent();
+ if(e == Event.END_OF_STREAM) {
+ unique.clear();
+ }
+ return e;
+ }
+
+ @Override
+ protected void parseLineInternal(String line) {
+ // Split into numerical attributes and labels
+ attributes.reset();
+ labels.clear();
+
+ int i = 0;
+ for(tokenizer.initialize(line, 0, lengthWithoutLinefeed(line)); tokenizer.valid(); tokenizer.advance(), i++) {
+ if(labelIndices == null || !labelIndices.get(i)) {
+ try {
+ double attribute = tokenizer.getDouble();
+ attributes.add(attribute);
+ continue;
+ }
+ catch(NumberFormatException e) {
+ String s = tokenizer.getSubstring();
+ if(nanpattern.matcher(s).matches()) {
+ attributes.add(Double.NaN);
+ continue;
+ }
+ int id = unique.get(s);
+ if(id == unique.getNoEntryValue()) {
+ id = ustart + unique.size();
+ unique.put(s, id);
+ }
+ attributes.add(id);
+ continue;
+ }
+ }
+ // Else: labels.
+ haslabels = true;
+ labels.add(tokenizer.getSubstring());
+ }
+ // Pass outside via class variables
+ curvec = createDBObject(attributes, ArrayLikeUtil.TDOUBLELISTADAPTER);
+ curlbl = LabelList.make(labels);
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer<V extends NumberVector<?>> extends NumberVectorLabelParser.Parameterizer<V> {
+ @Override
+ protected CategorialDataAsNumberVectorParser<V> makeInstance() {
+ return new CategorialDataAsNumberVectorParser<>(colSep, quoteChars, comment, labelIndices, factory);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/parser/DoubleVectorLabelParser.java b/src/de/lmu/ifi/dbs/elki/datasource/parser/DoubleVectorLabelParser.java
index b95dce74..bf84f2ce 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/parser/DoubleVectorLabelParser.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/parser/DoubleVectorLabelParser.java
@@ -63,19 +63,19 @@ public class DoubleVectorLabelParser extends NumberVectorLabelParser<DoubleVecto
* Constructor.
*
* @param colSep Column separator
- * @param quoteChar Quotation character
+ * @param quoteChars Quotation characters
* @param comment Comment pattern
* @param labelIndices Indices to use as labels
*/
- public DoubleVectorLabelParser(Pattern colSep, char quoteChar, Pattern comment, BitSet labelIndices) {
- super(colSep, quoteChar, comment, labelIndices, DoubleVector.FACTORY);
+ public DoubleVectorLabelParser(Pattern colSep, String quoteChars, Pattern comment, BitSet labelIndices) {
+ super(colSep, quoteChars, comment, labelIndices, DoubleVector.FACTORY);
}
/**
* Constructor with default values.
*/
public DoubleVectorLabelParser() {
- this(Pattern.compile(DEFAULT_SEPARATOR), QUOTE_CHAR, Pattern.compile(COMMENT_PATTERN), new BitSet());
+ this(Pattern.compile(DEFAULT_SEPARATOR), QUOTE_CHARS, Pattern.compile(COMMENT_PATTERN), new BitSet());
}
@Override
@@ -98,7 +98,7 @@ public class DoubleVectorLabelParser extends NumberVectorLabelParser<DoubleVecto
@Override
protected DoubleVectorLabelParser makeInstance() {
- return new DoubleVectorLabelParser(colSep, quoteChar, comment, labelIndices);
+ return new DoubleVectorLabelParser(colSep, quoteChars, comment, labelIndices);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/parser/FloatVectorLabelParser.java b/src/de/lmu/ifi/dbs/elki/datasource/parser/FloatVectorLabelParser.java
index 71b65cfc..6d800cd8 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/parser/FloatVectorLabelParser.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/parser/FloatVectorLabelParser.java
@@ -65,12 +65,12 @@ public class FloatVectorLabelParser extends NumberVectorLabelParser<FloatVector>
* Constructor.
*
* @param colSep Column separator
- * @param quoteChar Quotation character
+ * @param quoteChars Quotation characters
* @param comment Comment pattern
* @param labelIndices Indices to use as labels
*/
- public FloatVectorLabelParser(Pattern colSep, char quoteChar, Pattern comment, BitSet labelIndices) {
- super(colSep, quoteChar, comment, labelIndices, FloatVector.FACTORY);
+ public FloatVectorLabelParser(Pattern colSep, String quoteChars, Pattern comment, BitSet labelIndices) {
+ super(colSep, quoteChars, comment, labelIndices, FloatVector.FACTORY);
}
@Override
@@ -93,7 +93,7 @@ public class FloatVectorLabelParser extends NumberVectorLabelParser<FloatVector>
@Override
protected FloatVectorLabelParser makeInstance() {
- return new FloatVectorLabelParser(colSep, quoteChar, comment, labelIndices);
+ return new FloatVectorLabelParser(colSep, quoteChars, comment, labelIndices);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/parser/NumberVectorLabelParser.java b/src/de/lmu/ifi/dbs/elki/datasource/parser/NumberVectorLabelParser.java
index 39da752b..3fe4af09 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/parser/NumberVectorLabelParser.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/parser/NumberVectorLabelParser.java
@@ -29,8 +29,9 @@ import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
+import java.util.ArrayList;
import java.util.BitSet;
-import java.util.Iterator;
+import java.util.HashMap;
import java.util.List;
import java.util.regex.Pattern;
@@ -79,25 +80,6 @@ public class NumberVectorLabelParser<V extends NumberVector<?>> extends Abstract
private static final Logging LOG = Logging.getLogger(NumberVectorLabelParser.class);
/**
- * A comma separated list of the indices of labels (may be numeric), counting
- * whitespace separated entries in a line starting with 0. The corresponding
- * entries will be treated as a label.
- * <p>
- * Key: {@code -parser.labelIndices}
- * </p>
- */
- public static final OptionID LABEL_INDICES_ID = new OptionID("parser.labelIndices", "A comma separated list of the indices of labels (may be numeric), counting whitespace separated entries in a line starting with 0. The corresponding entries will be treated as a label.");
-
- /**
- * Parameter to specify the type of vectors to produce.
- * <p>
- * Key: {@code -parser.vector-type}<br />
- * Default: DoubleVector
- * </p>
- */
- public static final OptionID VECTOR_TYPE_ID = new OptionID("parser.vector-type", "The type of vectors to create for numerical attributes.");
-
- /**
* Keeps the indices of the attributes to be treated as a string label.
*/
protected BitSet labelIndices;
@@ -138,6 +120,11 @@ public class NumberVectorLabelParser<V extends NumberVector<?>> extends Abstract
protected BitSet labelcolumns = null;
/**
+ * Whether or not the data set has labels.
+ */
+ protected boolean haslabels = false;
+
+ /**
* Current vector.
*/
protected V curvec = null;
@@ -148,6 +135,21 @@ public class NumberVectorLabelParser<V extends NumberVector<?>> extends Abstract
protected LabelList curlbl = null;
/**
+ * (Reused) store for numerical attributes.
+ */
+ final TDoubleArrayList attributes = new TDoubleArrayList();
+
+ /**
+ * (Reused) store for labels.
+ */
+ final ArrayList<String> labels = new ArrayList<>();
+
+ /**
+ * For String unification.
+ */
+ HashMap<String, String> unique = new HashMap<>();
+
+ /**
* Event to report next.
*/
Event nextevent = null;
@@ -158,20 +160,20 @@ public class NumberVectorLabelParser<V extends NumberVector<?>> extends Abstract
* @param factory Vector factory
*/
public NumberVectorLabelParser(NumberVector.Factory<V, ?> factory) {
- this(Pattern.compile(DEFAULT_SEPARATOR), QUOTE_CHAR, Pattern.compile(COMMENT_PATTERN), null, factory);
+ this(Pattern.compile(DEFAULT_SEPARATOR), QUOTE_CHARS, Pattern.compile(COMMENT_PATTERN), null, factory);
}
/**
* Constructor.
*
* @param colSep Column separator
- * @param quoteChar Quote character
+ * @param quoteChars Quote characters
* @param comment Comment pattern
* @param labelIndices Column indexes that are numeric.
* @param factory Vector factory
*/
- public NumberVectorLabelParser(Pattern colSep, char quoteChar, Pattern comment, BitSet labelIndices, NumberVector.Factory<V, ?> factory) {
- super(colSep, quoteChar, comment);
+ public NumberVectorLabelParser(Pattern colSep, String quoteChars, Pattern comment, BitSet labelIndices, NumberVector.Factory<V, ?> factory) {
+ super(colSep, quoteChars, comment);
this.labelIndices = labelIndices;
this.factory = factory;
}
@@ -183,8 +185,9 @@ public class NumberVectorLabelParser<V extends NumberVector<?>> extends Abstract
mindim = Integer.MAX_VALUE;
maxdim = 0;
columnnames = null;
+ haslabels = false;
labelcolumns = new BitSet();
- if (labelIndices != null) {
+ if(labelIndices != null) {
labelcolumns.or(labelIndices);
}
}
@@ -196,36 +199,31 @@ public class NumberVectorLabelParser<V extends NumberVector<?>> extends Abstract
@Override
public Event nextEvent() {
- if (nextevent != null) {
+ if(nextevent != null) {
Event ret = nextevent;
nextevent = null;
return ret;
}
try {
- for (String line; (line = reader.readLine()) != null; lineNumber++) {
+ for(String line; (line = reader.readLine()) != null; lineNumber++) {
// Skip empty lines and comments
- if (line.length() <= 0 || (comment != null && comment.matcher(line).matches())) {
+ if(line.length() <= 0 || (comment != null && comment.matcher(line).matches())) {
continue;
}
parseLineInternal(line);
// Maybe a header column?
- if (curvec == null) {
+ if(curvec == null) {
continue;
}
final int curdim = curvec.getDimensionality();
- if (maxdim < mindim) {
- mindim = curdim;
- maxdim = curdim;
- buildMeta();
- nextevent = Event.NEXT_OBJECT;
- return Event.META_CHANGED;
- } else if (mindim < curdim || maxdim > curdim) {
+ if(curdim > maxdim || mindim > curdim) {
mindim = Math.min(mindim, curdim);
maxdim = Math.max(maxdim, curdim);
buildMeta();
nextevent = Event.NEXT_OBJECT;
return Event.META_CHANGED;
- } else if (curlbl != null && meta != null && meta.size() == 1) {
+ }
+ else if(curlbl != null && meta != null && meta.size() == 1) {
buildMeta();
nextevent = Event.NEXT_OBJECT;
return Event.META_CHANGED;
@@ -234,8 +232,10 @@ public class NumberVectorLabelParser<V extends NumberVector<?>> extends Abstract
}
reader.close();
reader = null;
+ unique.clear();
return Event.END_OF_STREAM;
- } catch (IOException e) {
+ }
+ catch(IOException e) {
throw new IllegalArgumentException("Error while parsing line " + lineNumber + ".");
}
}
@@ -244,11 +244,12 @@ public class NumberVectorLabelParser<V extends NumberVector<?>> extends Abstract
* Update the meta element.
*/
protected void buildMeta() {
- if (labelcolumns.cardinality() > 0 || (labelIndices != null && labelIndices.cardinality() > 0)) {
+ if(haslabels) {
meta = new BundleMeta(2);
meta.add(getTypeInformation(mindim, maxdim));
meta.add(TypeUtil.LABELLIST);
- } else {
+ }
+ else {
meta = new BundleMeta(1);
meta.add(getTypeInformation(mindim, maxdim));
}
@@ -256,10 +257,10 @@ public class NumberVectorLabelParser<V extends NumberVector<?>> extends Abstract
@Override
public Object data(int rnum) {
- if (rnum == 0) {
+ if(rnum == 0) {
return curvec;
}
- if (rnum == 1) {
+ if(rnum == 1) {
return curlbl;
}
throw new ArrayIndexOutOfBoundsException();
@@ -273,45 +274,48 @@ public class NumberVectorLabelParser<V extends NumberVector<?>> extends Abstract
* @param line Line to process
*/
protected void parseLineInternal(String line) {
- List<String> entries = tokenize(line);
- // Split into numerical attributes and labels
- TDoubleArrayList attributes = new TDoubleArrayList(entries.size());
- LabelList labels = null;
+ attributes.reset();
+ labels.clear();
- Iterator<String> itr = entries.iterator();
- for (int i = 0; itr.hasNext(); i++) {
- String ent = itr.next();
- if (labelIndices == null || !labelIndices.get(i)) {
+ // Split into numerical attributes and labels
+ int i = 0;
+ for(tokenizer.initialize(line, 0, lengthWithoutLinefeed(line)); tokenizer.valid(); tokenizer.advance(), i++) {
+ if(labelIndices == null || !labelIndices.get(i)) {
try {
- double attribute = parseDouble(ent);
+ double attribute = tokenizer.getDouble();
attributes.add(attribute);
continue;
- } catch (NumberFormatException e) {
+ }
+ catch(NumberFormatException e) {
// Ignore attempt, add to labels below.
labelcolumns.set(i);
}
}
// Else: labels.
- if (labels == null) {
- labels = new LabelList(1);
+ haslabels = true;
+ final String lbl = tokenizer.getSubstring();
+ String u = unique.get(lbl);
+ if(u == null) {
+ u = lbl;
+ unique.put(u, u);
}
- // Make a new string, to not keep the whole file in memory!
- labels.add(new String(ent));
+ labels.add(u);
}
// Maybe a label row?
- if (lineNumber == 1 && attributes.size() == 0) {
- columnnames = labels;
+ if(lineNumber == 1 && attributes.size() == 0) {
+ columnnames = new ArrayList<>(labels);
labelcolumns.clear();
- if (labelIndices != null) {
+ if(labelIndices != null) {
labelcolumns.or(labelIndices);
}
curvec = null;
curlbl = null;
+ haslabels = false;
return;
}
// Pass outside via class variables
curvec = createDBObject(attributes, ArrayLikeUtil.TDOUBLELISTADAPTER);
- curlbl = labels;
+ curlbl = LabelList.make(labels);
}
/**
@@ -334,13 +338,13 @@ public class NumberVectorLabelParser<V extends NumberVector<?>> extends Abstract
* @return Prototype object
*/
SimpleTypeInformation<V> getTypeInformation(int mindim, int maxdim) {
- if (mindim == maxdim) {
+ if(mindim == maxdim) {
String[] colnames = null;
- if (columnnames != null) {
- if (columnnames.size() - labelcolumns.cardinality() == mindim) {
+ if(columnnames != null) {
+ if(columnnames.size() - labelcolumns.cardinality() == mindim) {
colnames = new String[mindim];
- for (int i = 0, j = 0; i < columnnames.size(); i++) {
- if (!labelcolumns.get(i)) {
+ for(int i = 0, j = 0; i < columnnames.size(); i++) {
+ if(!labelcolumns.get(i)) {
colnames[j] = columnnames.get(i);
j++;
}
@@ -348,10 +352,12 @@ public class NumberVectorLabelParser<V extends NumberVector<?>> extends Abstract
}
}
return new VectorFieldTypeInformation<>(factory, mindim, colnames);
- } else if (mindim < maxdim) {
+ }
+ else if(mindim < maxdim) {
// Variable dimensionality - return non-vector field type
return new VectorTypeInformation<>(factory.getRestrictionClass(), factory.getDefaultSerializer(), mindim, maxdim);
- } else {
+ }
+ else {
throw new AbortException("No vectors were read from the input file - cannot determine vector data type.");
}
}
@@ -370,6 +376,25 @@ public class NumberVectorLabelParser<V extends NumberVector<?>> extends Abstract
*/
public static class Parameterizer<V extends NumberVector<?>> extends AbstractParser.Parameterizer {
/**
+ * A comma separated list of the indices of labels (may be numeric),
+ * counting whitespace separated entries in a line starting with 0. The
+ * corresponding entries will be treated as a label.
+ * <p>
+ * Key: {@code -parser.labelIndices}
+ * </p>
+ */
+ public static final OptionID LABEL_INDICES_ID = new OptionID("parser.labelIndices", "A comma separated list of the indices of labels (may be numeric), counting whitespace separated entries in a line starting with 0. The corresponding entries will be treated as a label.");
+
+ /**
+ * Parameter to specify the type of vectors to produce.
+ * <p>
+ * Key: {@code -parser.vector-type}<br />
+ * Default: DoubleVector
+ * </p>
+ */
+ public static final OptionID VECTOR_TYPE_ID = new OptionID("parser.vector-type", "The type of vectors to create for numerical attributes.");
+
+ /**
* Keeps the indices of the attributes to be treated as a string label.
*/
protected BitSet labelIndices = null;
@@ -393,7 +418,7 @@ public class NumberVectorLabelParser<V extends NumberVector<?>> extends Abstract
*/
protected void getFactory(Parameterization config) {
ObjectParameter<NumberVector.Factory<V, ?>> factoryP = new ObjectParameter<>(VECTOR_TYPE_ID, NumberVector.Factory.class, DoubleVector.Factory.class);
- if (config.grab(factoryP)) {
+ if(config.grab(factoryP)) {
factory = factoryP.instantiateClass(config);
}
}
@@ -406,10 +431,10 @@ public class NumberVectorLabelParser<V extends NumberVector<?>> extends Abstract
protected void getLabelIndices(Parameterization config) {
IntListParameter labelIndicesP = new IntListParameter(LABEL_INDICES_ID, true);
- if (config.grab(labelIndicesP)) {
+ if(config.grab(labelIndicesP)) {
labelIndices = new BitSet();
List<Integer> labelcols = labelIndicesP.getValue();
- for (Integer idx : labelcols) {
+ for(Integer idx : labelcols) {
labelIndices.set(idx.intValue());
}
}
@@ -417,7 +442,7 @@ public class NumberVectorLabelParser<V extends NumberVector<?>> extends Abstract
@Override
protected NumberVectorLabelParser<V> makeInstance() {
- return new NumberVectorLabelParser<>(colSep, quoteChar, comment, labelIndices, factory);
+ return new NumberVectorLabelParser<>(colSep, quoteChars, comment, labelIndices, factory);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/parser/SimplePolygonParser.java b/src/de/lmu/ifi/dbs/elki/datasource/parser/SimplePolygonParser.java
index a3d46ed8..457a161b 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/parser/SimplePolygonParser.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/parser/SimplePolygonParser.java
@@ -28,7 +28,6 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
-import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -41,7 +40,7 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.StringLengthConstraint;
+import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.PatternParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.StringParameter;
@@ -78,11 +77,11 @@ public class SimplePolygonParser extends AbstractParser implements Parser {
* Constructor.
*
* @param colSep Column separator
- * @param quoteChar Quotation character
+ * @param quoteChars Quotation characters
* @param comment Comment pattern
*/
- public SimplePolygonParser(Pattern colSep, char quoteChar, Pattern comment) {
- super(colSep, quoteChar, comment);
+ public SimplePolygonParser(Pattern colSep, String quoteChars, Pattern comment) {
+ super(colSep, quoteChars, comment);
}
@Override
@@ -94,17 +93,17 @@ public class SimplePolygonParser extends AbstractParser implements Parser {
List<LabelList> labels = null;
List<ExternalID> eids = new ArrayList<>();
try {
- for (String line; (line = reader.readLine()) != null; lineNumber++) {
+ for(String line; (line = reader.readLine()) != null; lineNumber++) {
// Skip empty lines and comments
- if (line.length() <= 0 || (comment != null && comment.matcher(line).matches())) {
+ if(line.length() <= 0 || (comment != null && comment.matcher(line).matches())) {
continue;
}
Object[] objs = parseLine(line);
polys.add((PolygonsObject) objs[0]);
- if (objs[1] != null) {
- if (labels == null) {
+ if(objs[1] != null) {
+ if(labels == null) {
labels = new ArrayList<>();
- for (int i = 0; i < polys.size() - 1; i++) {
+ for(int i = 0; i < polys.size() - 1; i++) {
labels.add(null);
}
}
@@ -112,13 +111,15 @@ public class SimplePolygonParser extends AbstractParser implements Parser {
}
eids.add((ExternalID) objs[2]);
}
- } catch (IOException e) {
+ }
+ catch(IOException e) {
throw new IllegalArgumentException("Error while parsing line " + lineNumber + ".");
}
- if (labels != null) {
+ if(labels != null) {
return MultipleObjectsBundle.makeSimple(TypeUtil.POLYGON_TYPE, polys, TypeUtil.LABELLIST, labels, TypeUtil.EXTERNALID, eids);
- } else {
+ }
+ else {
return MultipleObjectsBundle.makeSimple(TypeUtil.POLYGON_TYPE, polys, TypeUtil.EXTERNALID, eids);
}
}
@@ -131,56 +132,54 @@ public class SimplePolygonParser extends AbstractParser implements Parser {
* @return Parsed polygon
*/
private Object[] parseLine(String line) {
- List<String> entries = tokenize(line);
- Iterator<String> iter = entries.iterator();
-
ExternalID eid = null;
- LabelList labels = null;
List<Polygon> polys = new ArrayList<>(1);
+ ArrayList<String> labels = new ArrayList<>(); // TODO: reuse?
List<Vector> coords = new ArrayList<>();
- while (iter.hasNext()) {
- String cur = iter.next();
- Matcher m = COORD.matcher(cur);
- if (m.find()) {
+ for(tokenizer.initialize(line, 0, lengthWithoutLinefeed(line)); tokenizer.valid(); tokenizer.advance()) {
+ Matcher m = COORD.matcher(line).region(tokenizer.getStart(), tokenizer.getEnd());
+ if(m.find()) {
try {
- double c1 = Double.parseDouble(m.group(1));
- double c2 = Double.parseDouble(m.group(2));
- if (m.group(3) != null) {
- double c3 = Double.parseDouble(m.group(3));
+ double c1 = FormatUtil.parseDouble(m.group(1));
+ double c2 = FormatUtil.parseDouble(m.group(2));
+ if(m.group(3) != null) {
+ double c3 = FormatUtil.parseDouble(m.group(3));
coords.add(new Vector(new double[] { c1, c2, c3 }));
- } else {
+ }
+ else {
coords.add(new Vector(new double[] { c1, c2 }));
}
continue;
- } catch (NumberFormatException e) {
- LOG.warning("Looked like a coordinate pair but didn't parse: " + cur);
+ }
+ catch(NumberFormatException e) {
+ LOG.warning("Looked like a coordinate pair but didn't parse: " + tokenizer.getSubstring());
}
}
- // Polygon separator.
- if (cur.equals(POLYGON_SEPARATOR)) {
- if (coords.size() > 0) {
+ // Match polygon separator:
+ final int len = tokenizer.getEnd() - tokenizer.getStart();
+ if(POLYGON_SEPARATOR.length() == len && //
+ POLYGON_SEPARATOR.regionMatches(0, line, tokenizer.getStart(), len)) {
+ if(coords.size() > 0) {
polys.add(new Polygon(coords));
coords = new ArrayList<>();
}
continue;
}
+ String cur = tokenizer.getSubstring();
// First label will become the External ID
- if (eid == null) {
+ if(eid == null) {
eid = new ExternalID(cur);
- } else {
- // Label
- if (labels == null) {
- labels = new LabelList(1);
- }
+ }
+ else {
labels.add(cur);
}
}
// Complete polygon
- if (coords.size() > 0) {
+ if(coords.size() > 0) {
polys.add(new Polygon(coords));
}
- return new Object[] { new PolygonsObject(polys), labels, eid };
+ return new Object[] { new PolygonsObject(polys), LabelList.make(labels), eid };
}
@Override
@@ -199,24 +198,23 @@ public class SimplePolygonParser extends AbstractParser implements Parser {
@Override
protected void makeOptions(Parameterization config) {
PatternParameter colParam = new PatternParameter(COLUMN_SEPARATOR_ID, "\\s+");
- if (config.grab(colParam)) {
+ if(config.grab(colParam)) {
colSep = colParam.getValue();
}
- StringParameter quoteParam = new StringParameter(QUOTE_ID, String.valueOf(QUOTE_CHAR));
- quoteParam.addConstraint(new StringLengthConstraint(1, 1));
- if (config.grab(quoteParam)) {
- quoteChar = quoteParam.getValue().charAt(0);
+ StringParameter quoteParam = new StringParameter(QUOTE_ID, QUOTE_CHARS);
+ if(config.grab(quoteParam)) {
+ quoteChars = quoteParam.getValue();
}
PatternParameter commentP = new PatternParameter(COMMENT_ID, COMMENT_PATTERN);
- if (config.grab(commentP)) {
+ if(config.grab(commentP)) {
comment = commentP.getValue();
}
}
@Override
protected SimplePolygonParser makeInstance() {
- return new SimplePolygonParser(colSep, quoteChar, comment);
+ return new SimplePolygonParser(colSep, quoteChars, comment);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/parser/SparseBitVectorLabelParser.java b/src/de/lmu/ifi/dbs/elki/datasource/parser/SparseBitVectorLabelParser.java
index 5f9e5e05..06925e67 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/parser/SparseBitVectorLabelParser.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/parser/SparseBitVectorLabelParser.java
@@ -65,11 +65,11 @@ public class SparseBitVectorLabelParser extends AbstractParser implements Parser
* Constructor.
*
* @param colSep Column separator
- * @param quoteChar Quotation character
+ * @param quoteChars Quotation characters
* @param comment Comment pattern
*/
- public SparseBitVectorLabelParser(Pattern colSep, char quoteChar, Pattern comment) {
- super(colSep, quoteChar, comment);
+ public SparseBitVectorLabelParser(Pattern colSep, String quoteChars, Pattern comment) {
+ super(colSep, quoteChars, comment);
}
@Override
@@ -82,40 +82,37 @@ public class SparseBitVectorLabelParser extends AbstractParser implements Parser
try {
List<BitSet> bitSets = new ArrayList<>();
List<LabelList> allLabels = new ArrayList<>();
- for (String line; (line = reader.readLine()) != null; lineNumber++) {
+ ArrayList<String> labels = new ArrayList<>();
+ for(String line; (line = reader.readLine()) != null; lineNumber++) {
// Skip empty lines and comments
- if (line.length() <= 0 || (comment != null && comment.matcher(line).matches())) {
+ if(line.length() <= 0 || (comment != null && comment.matcher(line).matches())) {
continue;
}
- List<String> entries = tokenize(line);
BitSet bitSet = new BitSet();
- LabelList labels = null;
+ labels.clear();
- for (String entry : entries) {
+ for(tokenizer.initialize(line, 0, lengthWithoutLinefeed(line)); tokenizer.valid(); tokenizer.advance()) {
try {
- int index = Integer.parseInt(entry);
+ int index = (int) tokenizer.getLongBase10();
bitSet.set(index);
dimensionality = Math.max(dimensionality, index);
- } catch (NumberFormatException e) {
- if (labels == null) {
- labels = new LabelList(1);
- }
- labels.add(entry);
+ }
+ catch(NumberFormatException e) {
+ labels.add(tokenizer.getSubstring());
}
}
bitSets.add(bitSet);
- allLabels.add(labels);
+ allLabels.add(LabelList.make(labels));
}
- dimensionality++;
- for (int i = 0; i < bitSets.size(); i++) {
- BitSet bitSet = bitSets.get(i);
- LabelList labels = allLabels.get(i);
- vectors.add(new BitVector(bitSet, dimensionality));
- lblc.add(labels);
+ ++dimensionality;
+ for(int i = 0; i < bitSets.size(); i++) {
+ vectors.add(new BitVector(bitSets.get(i), dimensionality));
+ lblc.add(allLabels.get(i));
}
- } catch (IOException e) {
+ }
+ catch(IOException e) {
throw new IllegalArgumentException("Error while parsing line " + lineNumber + ".");
}
return MultipleObjectsBundle.makeSimple(getTypeInformation(dimensionality), vectors, TypeUtil.LABELLIST, lblc);
@@ -140,7 +137,7 @@ public class SparseBitVectorLabelParser extends AbstractParser implements Parser
public static class Parameterizer extends AbstractParser.Parameterizer {
@Override
protected SparseBitVectorLabelParser makeInstance() {
- return new SparseBitVectorLabelParser(colSep, quoteChar, comment);
+ return new SparseBitVectorLabelParser(colSep, quoteChars, comment);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/parser/SparseFloatVectorLabelParser.java b/src/de/lmu/ifi/dbs/elki/datasource/parser/SparseFloatVectorLabelParser.java
index d5fe6219..87efbf55 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/parser/SparseFloatVectorLabelParser.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/parser/SparseFloatVectorLabelParser.java
@@ -65,7 +65,6 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
*
* @deprecated Use {@link SparseNumberVectorLabelParser} instead!
*/
-// FIXME: Maxdim!
@Title("Sparse Float Vector Label Parser")
@Description("Parser for the following line format:\n" + "A single line provides a single point. Entries are separated by whitespace. " + "The values will be parsed as floats (resulting in a set of SparseFloatVectors). A line is expected in the following format: The first entry of each line is the number of attributes with coordinate value not zero. Subsequent entries are of the form (index, value), where index is the number of the corresponding dimension, and value is the value of the corresponding attribute." + "Any pair of two subsequent substrings not containing whitespace is tried to be read as int and float. If this fails for the first of the pair (interpreted ans index), it will be appended to a label. (Thus, any label must not be parseable as Integer.) If the float component is not parseable, an exception will be thrown. Empty lines and lines beginning with \"#\" will be ignored.")
@Deprecated
@@ -74,12 +73,12 @@ public class SparseFloatVectorLabelParser extends SparseNumberVectorLabelParser<
* Constructor.
*
* @param colSep Column separator
- * @param quoteChar Quotation character
+ * @param quoteChars Quotation characters
* @param comment Comment pattern
* @param labelIndices Indices to use as labels
*/
- public SparseFloatVectorLabelParser(Pattern colSep, char quoteChar, Pattern comment, BitSet labelIndices) {
- super(colSep, quoteChar, comment, labelIndices, SparseFloatVector.FACTORY);
+ public SparseFloatVectorLabelParser(Pattern colSep, String quoteChars, Pattern comment, BitSet labelIndices) {
+ super(colSep, quoteChars, comment, labelIndices, SparseFloatVector.FACTORY);
}
/**
@@ -92,7 +91,7 @@ public class SparseFloatVectorLabelParser extends SparseNumberVectorLabelParser<
public static class Parameterizer extends SparseNumberVectorLabelParser.Parameterizer<SparseFloatVector> {
@Override
protected SparseFloatVectorLabelParser makeInstance() {
- return new SparseFloatVectorLabelParser(colSep, quoteChar, comment, labelIndices);
+ return new SparseFloatVectorLabelParser(colSep, quoteChars, comment, labelIndices);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/parser/SparseNumberVectorLabelParser.java b/src/de/lmu/ifi/dbs/elki/datasource/parser/SparseNumberVectorLabelParser.java
index bdd8ab77..902d59a9 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/parser/SparseNumberVectorLabelParser.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/parser/SparseNumberVectorLabelParser.java
@@ -25,8 +25,8 @@ package de.lmu.ifi.dbs.elki.datasource.parser;
import gnu.trove.map.hash.TIntDoubleHashMap;
+import java.util.ArrayList;
import java.util.BitSet;
-import java.util.List;
import java.util.regex.Pattern;
import de.lmu.ifi.dbs.elki.data.LabelList;
@@ -92,68 +92,71 @@ public class SparseNumberVectorLabelParser<V extends SparseNumberVector<?>> exte
private SparseNumberVector.Factory<V, ?> sparsefactory;
/**
+ * (Reused) set of values for the number vector.
+ */
+ TIntDoubleHashMap values = new TIntDoubleHashMap();
+
+ /**
+ * (Reused) label buffer.
+ */
+ ArrayList<String> labels = new ArrayList<>();
+
+ /**
* Constructor.
*
* @param colSep Column separator
- * @param quoteChar Quotation character
+ * @param quoteChars Quotation characters
* @param comment Comment pattern
* @param labelIndices Indices to use as labels
* @param factory Vector factory
*/
- public SparseNumberVectorLabelParser(Pattern colSep, char quoteChar, Pattern comment, BitSet labelIndices, SparseNumberVector.Factory<V, ?> factory) {
- super(colSep, quoteChar, comment, labelIndices, factory);
+ public SparseNumberVectorLabelParser(Pattern colSep, String quoteChars, Pattern comment, BitSet labelIndices, SparseNumberVector.Factory<V, ?> factory) {
+ super(colSep, quoteChars, comment, labelIndices, factory);
this.sparsefactory = factory;
}
@Override
protected void parseLineInternal(String line) {
- List<String> entries = tokenize(line);
- int cardinality = Integer.parseInt(entries.get(0));
+ tokenizer.initialize(line, 0, lengthWithoutLinefeed(line));
+ int cardinality = (int) tokenizer.getLongBase10();
- TIntDoubleHashMap values = new TIntDoubleHashMap(cardinality, 1);
- LabelList labels = null;
+ values.clear();
+ labels.clear();
int thismax = 0;
-
- for (int i = 1; i < entries.size() - 1; i++) {
- if (labelIndices == null || !labelIndices.get(i)) {
+
+ while(tokenizer.valid()) {
+ if(values.size() < cardinality) {
try {
- int index = Integer.parseInt(entries.get(i));
- if (index >= maxdim) {
- maxdim = index + 1;
- }
- thismax = Math.max(thismax, index);
- double attribute = parseDouble(entries.get(i));
- values.put(index, attribute);
- i++;
- } catch (NumberFormatException e) {
- if (labels == null) {
- labels = new LabelList(1);
+ int index = (int) tokenizer.getLongBase10();
+ tokenizer.advance();
+ // Respect labelIndices.
+ if(labelIndices == null || !labelIndices.get(index)) {
+ double attribute = tokenizer.getDouble();
+ thismax = Math.max(thismax, index + 1);
+ values.put(index, attribute);
+ tokenizer.advance();
+ continue;
}
- labels.add(entries.get(i));
- continue;
}
- } else {
- if (labels == null) {
- labels = new LabelList(1);
+ catch(NumberFormatException e) {
+ // continue with fallback below.
}
- labels.add(entries.get(i));
}
+ // Fallback: treat as label
+ haslabels = true;
+ labels.add(tokenizer.getSubstring());
+ tokenizer.advance();
}
- if (values.size() > maxdim) {
- throw new AbortException("Invalid sparse vector seen: " + line);
- }
- if (thismax < mindim) {
- mindim = thismax;
- }
- curvec = sparsefactory.newNumberVector(values, maxdim);
- curlbl = labels;
+ curvec = sparsefactory.newNumberVector(values, thismax);
+ curlbl = LabelList.make(labels);
}
@Override
protected SimpleTypeInformation<V> getTypeInformation(int mindim, int maxdim) {
- if (mindim == maxdim) {
+ if(mindim == maxdim) {
return new VectorFieldTypeInformation<>(factory, mindim);
- } else if (mindim < maxdim) {
+ }
+ else if(mindim < maxdim) {
return new VectorTypeInformation<>(factory.getRestrictionClass(), factory.getDefaultSerializer(), mindim, maxdim);
}
throw new AbortException("No vectors were read from the input file - cannot determine vector data type.");
@@ -175,14 +178,14 @@ public class SparseNumberVectorLabelParser<V extends SparseNumberVector<?>> exte
@Override
protected void getFactory(Parameterization config) {
ObjectParameter<SparseNumberVector.Factory<V, ?>> factoryP = new ObjectParameter<>(VECTOR_TYPE_ID, SparseNumberVector.Factory.class, SparseFloatVector.Factory.class);
- if (config.grab(factoryP)) {
+ if(config.grab(factoryP)) {
factory = factoryP.instantiateClass(config);
}
}
@Override
protected SparseNumberVectorLabelParser<V> makeInstance() {
- return new SparseNumberVectorLabelParser<>(colSep, quoteChar, comment, labelIndices, (SparseNumberVector.Factory<V, ?>) factory);
+ return new SparseNumberVectorLabelParser<>(colSep, quoteChars, comment, labelIndices, (SparseNumberVector.Factory<V, ?>) factory);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/parser/StringParser.java b/src/de/lmu/ifi/dbs/elki/datasource/parser/StringParser.java
index 41f21c5d..6541b881 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/parser/StringParser.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/parser/StringParser.java
@@ -82,6 +82,7 @@ public class StringParser implements Parser {
int lineNumber = 0;
List<String> data = new ArrayList<>();
List<LabelList> labels = new ArrayList<>();
+ ArrayList<String> ll = new ArrayList<>(1);
try {
for (String line; (line = reader.readLine()) != null; lineNumber++) {
// Skip empty lines and comments
@@ -90,9 +91,9 @@ public class StringParser implements Parser {
}
final String val = trimWhitespace ? line.trim() : line;
data.add(val);
- LabelList ll = new LabelList(1);
+ ll.clear();
ll.add(val);
- labels.add(ll);
+ labels.add(LabelList.make(ll));
}
} catch (IOException e) {
throw new IllegalArgumentException("Error while parsing line " + lineNumber + ".");
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/parser/TermFrequencyParser.java b/src/de/lmu/ifi/dbs/elki/datasource/parser/TermFrequencyParser.java
index 580c5320..f0ccbf50 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/parser/TermFrequencyParser.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/parser/TermFrequencyParser.java
@@ -28,8 +28,8 @@ import gnu.trove.map.TObjectIntMap;
import gnu.trove.map.hash.TIntDoubleHashMap;
import gnu.trove.map.hash.TObjectIntHashMap;
+import java.util.ArrayList;
import java.util.BitSet;
-import java.util.List;
import java.util.regex.Pattern;
import de.lmu.ifi.dbs.elki.data.LabelList;
@@ -84,16 +84,26 @@ public class TermFrequencyParser<V extends SparseNumberVector<?>> extends Number
private SparseNumberVector.Factory<V, ?> sparsefactory;
/**
+ * (Reused) set of values for the number vector.
+ */
+ TIntDoubleHashMap values = new TIntDoubleHashMap();
+
+ /**
+ * (Reused) label buffer.
+ */
+ ArrayList<String> labels = new ArrayList<>();
+
+ /**
* Constructor.
*
* @param normalize Normalize
* @param colSep Column separator
- * @param quoteChar Quotation character
+ * @param quoteChars Quotation characters
* @param comment Comment pattern
* @param labelIndices Indices to use as labels
*/
- public TermFrequencyParser(boolean normalize, Pattern colSep, char quoteChar, Pattern comment, BitSet labelIndices, SparseNumberVector.Factory<V, ?> factory) {
- super(colSep, quoteChar, comment, labelIndices, factory);
+ public TermFrequencyParser(boolean normalize, Pattern colSep, String quoteChars, Pattern comment, BitSet labelIndices, SparseNumberVector.Factory<V, ?> factory) {
+ super(colSep, quoteChars, comment, labelIndices, factory);
this.normalize = normalize;
this.keymap = new TObjectIntHashMap<>(1001, .5f, -1);
this.sparsefactory = factory;
@@ -101,21 +111,20 @@ public class TermFrequencyParser<V extends SparseNumberVector<?>> extends Number
@Override
protected void parseLineInternal(String line) {
- List<String> entries = tokenize(line);
-
double len = 0;
- TIntDoubleHashMap values = new TIntDoubleHashMap();
- LabelList labels = null;
+ values.clear();
+ labels.clear();
String curterm = null;
- for (int i = 0; i < entries.size(); i++) {
- if (curterm == null) {
- curterm = entries.get(i);
- } else {
+ for(tokenizer.initialize(line, 0, lengthWithoutLinefeed(line)); tokenizer.valid(); tokenizer.advance()) {
+ if(curterm == null) {
+ curterm = tokenizer.getSubstring();
+ }
+ else {
try {
- double attribute = parseDouble(entries.get(i));
+ double attribute = tokenizer.getDouble();
int curdim = keymap.get(curterm);
- if (curdim < 0) {
+ if(curdim < 0) {
curdim = numterms;
keymap.put(curterm, curdim);
++numterms;
@@ -123,26 +132,21 @@ public class TermFrequencyParser<V extends SparseNumberVector<?>> extends Number
values.put(curdim, attribute);
len += attribute;
curterm = null;
- } catch (NumberFormatException e) {
- if (curterm != null) {
- if (labels == null) {
- labels = new LabelList(1);
- }
+ }
+ catch(NumberFormatException e) {
+ if(curterm != null) {
labels.add(curterm);
}
- curterm = entries.get(i);
+ curterm = tokenizer.getSubstring();
}
}
}
- if (curterm != null) {
- if (labels == null) {
- labels = new LabelList(1);
- }
+ if(curterm != null) {
labels.add(curterm);
}
- if (normalize) {
- if (Math.abs(len - 1.0) > 1E-10 && len > 1E-10) {
- for (TIntDoubleIterator iter = values.iterator(); iter.hasNext();) {
+ if(normalize) {
+ if(Math.abs(len - 1.0) > 1E-10 && len > 1E-10) {
+ for(TIntDoubleIterator iter = values.iterator(); iter.hasNext();) {
iter.advance();
iter.setValue(iter.value() / len);
}
@@ -150,14 +154,15 @@ public class TermFrequencyParser<V extends SparseNumberVector<?>> extends Number
}
curvec = sparsefactory.newNumberVector(values, numterms);
- curlbl = labels;
+ curlbl = LabelList.make(labels);
}
@Override
protected SimpleTypeInformation<V> getTypeInformation(int mindim, int maxdim) {
- if (mindim == maxdim) {
+ if(mindim == maxdim) {
return new VectorFieldTypeInformation<>(factory, mindim);
- } else if (mindim < maxdim) {
+ }
+ else if(mindim < maxdim) {
return new VectorTypeInformation<>(factory.getRestrictionClass(), factory.getDefaultSerializer(), mindim, maxdim);
}
throw new AbortException("No vectors were read from the input file - cannot determine vector data type.");
@@ -190,7 +195,7 @@ public class TermFrequencyParser<V extends SparseNumberVector<?>> extends Number
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
Flag normF = new Flag(NORMALIZE_FLAG);
- if (config.grab(normF)) {
+ if(config.grab(normF)) {
normalize = normF.getValue().booleanValue();
}
}
@@ -198,14 +203,14 @@ public class TermFrequencyParser<V extends SparseNumberVector<?>> extends Number
@Override
protected void getFactory(Parameterization config) {
ObjectParameter<SparseNumberVector.Factory<V, ?>> factoryP = new ObjectParameter<>(VECTOR_TYPE_ID, SparseNumberVector.Factory.class, SparseFloatVector.Factory.class);
- if (config.grab(factoryP)) {
+ if(config.grab(factoryP)) {
factory = factoryP.instantiateClass(config);
}
}
@Override
protected TermFrequencyParser<V> makeInstance() {
- return new TermFrequencyParser<>(normalize, colSep, quoteChar, comment, labelIndices, (SparseNumberVector.Factory<V, ?>) factory);
+ return new TermFrequencyParser<>(normalize, colSep, quoteChars, comment, labelIndices, (SparseNumberVector.Factory<V, ?>) factory);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/parser/Tokenizer.java b/src/de/lmu/ifi/dbs/elki/datasource/parser/Tokenizer.java
new file mode 100644
index 00000000..0cf4c81a
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/datasource/parser/Tokenizer.java
@@ -0,0 +1,230 @@
+package de.lmu.ifi.dbs.elki.datasource.parser;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter;
+
+/**
+ * String tokenizer.
+ *
+ * @author Erich Schubert
+ */
+public class Tokenizer implements Iter {
+ /**
+ * Class logger.
+ */
+ private static final Logging LOG = Logging.getLogger(Tokenizer.class);
+
+ /**
+ * Separator pattern.
+ */
+ private Pattern colSep;
+
+ /**
+ * Quote characters
+ */
+ public static final String QUOTE_CHAR = "\"'";
+
+ /**
+ * Stores the quotation character
+ */
+ private char[] quoteChars = QUOTE_CHAR.toCharArray();
+
+ /**
+ * Constructor.
+ *
+ * @param colSep Column separator pattern.
+ * @param quoteChars Quotation characters.
+ */
+ public Tokenizer(Pattern colSep, String quoteChars) {
+ super();
+ this.colSep = colSep;
+ this.quoteChars = quoteChars.toCharArray();
+ }
+
+ /**
+ * Regular expression match helper.
+ */
+ private Matcher m = null;
+
+ /**
+ * Data currently processed.
+ */
+ private CharSequence input;
+
+ /**
+ * Substring to process.
+ */
+ private int send;
+
+ /**
+ * Current positions of result and iterator.
+ */
+ private int start, end, index;
+
+ /**
+ * Initialize parser with a new string.
+ *
+ * @param input New string to parse.
+ * @param begin Begin
+ * @param end End
+ */
+ public void initialize(CharSequence input, int begin, int end) {
+ this.input = input;
+ this.send = end;
+ this.m = colSep.matcher(input).region(begin, end);
+ this.index = begin;
+ advance();
+ }
+
+ @Override
+ public boolean valid() {
+ return start < send;
+ }
+
+ @Override
+ public void advance() {
+ char inquote = isQuote(index);
+ while(m.find()) {
+ // Quoted code path vs. regular code path
+ if(inquote != 0) {
+ // Matching closing quote found?
+ if(m.start() > index + 1 && input.charAt(m.start() - 1) == inquote) {
+ this.start = index + 1;
+ this.end = m.start() - 1;
+ this.index = m.end();
+ return;
+ }
+ continue;
+ }
+ else {
+ this.start = index;
+ this.end = m.start();
+ this.index = m.end();
+ return;
+ }
+ }
+ // Add tail after last separator.
+ this.start = index;
+ this.end = send;
+ this.index = end + 1;
+ if(inquote != 0) {
+ final int last = send - 1;
+ if(input.charAt(last) == inquote) {
+ ++this.start;
+ --this.end;
+ }
+ else {
+ LOG.warning("Invalid quoted line in input: no closing quote found in: " + input);
+ }
+ }
+ }
+
+ /**
+ * Get the current part as substring
+ *
+ * @return Current value as substring.
+ */
+ public String getSubstring() {
+ // TODO: detect Java <6 and make sure we only return the substring?
+ // With java 7, String.substring will arraycopy the characters.
+ return input.subSequence(start, end).toString();
+ }
+
+ /**
+ * Get current value as double.
+ *
+ * @return double value
+ * @throws NumberFormatException when current value cannot be parsed as double
+ * value.
+ */
+ public double getDouble() throws NumberFormatException {
+ return FormatUtil.parseDouble(input, start, end);
+ }
+
+ /**
+ * Get current value as long.
+ *
+ * @return long value
+ * @throws NumberFormatException when current value cannot be parsed as long
+ * value.
+ */
+ public long getLongBase10() throws NumberFormatException {
+ return FormatUtil.parseLongBase10(input, start, end);
+ }
+
+ /**
+ * Test for empty tokens; usually at end of line.
+ *
+ * @return Empty
+ */
+ public boolean isEmpty() {
+ return end <= start;
+ }
+
+ /**
+ * Detect quote characters.
+ *
+ * TODO: support more than one quote character, make sure opening and closing
+ * quotes match then.
+ *
+ * @param index Position
+ * @return the quote character when the position holds one, {@code 0} otherwise.
+ */
+ private char isQuote(int index) {
+ if(index >= input.length()) {
+ return 0;
+ }
+ char c = input.charAt(index);
+ for(int i = 0; i < quoteChars.length; i++) {
+ if(c == quoteChars[i]) {
+ return c;
+ }
+ }
+ return 0;
+ }
+
+ /**
+ * Get start of token.
+ *
+ * @return Start
+ */
+ public int getStart() {
+ return start;
+ }
+
+ /**
+ * Get end of token.
+ *
+ * @return End
+ */
+ public int getEnd() {
+ return end;
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/AbstractVectorDoubleDistanceFunction.java b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/AbstractVectorDoubleDistanceFunction.java
index ea178bbb..e29d9237 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/AbstractVectorDoubleDistanceFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/AbstractVectorDoubleDistanceFunction.java
@@ -73,8 +73,7 @@ public abstract class AbstractVectorDoubleDistanceFunction extends AbstractPrimi
* @throws IllegalArgumentException when dimensionalities are not the same.
*/
public static final int dimensionality(SpatialComparable o1, SpatialComparable o2) {
- final int dim1 = o1.getDimensionality();
- final int dim2 = o2.getDimensionality();
+ final int dim1 = o1.getDimensionality(), dim2 = o2.getDimensionality();
if (dim1 != dim2) {
throw new IllegalArgumentException("Objects do not have the same dimensionality.");
}
@@ -92,8 +91,42 @@ public abstract class AbstractVectorDoubleDistanceFunction extends AbstractPrimi
* @throws IllegalArgumentException when dimensionalities are not the same.
*/
public static final int dimensionality(SpatialComparable o1, SpatialComparable o2, int expect) {
- final int dim1 = o1.getDimensionality();
- final int dim2 = o2.getDimensionality();
+ final int dim1 = o1.getDimensionality(), dim2 = o2.getDimensionality();
+ if (dim1 != dim2 || dim1 != expect) {
+ throw new IllegalArgumentException("Objects do not have the expected dimensionality of " + expect);
+ }
+ return expect;
+ }
+
+ /**
+ * Get the common dimensionality of the two objects. Throw an
+ * {@link IllegalArgumentException} otherwise.
+ *
+ * @param o1 First vector / MBR
+ * @param o2 Second vector / MBR
+ * @return Common dimensionality
+ * @throws IllegalArgumentException when dimensionalities are not the same.
+ */
+ public static final int dimensionality(NumberVector<?> o1, NumberVector<?> o2) {
+ final int dim1 = o1.getDimensionality(), dim2 = o2.getDimensionality();
+ if (dim1 != dim2) {
+ throw new IllegalArgumentException("Objects do not have the same dimensionality.");
+ }
+ return dim1;
+ }
+
+ /**
+ * Get the common dimensionality of the two objects. Throw an
+ * {@link IllegalArgumentException} otherwise.
+ *
+ * @param o1 First vector / MBR
+ * @param o2 Second vector / MBR
+ * @param expect Expected dimensionality
+ * @return Common dimensionality
+ * @throws IllegalArgumentException when dimensionalities are not the same.
+ */
+ public static final int dimensionality(NumberVector<?> o1, NumberVector<?> o2, int expect) {
+ final int dim1 = o1.getDimensionality(), dim2 = o2.getDimensionality();
if (dim1 != dim2 || dim1 != expect) {
throw new IllegalArgumentException("Objects do not have the expected dimensionality of " + expect);
}
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/MinKDistance.java b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/MinKDistance.java
index 5e29ec51..4da3a5dd 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/MinKDistance.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/MinKDistance.java
@@ -37,7 +37,7 @@ import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistance
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
@@ -132,7 +132,7 @@ public class MinKDistance<O, D extends Distance<D>> extends AbstractDatabaseDist
* KNN query instance
*/
private KNNQuery<T, D> knnQuery;
-
+
/**
* Value for k
*/
@@ -204,16 +204,16 @@ public class MinKDistance<O, D extends Distance<D>> extends AbstractDatabaseDist
public TypeInformation getInputTypeRestriction() {
return parentDistance.getInputTypeRestriction();
}
-
+
@Override
public boolean equals(Object obj) {
if(obj == null) {
return false;
}
- if (!this.getClass().equals(obj.getClass())) {
+ if(!this.getClass().equals(obj.getClass())) {
return false;
}
- MinKDistance<?,?> other = (MinKDistance<?, ?>) obj;
+ MinKDistance<?, ?> other = (MinKDistance<?, ?>) obj;
return this.parentDistance.equals(other.parentDistance) && this.k == other.k;
}
@@ -239,16 +239,16 @@ public class MinKDistance<O, D extends Distance<D>> extends AbstractDatabaseDist
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
final IntParameter kP = new IntParameter(K_ID);
- kP.addConstraint(new GreaterConstraint(1));
+ kP.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
if(config.grab(kP)) {
k = kP.getValue();
}
-
+
final ObjectParameter<DistanceFunction<? super O, D>> parentDistanceP = new ObjectParameter<>(DISTANCE_FUNCTION_ID, DistanceFunction.class, EuclideanDistanceFunction.class);
- if (config.grab(parentDistanceP)) {
+ if(config.grab(parentDistanceP)) {
parentDistance = parentDistanceP.instantiateClass(config);
}
- }
+ }
@Override
protected MinKDistance<O, D> makeInstance() {
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/adapter/AbstractSimilarityAdapter.java b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/adapter/AbstractSimilarityAdapter.java
index 890a3ece..b2bd9a72 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/adapter/AbstractSimilarityAdapter.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/adapter/AbstractSimilarityAdapter.java
@@ -70,14 +70,14 @@ public abstract class AbstractSimilarityAdapter<O> extends AbstractDatabaseDista
/**
* Holds the similarity function.
*/
- protected NormalizedSimilarityFunction<? super O, ? extends NumberDistance<?, ?>> similarityFunction;
+ protected NormalizedSimilarityFunction<? super O> similarityFunction;
/**
* Constructor.
*
* @param similarityFunction Similarity function to use.
*/
- public AbstractSimilarityAdapter(NormalizedSimilarityFunction<? super O, ? extends NumberDistance<?, ?>> similarityFunction) {
+ public AbstractSimilarityAdapter(NormalizedSimilarityFunction<? super O> similarityFunction) {
super();
this.similarityFunction = similarityFunction;
}
@@ -102,11 +102,11 @@ public abstract class AbstractSimilarityAdapter<O> extends AbstractDatabaseDista
@Override
public boolean equals(Object obj) {
- if(obj == null) {
+ if (obj == null) {
return false;
}
// Same subclass
- if(!this.getClass().equals(obj.getClass())) {
+ if (!this.getClass().equals(obj.getClass())) {
return false;
}
// Same similarity function
@@ -170,15 +170,15 @@ public abstract class AbstractSimilarityAdapter<O> extends AbstractDatabaseDista
/**
* Holds the similarity function.
*/
- protected NormalizedSimilarityFunction<? super O, ? extends NumberDistance<?, ?>> similarityFunction = null;
+ protected NormalizedSimilarityFunction<? super O> similarityFunction = null;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final ObjectParameter<NormalizedSimilarityFunction<? super O, ? extends NumberDistance<?, ?>>> param = new ObjectParameter<>(SIMILARITY_FUNCTION_ID, NormalizedSimilarityFunction.class, FractionalSharedNearestNeighborSimilarityFunction.class);
- if(config.grab(param)) {
+ final ObjectParameter<NormalizedSimilarityFunction<? super O>> param = new ObjectParameter<>(SIMILARITY_FUNCTION_ID, NormalizedSimilarityFunction.class, FractionalSharedNearestNeighborSimilarityFunction.class);
+ if (config.grab(param)) {
similarityFunction = param.instantiateClass(config);
}
}
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/adapter/ArccosSimilarityAdapter.java b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/adapter/ArccosSimilarityAdapter.java
index a808e2a5..4bb2b99d 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/adapter/ArccosSimilarityAdapter.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/adapter/ArccosSimilarityAdapter.java
@@ -47,7 +47,7 @@ public class ArccosSimilarityAdapter<O> extends AbstractSimilarityAdapter<O> {
*
* @param similarityFunction Similarity function
*/
- public ArccosSimilarityAdapter(NormalizedSimilarityFunction<? super O, ? extends NumberDistance<?, ?>> similarityFunction) {
+ public ArccosSimilarityAdapter(NormalizedSimilarityFunction<? super O> similarityFunction) {
super(similarityFunction);
}
@@ -95,4 +95,4 @@ public class ArccosSimilarityAdapter<O> extends AbstractSimilarityAdapter<O> {
return new ArccosSimilarityAdapter<>(similarityFunction);
}
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/adapter/LinearAdapterLinear.java b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/adapter/LinearAdapterLinear.java
index c78ff112..428b2c41 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/adapter/LinearAdapterLinear.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/adapter/LinearAdapterLinear.java
@@ -47,7 +47,7 @@ public class LinearAdapterLinear<O> extends AbstractSimilarityAdapter<O> {
*
* @param similarityFunction Similarity function
*/
- public LinearAdapterLinear(NormalizedSimilarityFunction<? super O, ? extends NumberDistance<?, ?>> similarityFunction) {
+ public LinearAdapterLinear(NormalizedSimilarityFunction<? super O> similarityFunction) {
super(similarityFunction);
}
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/adapter/LnSimilarityAdapter.java b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/adapter/LnSimilarityAdapter.java
index 2a9b49f9..cffd9e2a 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/adapter/LnSimilarityAdapter.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/adapter/LnSimilarityAdapter.java
@@ -47,7 +47,7 @@ public class LnSimilarityAdapter<O> extends AbstractSimilarityAdapter<O> {
*
* @param similarityFunction Similarity function
*/
- public LnSimilarityAdapter(NormalizedSimilarityFunction<? super O, ? extends NumberDistance<?, ?>> similarityFunction) {
+ public LnSimilarityAdapter(NormalizedSimilarityFunction<? super O> similarityFunction) {
super(similarityFunction);
}
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/colorhistogram/HSBHistogramQuadraticDistanceFunction.java b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/colorhistogram/HSBHistogramQuadraticDistanceFunction.java
index 414a4787..3dae70e8 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/colorhistogram/HSBHistogramQuadraticDistanceFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/colorhistogram/HSBHistogramQuadraticDistanceFunction.java
@@ -31,8 +31,7 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ListEachConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ListSizeConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntListParameter;
@@ -113,12 +112,12 @@ public class HSBHistogramQuadraticDistanceFunction extends WeightedDistanceFunct
if(obj == null) {
return false;
}
- if (!this.getClass().equals(obj.getClass())) {
+ if(!this.getClass().equals(obj.getClass())) {
return false;
}
- return this.weightMatrix.equals(((HSBHistogramQuadraticDistanceFunction)obj).weightMatrix);
+ return this.weightMatrix.equals(((HSBHistogramQuadraticDistanceFunction) obj).weightMatrix);
}
-
+
/**
* Parameterization class.
*
@@ -138,7 +137,7 @@ public class HSBHistogramQuadraticDistanceFunction extends WeightedDistanceFunct
super.makeOptions(config);
IntListParameter param = new IntListParameter(BPP_ID);
param.addConstraint(new ListSizeConstraint(3));
- param.addConstraint(new ListEachConstraint<Integer>(new GreaterEqualConstraint(1)));
+ param.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT_LIST);
if(config.grab(param)) {
List<Integer> quant = param.getValue();
assert (quant.size() == 3);
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/correlation/ERiCDistanceFunction.java b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/correlation/ERiCDistanceFunction.java
index bece8279..b8c18304 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/correlation/ERiCDistanceFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/correlation/ERiCDistanceFunction.java
@@ -37,7 +37,7 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredResult;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
@@ -129,14 +129,14 @@ public class ERiCDistanceFunction extends AbstractIndexBasedDistanceFunction<Num
private boolean approximatelyLinearDependent(PCAFilteredResult pca1, PCAFilteredResult pca2) {
Matrix m1_czech = pca1.dissimilarityMatrix();
Matrix v2_strong = pca2.adapatedStrongEigenvectors();
- for (int i = 0; i < v2_strong.getColumnDimensionality(); i++) {
+ for(int i = 0; i < v2_strong.getColumnDimensionality(); i++) {
Vector v2_i = v2_strong.getCol(i);
// check, if distance of v2_i to the space of pca_1 > delta
// (i.e., if v2_i spans up a new dimension)
double dist = Math.sqrt(v2_i.transposeTimes(v2_i) - v2_i.transposeTimesTimes(m1_czech, v2_i));
// if so, return false
- if (dist > delta) {
+ if(dist > delta) {
return false;
}
}
@@ -157,34 +157,36 @@ public class ERiCDistanceFunction extends AbstractIndexBasedDistanceFunction<Num
* distance function
*/
public BitDistance distance(NumberVector<?> v1, NumberVector<?> v2, PCAFilteredResult pca1, PCAFilteredResult pca2) {
- if (pca1.getCorrelationDimension() < pca2.getCorrelationDimension()) {
+ if(pca1.getCorrelationDimension() < pca2.getCorrelationDimension()) {
throw new IllegalStateException("pca1.getCorrelationDimension() < pca2.getCorrelationDimension(): " + pca1.getCorrelationDimension() + " < " + pca2.getCorrelationDimension());
}
boolean approximatelyLinearDependent;
- if (pca1.getCorrelationDimension() == pca2.getCorrelationDimension()) {
+ if(pca1.getCorrelationDimension() == pca2.getCorrelationDimension()) {
approximatelyLinearDependent = approximatelyLinearDependent(pca1, pca2) && approximatelyLinearDependent(pca2, pca1);
- } else {
+ }
+ else {
approximatelyLinearDependent = approximatelyLinearDependent(pca1, pca2);
}
- if (!approximatelyLinearDependent) {
+ if(!approximatelyLinearDependent) {
return new BitDistance(true);
}
else {
double affineDistance;
- if (pca1.getCorrelationDimension() == pca2.getCorrelationDimension()) {
+ if(pca1.getCorrelationDimension() == pca2.getCorrelationDimension()) {
WeightedDistanceFunction df1 = new WeightedDistanceFunction(pca1.similarityMatrix());
WeightedDistanceFunction df2 = new WeightedDistanceFunction(pca2.similarityMatrix());
affineDistance = Math.max(df1.distance(v1, v2).doubleValue(), df2.distance(v1, v2).doubleValue());
- } else {
+ }
+ else {
WeightedDistanceFunction df1 = new WeightedDistanceFunction(pca1.similarityMatrix());
affineDistance = df1.distance(v1, v2).doubleValue();
}
- if (affineDistance > tau) {
+ if(affineDistance > tau) {
return new BitDistance(true);
}
@@ -194,10 +196,10 @@ public class ERiCDistanceFunction extends AbstractIndexBasedDistanceFunction<Num
@Override
public boolean equals(Object obj) {
- if (obj == null) {
+ if(obj == null) {
return false;
}
- if (!this.getClass().equals(obj.getClass())) {
+ if(!this.getClass().equals(obj.getClass())) {
return false;
}
ERiCDistanceFunction other = (ERiCDistanceFunction) obj;
@@ -267,14 +269,14 @@ public class ERiCDistanceFunction extends AbstractIndexBasedDistanceFunction<Num
configIndexFactory(config, FilteredLocalPCAIndex.Factory.class, KNNQueryFilteredPCAIndex.Factory.class);
final DoubleParameter deltaP = new DoubleParameter(DELTA_ID, 0.1);
- deltaP.addConstraint(new GreaterEqualConstraint(0));
- if (config.grab(deltaP)) {
+ deltaP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
+ if(config.grab(deltaP)) {
delta = deltaP.doubleValue();
}
final DoubleParameter tauP = new DoubleParameter(TAU_ID, 0.1);
- tauP.addConstraint(new GreaterEqualConstraint(0));
- if (config.grab(tauP)) {
+ tauP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
+ if(config.grab(tauP)) {
tau = tauP.doubleValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/correlation/PCABasedCorrelationDistanceFunction.java b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/correlation/PCABasedCorrelationDistanceFunction.java
index 4d5553a3..b7a22b32 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/correlation/PCABasedCorrelationDistanceFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/correlation/PCABasedCorrelationDistanceFunction.java
@@ -36,7 +36,7 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredResult;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
@@ -93,10 +93,10 @@ public class PCABasedCorrelationDistanceFunction extends AbstractIndexBasedDista
@Override
public boolean equals(Object obj) {
- if (obj == null) {
+ if(obj == null) {
return false;
}
- if (!this.getClass().equals(obj.getClass())) {
+ if(!this.getClass().equals(obj.getClass())) {
return false;
}
PCABasedCorrelationDistanceFunction other = (PCABasedCorrelationDistanceFunction) obj;
@@ -166,7 +166,7 @@ public class PCABasedCorrelationDistanceFunction extends AbstractIndexBasedDista
// for all strong eigenvectors of rv2
Matrix m1_czech = pca1.dissimilarityMatrix();
- for (int i = 0; i < v2_strong.getColumnDimensionality(); i++) {
+ for(int i = 0; i < v2_strong.getColumnDimensionality(); i++) {
Vector v2_i = v2_strong.getCol(i);
// check, if distance of v2_i to the space of rv1 > delta
// (i.e., if v2_i spans up a new dimension)
@@ -174,7 +174,7 @@ public class PCABasedCorrelationDistanceFunction extends AbstractIndexBasedDista
// if so, insert v2_i into v1 and adjust v1
// and compute m1_czech new, increase lambda1
- if (lambda1 < dimensionality && dist > delta) {
+ if(lambda1 < dimensionality && dist > delta) {
adjust(v1, e1_czech, v2_i, lambda1++);
m1_czech = v1.times(e1_czech).timesTranspose(v1);
}
@@ -182,7 +182,7 @@ public class PCABasedCorrelationDistanceFunction extends AbstractIndexBasedDista
// for all strong eigenvectors of rv1
Matrix m2_czech = pca2.dissimilarityMatrix();
- for (int i = 0; i < v1_strong.getColumnDimensionality(); i++) {
+ for(int i = 0; i < v1_strong.getColumnDimensionality(); i++) {
Vector v1_i = v1_strong.getCol(i);
// check, if distance of v1_i to the space of rv2 > delta
// (i.e., if v1_i spans up a new dimension)
@@ -190,7 +190,7 @@ public class PCABasedCorrelationDistanceFunction extends AbstractIndexBasedDista
// if so, insert v1_i into v2 and adjust v2
// and compute m2_czech new , increase lambda2
- if (lambda2 < dimensionality && dist > delta) {
+ if(lambda2 < dimensionality && dist > delta) {
adjust(v2, e2_czech, v1_i, lambda2++);
m2_czech = v2.times(e2_czech).timesTranspose(v2);
}
@@ -231,7 +231,7 @@ public class PCABasedCorrelationDistanceFunction extends AbstractIndexBasedDista
// normalize v
Vector v_i = vector.copy();
Vector sum = new Vector(dim);
- for (int k = 0; k < corrDim; k++) {
+ for(int k = 0; k < corrDim; k++) {
Vector v_k = v.getCol(k);
sum.plusTimesEquals(v_k, v_i.transposeTimes(v_k));
}
@@ -248,12 +248,12 @@ public class PCABasedCorrelationDistanceFunction extends AbstractIndexBasedDista
* @return the Euclidean distance between the given two vectors
*/
private double euclideanDistance(V dv1, V dv2) {
- if (dv1.getDimensionality() != dv2.getDimensionality()) {
+ if(dv1.getDimensionality() != dv2.getDimensionality()) {
throw new IllegalArgumentException("Different dimensionality of FeatureVectors\n first argument: " + dv1.toString() + "\n second argument: " + dv2.toString());
}
double sqrDist = 0;
- for (int i = 0; i < dv1.getDimensionality(); i++) {
+ for(int i = 0; i < dv1.getDimensionality(); i++) {
double manhattanI = dv1.doubleValue(i) - dv2.doubleValue(i);
sqrDist += manhattanI * manhattanI;
}
@@ -277,8 +277,8 @@ public class PCABasedCorrelationDistanceFunction extends AbstractIndexBasedDista
configIndexFactory(config, FilteredLocalPCAIndex.Factory.class, KNNQueryFilteredPCAIndex.Factory.class);
final DoubleParameter param = new DoubleParameter(DELTA_ID, 0.25);
- param.addConstraint(new GreaterEqualConstraint(0));
- if (config.grab(param)) {
+ param.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
+ if(config.grab(param)) {
delta = param.doubleValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/external/NumberDistanceParser.java b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/external/NumberDistanceParser.java
index 9a7315b3..b8e720ab 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/external/NumberDistanceParser.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/external/NumberDistanceParser.java
@@ -28,7 +28,6 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.HashMap;
-import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
@@ -40,6 +39,7 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.datasource.parser.AbstractParser;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
@@ -78,12 +78,12 @@ public class NumberDistanceParser<D extends NumberDistance<D, ?>> extends Abstra
* Constructor.
*
* @param colSep Column separator pattern
- * @param quoteChar Quote character
+ * @param quoteChars Quote characters
* @param comment Comment pattern
* @param distanceFactory Distance factory to use
*/
- public NumberDistanceParser(Pattern colSep, char quoteChar, Pattern comment, D distanceFactory) {
- super(colSep, quoteChar, comment);
+ public NumberDistanceParser(Pattern colSep, String quoteChars, Pattern comment, D distanceFactory) {
+ super(colSep, quoteChars, comment);
this.distanceFactory = distanceFactory;
}
@@ -96,56 +96,80 @@ public class NumberDistanceParser<D extends NumberDistance<D, ?>> extends Abstra
ModifiableDBIDs ids = DBIDUtil.newHashSet();
Map<DBIDPair, D> distanceCache = new HashMap<>();
try {
- for (String line; (line = reader.readLine()) != null; lineNumber++) {
- if (prog != null) {
+ for(String line; (line = reader.readLine()) != null; lineNumber++) {
+ if(prog != null) {
prog.incrementProcessed(LOG);
}
// Skip empty lines and comments
- if (line.length() <= 0 || (comment != null && comment.matcher(line).matches())) {
+ if(line.length() <= 0 || (comment != null && comment.matcher(line).matches())) {
continue;
}
- List<String> entries = tokenize(line);
- if (entries.size() != 3) {
- throw new IllegalArgumentException("Line " + lineNumber + " does not have the " + "required input format: id1 id2 distanceValue! " + line);
- }
+ tokenizer.initialize(line, 0, lengthWithoutLinefeed(line));
+ if(!tokenizer.valid()) {
+ throw new IllegalArgumentException("Less than three values in line " + lineNumber);
+ }
DBID id1, id2;
try {
- id1 = DBIDUtil.importInteger(Integer.parseInt(entries.get(0)));
- } catch (NumberFormatException e) {
+ id1 = DBIDUtil.importInteger((int) tokenizer.getLongBase10());
+ tokenizer.advance();
+ }
+ catch(NumberFormatException e) {
throw new IllegalArgumentException("Error in line " + lineNumber + ": id1 is no integer!");
}
+ if(!tokenizer.valid()) {
+ throw new IllegalArgumentException("Less than three values in line " + lineNumber);
+ }
try {
- id2 = DBIDUtil.importInteger(Integer.parseInt(entries.get(1)));
- } catch (NumberFormatException e) {
+ id2 = DBIDUtil.importInteger((int) tokenizer.getLongBase10());
+ tokenizer.advance();
+ }
+ catch(NumberFormatException e) {
throw new IllegalArgumentException("Error in line " + lineNumber + ": id2 is no integer!");
}
+ if(!tokenizer.valid()) {
+ throw new IllegalArgumentException("Less than three values in line " + lineNumber);
+ }
try {
- D distance = distanceFactory.parseString(entries.get(2));
+ final D distance;
+ if(distanceFactory == DoubleDistance.FACTORY) {
+ @SuppressWarnings("unchecked")
+ D dist = (D) DoubleDistance.FACTORY.fromDouble(tokenizer.getDouble());
+ distance = dist;
+ }
+ else {
+ distance = distanceFactory.parseString(tokenizer.getSubstring());
+ }
+ tokenizer.advance();
put(id1, id2, distance, distanceCache);
ids.add(id1);
ids.add(id2);
- } catch (IllegalArgumentException e) {
+ }
+ catch(IllegalArgumentException e) {
throw new IllegalArgumentException("Error in line " + lineNumber + ":" + e.getMessage(), e);
}
+ if(tokenizer.valid()) {
+ throw new IllegalArgumentException("More than three values in line " + lineNumber);
+ }
}
- } catch (IOException e) {
+ }
+ catch(IOException e) {
throw new IllegalArgumentException("Error while parsing line " + lineNumber + ".");
}
- if (prog != null) {
+ if(prog != null) {
prog.setCompleted(LOG);
}
// check if all distance values are specified
- for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
- for (DBIDIter iter2 = ids.iter(); iter2.valid(); iter2.advance()) {
- if (DBIDUtil.compare(iter2, iter) <= 0) {
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ for(DBIDIter iter2 = ids.iter(); iter2.valid(); iter2.advance()) {
+ if(DBIDUtil.compare(iter2, iter) <= 0) {
continue;
}
- if (!containsKey(iter, iter2, distanceCache)) {
+ if(!containsKey(iter, iter2, distanceCache)) {
throw new IllegalArgumentException("Distance value for " + DBIDUtil.toString(iter) + " - " + DBIDUtil.toString(iter2) + " is missing!");
}
}
@@ -163,14 +187,14 @@ public class NumberDistanceParser<D extends NumberDistance<D, ?>> extends Abstra
*/
private void put(DBID id1, DBID id2, D distance, Map<DBIDPair, D> cache) {
// the smaller id is the first key
- if (DBIDUtil.compare(id1, id2) > 0) {
+ if(DBIDUtil.compare(id1, id2) > 0) {
put(id2, id1, distance, cache);
return;
}
D oldDistance = cache.put(DBIDUtil.newPair(id1, id2), distance);
- if (oldDistance != null) {
+ if(oldDistance != null) {
throw new IllegalArgumentException("Distance value for specified ids is already assigned!");
}
}
@@ -186,7 +210,7 @@ public class NumberDistanceParser<D extends NumberDistance<D, ?>> extends Abstra
* specified ids, false otherwise
*/
public boolean containsKey(DBIDRef id1, DBIDRef id2, Map<DBIDPair, D> cache) {
- if (DBIDUtil.compare(id1, id2) > 0) {
+ if(DBIDUtil.compare(id1, id2) > 0) {
return containsKey(id2, id1, cache);
}
@@ -215,14 +239,14 @@ public class NumberDistanceParser<D extends NumberDistance<D, ?>> extends Abstra
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
ObjectParameter<D> distFuncP = new ObjectParameter<>(DISTANCE_ID, Distance.class);
- if (config.grab(distFuncP)) {
+ if(config.grab(distFuncP)) {
distanceFactory = distFuncP.instantiateClass(config);
}
}
@Override
protected NumberDistanceParser<D> makeInstance() {
- return new NumberDistanceParser<>(colSep, quoteChar, comment, distanceFactory);
+ return new NumberDistanceParser<>(colSep, quoteChars, comment, distanceFactory);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/geo/DimensionSelectingLatLngDistanceFunction.java b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/geo/DimensionSelectingLatLngDistanceFunction.java
index 1974e533..8eea1f15 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/geo/DimensionSelectingLatLngDistanceFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/geo/DimensionSelectingLatLngDistanceFunction.java
@@ -34,7 +34,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.exceptions.NotImplementedException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.NoDuplicateValueGlobalConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -83,18 +83,21 @@ public class DimensionSelectingLatLngDistanceFunction extends AbstractSpatialDou
@Override
@Reference(authors = "Erich Schubert, Arthur Zimek and Hans-Peter Kriegel", title = "Geodetic Distance Queries on R-Trees for Indexing Geographic Data", booktitle = "Advances in Spatial and Temporal Databases - 13th International Symposium, SSTD 2013, Munich, Germany")
public double doubleMinDist(SpatialComparable mbr1, SpatialComparable mbr2) {
- if (mbr1 instanceof NumberVector) {
- if (mbr2 instanceof NumberVector) {
+ if(mbr1 instanceof NumberVector) {
+ if(mbr2 instanceof NumberVector) {
return doubleDistance((NumberVector<?>) mbr1, (NumberVector<?>) mbr2);
- } else {
+ }
+ else {
NumberVector<?> o1 = (NumberVector<?>) mbr1;
return model.minDistDeg(o1.doubleValue(dimlat), o1.doubleValue(dimlng), mbr2.getMin(dimlat), mbr2.getMin(dimlng), mbr2.getMax(dimlat), mbr2.getMax(dimlng));
}
- } else {
- if (mbr2 instanceof NumberVector) {
+ }
+ else {
+ if(mbr2 instanceof NumberVector) {
NumberVector<?> o2 = (NumberVector<?>) mbr2;
return model.minDistDeg(o2.doubleValue(dimlat), o2.doubleValue(dimlng), mbr1.getMin(dimlat), mbr1.getMin(dimlng), mbr1.getMax(dimlat), mbr1.getMax(dimlng));
- } else {
+ }
+ else {
throw new NotImplementedException("This distance function cannot - yet - be used with this algorithm, as the lower bound rectangle to rectangle distances have not yet been formalized for geodetic data.");
}
}
@@ -117,27 +120,28 @@ public class DimensionSelectingLatLngDistanceFunction extends AbstractSpatialDou
@Override
public boolean equals(Object obj) {
- if (this == obj) {
+ if(this == obj) {
return true;
}
- if (obj == null) {
+ if(obj == null) {
return false;
}
- if (getClass() != obj.getClass()) {
+ if(getClass() != obj.getClass()) {
return false;
}
DimensionSelectingLatLngDistanceFunction other = (DimensionSelectingLatLngDistanceFunction) obj;
- if (dimlat != other.dimlat) {
+ if(dimlat != other.dimlat) {
return false;
}
- if (dimlng != other.dimlng) {
+ if(dimlng != other.dimlng) {
return false;
}
- if (model == null) {
- if (other.model != null) {
+ if(model == null) {
+ if(other.model != null) {
return false;
}
- } else if (!model.equals(other.model)) {
+ }
+ else if(!model.equals(other.model)) {
return false;
}
return true;
@@ -180,18 +184,18 @@ public class DimensionSelectingLatLngDistanceFunction extends AbstractSpatialDou
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
final IntParameter dimlatP = new IntParameter(LATDIM_ID);
- dimlatP.addConstraint(new GreaterEqualConstraint(0));
- if (config.grab(dimlatP)) {
+ dimlatP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_INT);
+ if(config.grab(dimlatP)) {
dimlat = dimlatP.getValue();
}
final IntParameter dimlngP = new IntParameter(LNGDIM_ID);
- dimlngP.addConstraint(new GreaterEqualConstraint(0));
- if (config.grab(dimlngP)) {
+ dimlngP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_INT);
+ if(config.grab(dimlngP)) {
dimlng = dimlngP.getValue();
}
config.checkConstraint(new NoDuplicateValueGlobalConstraint(dimlatP, dimlngP));
ObjectParameter<EarthModel> modelP = new ObjectParameter<>(EarthModel.MODEL_ID, EarthModel.class, SphericalVincentyEarthModel.class);
- if (config.grab(modelP)) {
+ if(config.grab(modelP)) {
model = modelP.instantiateClass(config);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/histogram/package-info.java b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/histogram/package-info.java
index e6a9a8be..770dcf52 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/histogram/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/histogram/package-info.java
@@ -1,4 +1,27 @@
/**
* Distance functions for <b>one-dimensional</b> histograms.
*/
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
package de.lmu.ifi.dbs.elki.distance.distancefunction.histogram; \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/EuclideanDistanceFunction.java b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/EuclideanDistanceFunction.java
index 03116430..24dfdc16 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/EuclideanDistanceFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/EuclideanDistanceFunction.java
@@ -34,7 +34,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
* @author Arthur Zimek
*/
@Alias({ "euclidean", "euclid", "l2", "EuclideanDistanceFunction", "de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction" })
-public class EuclideanDistanceFunction extends LPNormDistanceFunction {
+public class EuclideanDistanceFunction extends LPIntegerNormDistanceFunction {
/**
* Static instance. Use this!
*/
@@ -48,81 +48,127 @@ public class EuclideanDistanceFunction extends LPNormDistanceFunction {
*/
@Deprecated
public EuclideanDistanceFunction() {
- super(2.);
+ super(2);
}
- @Override
- public double doubleDistance(NumberVector<?> v1, NumberVector<?> v2) {
- final int dim = dimensionality(v1, v2);
- double agg = 0.;
- for (int d = 0; d < dim; d++) {
- final double delta = v1.doubleValue(d) - v2.doubleValue(d);
+ private final double doublePreDistance(NumberVector<?> v1, NumberVector<?> v2, int start, int end, double agg) {
+ for(int d = start; d < end; d++) {
+ final double xd = v1.doubleValue(d), yd = v2.doubleValue(d);
+ final double delta = xd - yd;
agg += delta * delta;
}
- return Math.sqrt(agg);
+ return agg;
}
- @Override
- public double doubleNorm(NumberVector<?> v) {
- final int dim = v.getDimensionality();
- double agg = 0.;
- for (int d = 0; d < dim; d++) {
- final double val = v.doubleValue(d);
- agg += val * val;
+ private final double doublePreDistanceVM(NumberVector<?> v, SpatialComparable mbr, int start, int end, double agg) {
+ for(int d = start; d < end; d++) {
+ final double value = v.doubleValue(d), min = mbr.getMin(d);
+ double delta = min - value;
+ if(delta < 0.) {
+ delta = value - mbr.getMax(d);
+ }
+ if(delta > 0.) {
+ agg += delta * delta;
+ }
}
- return Math.sqrt(agg);
+ return agg;
}
- protected double doubleMinDistObject(NumberVector<?> v, SpatialComparable mbr) {
- final int dim = dimensionality(mbr, v);
- double agg = 0.;
- for (int d = 0; d < dim; d++) {
- final double value = v.doubleValue(d), min = mbr.getMin(d);
- final double diff;
- if (value < min) {
- diff = min - value;
- } else {
- final double max = mbr.getMax(d);
- if (value > max) {
- diff = value - max;
- } else {
- continue;
- }
+ private final double doublePreDistanceMBR(SpatialComparable mbr1, SpatialComparable mbr2, int start, int end, double agg) {
+ for(int d = start; d < end; d++) {
+ double delta = mbr2.getMin(d) - mbr1.getMax(d);
+ if(delta < 0.) {
+ delta = mbr1.getMin(d) - mbr2.getMax(d);
+ }
+ if(delta > 0.) {
+ agg += delta * delta;
}
- agg += diff * diff;
+ }
+ return agg;
+ }
+
+ private final double doublePreNorm(NumberVector<?> v, int start, int end, double agg) {
+ for(int d = start; d < end; d++) {
+ final double xd = v.doubleValue(d);
+ agg += xd * xd;
+ }
+ return agg;
+ }
+
+ private final double doublePreNormMBR(SpatialComparable mbr, int start, int end, double agg) {
+ for(int d = start; d < end; d++) {
+ double delta = mbr.getMin(d);
+ if(delta < 0.) {
+ delta = -mbr.getMax(d);
+ }
+ if(delta > 0.) {
+ agg += delta * delta;
+ }
+ }
+ return agg;
+ }
+
+ @Override
+ public double doubleDistance(NumberVector<?> v1, NumberVector<?> v2) {
+ final int dim1 = v1.getDimensionality(), dim2 = v2.getDimensionality();
+ final int mindim = (dim1 < dim2) ? dim1 : dim2;
+ double agg = doublePreDistance(v1, v2, 0, mindim, 0.);
+ if(dim1 > mindim) {
+ agg = doublePreNorm(v1, mindim, dim1, agg);
+ }
+ else if(dim2 > mindim) {
+ agg = doublePreNorm(v2, mindim, dim2, agg);
}
return Math.sqrt(agg);
}
@Override
+ public double doubleNorm(NumberVector<?> v) {
+ return Math.sqrt(doublePreNorm(v, 0, v.getDimensionality(), 0.));
+ }
+
+ @Override
public double doubleMinDist(SpatialComparable mbr1, SpatialComparable mbr2) {
- // Some optimizations for simpler cases.
- if (mbr1 instanceof NumberVector) {
- if (mbr2 instanceof NumberVector) {
- return doubleDistance((NumberVector<?>) mbr1, (NumberVector<?>) mbr2);
- } else {
- return doubleMinDistObject((NumberVector<?>) mbr1, mbr2);
- }
- } else if (mbr2 instanceof NumberVector) {
- return doubleMinDistObject((NumberVector<?>) mbr2, mbr1);
- }
- final int dim = dimensionality(mbr1, mbr2);
+ final int dim1 = mbr1.getDimensionality(), dim2 = mbr2.getDimensionality();
+ final int mindim = (dim1 < dim2) ? dim1 : dim2;
+
+ final NumberVector<?> v1 = (mbr1 instanceof NumberVector) ? (NumberVector<?>) mbr1 : null;
+ final NumberVector<?> v2 = (mbr2 instanceof NumberVector) ? (NumberVector<?>) mbr2 : null;
double agg = 0.;
- for (int d = 0; d < dim; d++) {
- final double diff;
- final double d1 = mbr2.getMin(d) - mbr1.getMax(d);
- if (d1 > 0.) {
- diff = d1;
- } else {
- final double d2 = mbr1.getMin(d) - mbr2.getMax(d);
- if (d2 > 0.) {
- diff = d2;
- } else {
- continue;
- }
+ if(v1 != null) {
+ if(v2 != null) {
+ agg = doublePreDistance(v1, v2, 0, mindim, agg);
+ }
+ else {
+ agg = doublePreDistanceVM(v1, mbr2, 0, mindim, agg);
+ }
+ }
+ else {
+ if(v2 != null) {
+ agg = doublePreDistanceVM(v2, mbr1, 0, mindim, agg);
+ }
+ else {
+ agg = doublePreDistanceMBR(mbr1, mbr2, 0, mindim, agg);
+ }
+ }
+ // first object has more dimensions.
+ if(dim1 > mindim) {
+ if(v1 != null) {
+ agg = doublePreNorm(v1, mindim, dim1, agg);
+ }
+ else {
+ agg = doublePreNormMBR(v1, mindim, dim1, agg);
+ }
+ }
+ // second object has more dimensions.
+ if(dim2 > mindim) {
+ if(v2 != null) {
+ agg = doublePreNorm(v2, mindim, dim2, agg);
+ }
+ else {
+ agg = doublePreNormMBR(mbr2, mindim, dim2, agg);
}
- agg += diff * diff;
}
return Math.sqrt(agg);
}
@@ -139,13 +185,13 @@ public class EuclideanDistanceFunction extends LPNormDistanceFunction {
@Override
public boolean equals(Object obj) {
- if (obj == null) {
+ if(obj == null) {
return false;
}
- if (obj == this) {
+ if(obj == this) {
return true;
}
- if (this.getClass().equals(obj.getClass())) {
+ if(this.getClass().equals(obj.getClass())) {
return true;
}
return super.equals(obj);
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/LPIntegerNormDistanceFunction.java b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/LPIntegerNormDistanceFunction.java
new file mode 100644
index 00000000..22ae45b3
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/LPIntegerNormDistanceFunction.java
@@ -0,0 +1,215 @@
+package de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable;
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+
+/**
+ * Provides a LP-Norm for number vectors. Optimized version for integer values
+ * of p. This will likely not have huge impact, but YMMV.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.landmark
+ */
+public class LPIntegerNormDistanceFunction extends LPNormDistanceFunction {
+ /**
+ * Integer value of p.
+ */
+ int intp;
+
+ /**
+ * Constructor, internal version.
+ *
+ * @param p Parameter p
+ */
+ public LPIntegerNormDistanceFunction(int p) {
+ super(p);
+ this.intp = p;
+ }
+
+ private final double doublePreDistance(NumberVector<?> v1, NumberVector<?> v2, final int start, final int end, double agg) {
+ for(int d = start; d < end; d++) {
+ final double xd = v1.doubleValue(d), yd = v2.doubleValue(d);
+ final double delta = (xd >= yd) ? xd - yd : yd - xd;
+ agg += MathUtil.powi(delta, intp);
+ }
+ return agg;
+ }
+
+ private final double doublePreDistanceVM(NumberVector<?> v, SpatialComparable mbr, final int start, final int end, double agg) {
+ for(int d = start; d < end; d++) {
+ final double value = v.doubleValue(d), min = mbr.getMin(d);
+ double delta = min - value;
+ if(delta < 0.) {
+ delta = value - mbr.getMax(d);
+ }
+ if(delta > 0.) {
+ agg += MathUtil.powi(delta, intp);
+ }
+ }
+ return agg;
+ }
+
+ private final double doublePreDistanceMBR(SpatialComparable mbr1, SpatialComparable mbr2, final int start, final int end, double agg) {
+ for(int d = start; d < end; d++) {
+ double delta = mbr2.getMin(d) - mbr1.getMax(d);
+ if(delta < 0.) {
+ delta = mbr1.getMin(d) - mbr2.getMax(d);
+ }
+ if(delta > 0.) {
+ agg += MathUtil.powi(delta, intp);
+ }
+ }
+ return agg;
+ }
+
+ private final double doublePreNorm(NumberVector<?> v, final int start, final int end, double agg) {
+ for(int d = start; d < end; d++) {
+ final double xd = v.doubleValue(d);
+ final double delta = xd >= 0. ? xd : -xd;
+ agg += MathUtil.powi(delta, intp);
+ }
+ return agg;
+ }
+
+ private final double doublePreNormMBR(SpatialComparable mbr, final int start, final int end, double agg) {
+ for(int d = start; d < end; d++) {
+ double delta = mbr.getMin(d);
+ if(delta < 0.) {
+ delta = -mbr.getMax(d);
+ }
+ if(delta > 0.) {
+ agg += MathUtil.powi(delta, intp);
+ }
+ }
+ return agg;
+ }
+
+ @Override
+ public double doubleDistance(NumberVector<?> v1, NumberVector<?> v2) {
+ final int dim1 = v1.getDimensionality(), dim2 = v2.getDimensionality();
+ final int mindim = (dim1 < dim2) ? dim1 : dim2;
+ double agg = doublePreDistance(v1, v2, 0, mindim, 0.);
+ if(dim1 > mindim) {
+ agg = doublePreNorm(v1, mindim, dim1, agg);
+ }
+ else if(dim2 > mindim) {
+ agg = doublePreNorm(v2, mindim, dim2, agg);
+ }
+ return Math.pow(agg, invp);
+ }
+
+ @Override
+ public double doubleNorm(NumberVector<?> v) {
+ return Math.pow(doublePreNorm(v, 0, v.getDimensionality(), 0.), invp);
+ }
+
+ @Override
+ public double doubleMinDist(SpatialComparable mbr1, SpatialComparable mbr2) {
+ final int dim1 = mbr1.getDimensionality(), dim2 = mbr2.getDimensionality();
+ final int mindim = (dim1 < dim2) ? dim1 : dim2;
+
+ final NumberVector<?> v1 = (mbr1 instanceof NumberVector) ? (NumberVector<?>) mbr1 : null;
+ final NumberVector<?> v2 = (mbr2 instanceof NumberVector) ? (NumberVector<?>) mbr2 : null;
+
+ double agg = 0.;
+ if(v1 != null) {
+ if(v2 != null) {
+ agg = doublePreDistance(v1, v2, 0, mindim, agg);
+ }
+ else {
+ agg = doublePreDistanceVM(v1, mbr2, 0, mindim, agg);
+ }
+ }
+ else {
+ if(v2 != null) {
+ agg = doublePreDistanceVM(v2, mbr1, 0, mindim, agg);
+ }
+ else {
+ agg = doublePreDistanceMBR(mbr1, mbr2, 0, mindim, agg);
+ }
+ }
+ // first object has more dimensions.
+ if(dim1 > mindim) {
+ if(v1 != null) {
+ agg = doublePreNorm(v1, mindim, dim1, agg);
+ }
+ else {
+ agg = doublePreNormMBR(v1, mindim, dim1, agg);
+ }
+ }
+ // second object has more dimensions.
+ if(dim2 > mindim) {
+ if(v2 != null) {
+ agg = doublePreNorm(v2, mindim, dim2, agg);
+ }
+ else {
+ agg = doublePreNormMBR(mbr2, mindim, dim2, agg);
+ }
+ }
+ return Math.pow(agg, invp);
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ /**
+ * The value of p.
+ */
+ protected int p;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ final IntParameter paramP = new IntParameter(P_ID);
+ paramP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(paramP)) {
+ p = paramP.getValue();
+ }
+ }
+
+ @Override
+ protected LPIntegerNormDistanceFunction makeInstance() {
+ if(p == 1) {
+ return ManhattanDistanceFunction.STATIC;
+ }
+ if(p == 2) {
+ return EuclideanDistanceFunction.STATIC;
+ }
+ return new LPIntegerNormDistanceFunction(p);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/LPNormDistanceFunction.java b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/LPNormDistanceFunction.java
index c25e04f1..42753ac1 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/LPNormDistanceFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/LPNormDistanceFunction.java
@@ -25,11 +25,13 @@ package de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable;
+import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractSpatialDoubleDistanceNorm;
import de.lmu.ifi.dbs.elki.utilities.Alias;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
@@ -63,62 +65,180 @@ public class LPNormDistanceFunction extends AbstractSpatialDoubleDistanceNorm {
this.invp = 1. / p;
}
- @Override
- public double doubleDistance(NumberVector<?> v1, NumberVector<?> v2) {
- final int dim = dimensionality(v1, v2);
- double agg = 0.;
- for (int d = 0; d < dim; d++) {
+ /**
+ * Compute unscaled distance in a range of dimensions.
+ *
+ * @param v1 First object
+ * @param v2 Second object
+ * @param start First dimension
+ * @param end Exclusive last dimension
+ * @param agg Current aggregate value
+ * @return Aggregated values.
+ */
+ private final double doublePreDistance(NumberVector<?> v1, NumberVector<?> v2, final int start, final int end, double agg) {
+ for(int d = start; d < end; d++) {
final double xd = v1.doubleValue(d), yd = v2.doubleValue(d);
final double delta = (xd >= yd) ? xd - yd : yd - xd;
agg += Math.pow(delta, p);
}
- return Math.pow(agg, invp);
+ return agg;
}
- @Override
- public double doubleNorm(NumberVector<?> v) {
- final int dim = v.getDimensionality();
- double agg = 0.;
- for (int d = 0; d < dim; d++) {
+ /**
+ * Compute unscaled distance in a range of dimensions.
+ *
+ * @param v First vector
+ * @param mbr Second MBR
+ * @param start First dimension
+ * @param end Exclusive last dimension
+ * @param agg Current aggregate value
+ * @return Aggregated values.
+ */
+ private final double doublePreDistanceVM(NumberVector<?> v, SpatialComparable mbr, final int start, final int end, double agg) {
+ for(int d = start; d < end; d++) {
+ final double value = v.doubleValue(d), min = mbr.getMin(d);
+ double delta = min - value;
+ if(delta < 0.) {
+ delta = value - mbr.getMax(d);
+ }
+ if(delta > 0.) {
+ agg += Math.pow(delta, p);
+ }
+ }
+ return agg;
+ }
+
+ /**
+ * Compute unscaled distance in a range of dimensions.
+ *
+ * @param mbr1 First MBR
+ * @param mbr2 Second MBR
+ * @param start First dimension
+ * @param end Exclusive last dimension
+ * @param agg Current aggregate value
+ * @return Aggregated values.
+ */
+ private final double doublePreDistanceMBR(SpatialComparable mbr1, SpatialComparable mbr2, final int start, final int end, double agg) {
+ for(int d = start; d < end; d++) {
+ double delta = mbr2.getMin(d) - mbr1.getMax(d);
+ if(delta < 0.) {
+ delta = mbr1.getMin(d) - mbr2.getMax(d);
+ }
+ if(delta > 0.) {
+ agg += Math.pow(delta, p);
+ }
+ }
+ return agg;
+ }
+
+ /**
+ * Compute unscaled norm in a range of dimensions.
+ *
+ * @param v Data object
+ * @param start First dimension
+ * @param end Exclusive last dimension
+ * @param agg Current aggregate value
+ * @return Aggregated values.
+ */
+ private final double doublePreNorm(NumberVector<?> v, final int start, final int end, double agg) {
+ for(int d = start; d < end; d++) {
final double xd = v.doubleValue(d);
- agg += Math.pow(xd >= 0. ? xd : -xd, p);
+ final double delta = xd >= 0. ? xd : -xd;
+ agg += Math.pow(delta, p);
+ }
+ return agg;
+ }
+
+ /**
+ * Compute unscaled norm in a range of dimensions.
+ *
+ * @param mbr Data object
+ * @param start First dimension
+ * @param end Exclusive last dimension
+ * @param agg Current aggregate value
+ * @return Aggregated values.
+ */
+ private final double doublePreNormMBR(SpatialComparable mbr, final int start, final int end, double agg) {
+ for(int d = start; d < end; d++) {
+ double delta = mbr.getMin(d);
+ if(delta < 0.) {
+ delta = -mbr.getMax(d);
+ }
+ if(delta > 0.) {
+ agg += Math.pow(delta, p);
+ }
+ }
+ return agg;
+ }
+
+ @Override
+ public double doubleDistance(NumberVector<?> v1, NumberVector<?> v2) {
+ final int dim1 = v1.getDimensionality(), dim2 = v2.getDimensionality();
+ final int mindim = (dim1 < dim2) ? dim1 : dim2;
+ double agg = doublePreDistance(v1, v2, 0, mindim, 0.);
+ if(dim1 > mindim) {
+ agg = doublePreNorm(v1, mindim, dim1, agg);
+ }
+ else if(dim2 > mindim) {
+ agg = doublePreNorm(v2, mindim, dim2, agg);
}
return Math.pow(agg, invp);
}
@Override
+ public double doubleNorm(NumberVector<?> v) {
+ return Math.pow(doublePreNorm(v, 0, v.getDimensionality(), 0.), invp);
+ }
+
+ @Override
public double doubleMinDist(SpatialComparable mbr1, SpatialComparable mbr2) {
- // Optimization for the simplest case
- if (mbr1 instanceof NumberVector) {
- if (mbr2 instanceof NumberVector) {
- return doubleDistance((NumberVector<?>) mbr1, (NumberVector<?>) mbr2);
- }
- }
- // TODO: optimize for more simpler cases: obj vs. rect?
- final int dim = dimensionality(mbr1, mbr2);
+ final int dim1 = mbr1.getDimensionality(), dim2 = mbr2.getDimensionality();
+ final int mindim = (dim1 < dim2) ? dim1 : dim2;
+
+ final NumberVector<?> v1 = (mbr1 instanceof NumberVector) ? (NumberVector<?>) mbr1 : null;
+ final NumberVector<?> v2 = (mbr2 instanceof NumberVector) ? (NumberVector<?>) mbr2 : null;
double agg = 0.;
- for (int d = 0; d < dim; d++) {
- final double diff;
- final double d1 = mbr2.getMin(d) - mbr1.getMax(d);
- if (d1 > 0.) {
- diff = d1;
- } else {
- final double d2 = mbr1.getMin(d) - mbr2.getMax(d);
- if (d2 > 0.) {
- diff = d2;
- } else { // The mbrs intersect!
- continue;
- }
- }
- agg += Math.pow(diff, p);
+ if(v1 != null) {
+ if(v2 != null) {
+ agg = doublePreDistance(v1, v2, 0, mindim, agg);
+ }
+ else {
+ agg = doublePreDistanceVM(v1, mbr2, 0, mindim, agg);
+ }
+ }
+ else {
+ if(v2 != null) {
+ agg = doublePreDistanceVM(v2, mbr1, 0, mindim, agg);
+ }
+ else {
+ agg = doublePreDistanceMBR(mbr1, mbr2, 0, mindim, agg);
+ }
+ }
+ // first object has more dimensions.
+ if(dim1 > mindim) {
+ if(v1 != null) {
+ agg = doublePreNorm(v1, mindim, dim1, agg);
+ }
+ else {
+ agg = doublePreNormMBR(mbr1, mindim, dim1, agg);
+ }
+ }
+ // second object has more dimensions.
+ if(dim2 > mindim) {
+ if(v2 != null) {
+ agg = doublePreNorm(v2, mindim, dim2, agg);
+ }
+ else {
+ agg = doublePreNormMBR(mbr2, mindim, dim2, agg);
+ }
}
return Math.pow(agg, invp);
}
@Override
public boolean isMetric() {
- return (p >= 1);
+ return (p >= 1.);
}
@Override
@@ -137,15 +257,20 @@ public class LPNormDistanceFunction extends AbstractSpatialDoubleDistanceNorm {
@Override
public boolean equals(Object obj) {
- if (obj == null) {
+ if(obj == null) {
return false;
}
- if (obj instanceof LPNormDistanceFunction) {
+ if(obj instanceof LPNormDistanceFunction) {
return this.p == ((LPNormDistanceFunction) obj).p;
}
return false;
}
+ @Override
+ public SimpleTypeInformation<? super NumberVector<?>> getInputTypeRestriction() {
+ return TypeUtil.NUMBER_VECTOR_VARIABLE_LENGTH;
+ }
+
/**
* Parameterization class.
*
@@ -163,23 +288,26 @@ public class LPNormDistanceFunction extends AbstractSpatialDoubleDistanceNorm {
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
final DoubleParameter paramP = new DoubleParameter(P_ID);
- paramP.addConstraint(new GreaterConstraint(0.));
- if (config.grab(paramP)) {
+ paramP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+ if(config.grab(paramP)) {
p = paramP.getValue();
}
}
@Override
protected LPNormDistanceFunction makeInstance() {
- if (p == 1.0) {
+ if(p == 1.) {
return ManhattanDistanceFunction.STATIC;
}
- if (p == 2.0) {
+ if(p == 2.) {
return EuclideanDistanceFunction.STATIC;
}
- if (p == Double.POSITIVE_INFINITY) {
+ if(p == Double.POSITIVE_INFINITY) {
return MaximumDistanceFunction.STATIC;
}
+ if(p == Math.round(p)) {
+ return new LPIntegerNormDistanceFunction((int) p);
+ }
return new LPNormDistanceFunction(p);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/ManhattanDistanceFunction.java b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/ManhattanDistanceFunction.java
index 9b8f80a7..3d5d061c 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/ManhattanDistanceFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/ManhattanDistanceFunction.java
@@ -35,7 +35,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
* @author Arthur Zimek
*/
@Alias({ "taxicab", "cityblock", "l1", "ManhattanDistanceFunction", "de.lmu.ifi.dbs.elki.distance.distancefunction.ManhattanDistanceFunction" })
-public class ManhattanDistanceFunction extends LPNormDistanceFunction {
+public class ManhattanDistanceFunction extends LPIntegerNormDistanceFunction {
/**
* The static instance to use.
*/
@@ -49,73 +49,121 @@ public class ManhattanDistanceFunction extends LPNormDistanceFunction {
*/
@Deprecated
public ManhattanDistanceFunction() {
- super(1.);
+ super(1);
}
- @Override
- public double doubleDistance(NumberVector<?> v1, NumberVector<?> v2) {
- final int dim = dimensionality(v1, v2);
- double agg = 0.;
- for (int d = 0; d < dim; d++) {
+ private final double doublePreDistance(NumberVector<?> v1, NumberVector<?> v2, int start, int end, double agg) {
+ for (int d = start; d < end; d++) {
final double xd = v1.doubleValue(d), yd = v2.doubleValue(d);
- final double val = (xd >= yd) ? xd - yd : yd - xd;
- agg += val;
+ final double delta = (xd >= yd) ? xd - yd : yd - xd;
+ agg += delta;
}
return agg;
}
- @Override
- public double doubleNorm(NumberVector<?> v) {
- final int dim = v.getDimensionality();
- double agg = 0.;
- for (int d = 0; d < dim; d++) {
+ private final double doublePreDistanceVM(NumberVector<?> v, SpatialComparable mbr, int start, int end, double agg) {
+ for (int d = start; d < end; d++) {
+ final double value = v.doubleValue(d), min = mbr.getMin(d);
+ double delta = min - value;
+ if (delta < 0.) {
+ delta = value - mbr.getMax(d);
+ }
+ if (delta > 0.) {
+ agg += delta;
+ }
+ }
+ return agg;
+ }
+
+ private final double doublePreDistanceMBR(SpatialComparable mbr1, SpatialComparable mbr2, int start, int end, double agg) {
+ for (int d = start; d < end; d++) {
+ double delta = mbr2.getMin(d) - mbr1.getMax(d);
+ if (delta < 0.) {
+ delta = mbr1.getMin(d) - mbr2.getMax(d);
+ }
+ if (delta > 0.) {
+ agg += delta;
+ }
+ }
+ return agg;
+ }
+
+ private final double doublePreNorm(NumberVector<?> v, int start, int end, double agg) {
+ for (int d = start; d < end; d++) {
final double xd = v.doubleValue(d);
- agg += (xd >= 0.) ? xd : -xd;
+ final double delta = (xd >= 0.) ? xd : -xd;
+ agg += delta;
}
return agg;
}
- protected double doubleMinDistObject(NumberVector<?> v, SpatialComparable mbr) {
- final int dim = dimensionality(mbr, v);
- double agg = 0.;
- for (int d = 0; d < dim; d++) {
- final double value = v.doubleValue(d), min = mbr.getMin(d);
- if (value < min) {
- agg += min - value;
- } else {
- final double max = mbr.getMax(d);
- if (value > max) {
- agg += value - max;
- }
+ private final double doublePreNormMBR(SpatialComparable mbr, int start, int end, double agg) {
+ for (int d = start; d < end; d++) {
+ double delta = mbr.getMin(d);
+ if (delta < 0.) {
+ delta = -mbr.getMax(d);
+ }
+ if (delta > 0.) {
+ agg += delta;
}
}
return agg;
}
@Override
+ public double doubleDistance(NumberVector<?> v1, NumberVector<?> v2) {
+ final int dim1 = v1.getDimensionality(), dim2 = v2.getDimensionality();
+ final int mindim = (dim1 < dim2) ? dim1 : dim2;
+ double agg = doublePreDistance(v1, v2, 0, mindim, 0.);
+ if (dim1 > mindim) {
+ agg = doublePreNorm(v1, mindim, dim1, agg);
+ } else if (dim2 > mindim) {
+ agg = doublePreNorm(v2, mindim, dim2, agg);
+ }
+ return agg;
+ }
+
+ @Override
+ public double doubleNorm(NumberVector<?> v) {
+ return doublePreNorm(v, 0, v.getDimensionality(), 0.);
+ }
+
+ @Override
public double doubleMinDist(SpatialComparable mbr1, SpatialComparable mbr2) {
- // Some optimizations for simpler cases.
- if (mbr1 instanceof NumberVector) {
- if (mbr2 instanceof NumberVector) {
- return doubleDistance((NumberVector<?>) mbr1, (NumberVector<?>) mbr2);
+ final int dim1 = mbr1.getDimensionality(), dim2 = mbr2.getDimensionality();
+ final int mindim = (dim1 < dim2) ? dim1 : dim2;
+
+ final NumberVector<?> v1 = (mbr1 instanceof NumberVector) ? (NumberVector<?>) mbr1 : null;
+ final NumberVector<?> v2 = (mbr2 instanceof NumberVector) ? (NumberVector<?>) mbr2 : null;
+
+ double agg = 0.;
+ if (v1 != null) {
+ if (v2 != null) {
+ agg = doublePreDistance(v1, v2, 0, mindim, agg);
+ } else {
+ agg = doublePreDistanceVM(v1, mbr2, 0, mindim, agg);
+ }
+ } else {
+ if (v2 != null) {
+ agg = doublePreDistanceVM(v2, mbr1, 0, mindim, agg);
} else {
- return doubleMinDistObject((NumberVector<?>) mbr1, mbr2);
+ agg = doublePreDistanceMBR(mbr1, mbr2, 0, mindim, agg);
}
- } else if (mbr2 instanceof NumberVector) {
- return doubleMinDistObject((NumberVector<?>) mbr2, mbr1);
}
- final int dim = dimensionality(mbr1, mbr2);
-
- double agg = 0.;
- for (int d = 0; d < dim; d++) {
- final double d1 = mbr2.getMin(d) - mbr1.getMax(d);
- if (d1 > 0.) {
- agg += d1;
+ // first object has more dimensions.
+ if (dim1 > mindim) {
+ if (v1 != null) {
+ agg = doublePreNorm(v1, mindim, dim1, agg);
+ } else {
+ agg = doublePreNormMBR(mbr1, mindim, dim1, agg);
+ }
+ }
+ // second object has more dimensions.
+ if (dim2 > mindim) {
+ if (v2 != null) {
+ agg = doublePreNorm(v2, mindim, dim2, agg);
} else {
- final double d2 = mbr1.getMin(d) - mbr2.getMax(d);
- if (d2 > 0.) {
- agg += d2;
- }
+ agg = doublePreNormMBR(mbr2, mindim, dim2, agg);
}
}
return agg;
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/MaximumDistanceFunction.java b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/MaximumDistanceFunction.java
index 1b54e278..2cc1b10c 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/MaximumDistanceFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/MaximumDistanceFunction.java
@@ -52,61 +52,122 @@ public class MaximumDistanceFunction extends LPNormDistanceFunction {
super(Double.POSITIVE_INFINITY);
}
- @Override
- public double doubleDistance(NumberVector<?> v1, NumberVector<?> v2) {
- final int dim = dimensionality(v1, v2);
- double agg = 0.;
- for (int d = 0; d < dim; d++) {
+ private final double doublePreDistance(NumberVector<?> v1, NumberVector<?> v2, int start, int end, double agg) {
+ for (int d = start; d < end; d++) {
final double xd = v1.doubleValue(d), yd = v2.doubleValue(d);
- final double val = (xd >= yd) ? xd - yd : yd - xd;
- if (val > agg) {
- agg = val;
+ final double delta = (xd >= yd) ? xd - yd : yd - xd;
+ if (delta > agg) {
+ agg = delta;
}
}
return agg;
}
- @Override
- public double doubleNorm(NumberVector<?> v) {
- final int dim = v.getDimensionality();
- double agg = 0.;
- for (int d = 0; d < dim; d++) {
+ private final double doublePreDistanceVM(NumberVector<?> v, SpatialComparable mbr, int start, int end, double agg) {
+ for (int d = start; d < end; d++) {
+ final double value = v.doubleValue(d), min = mbr.getMin(d);
+ double delta = min - value;
+ if (delta < 0.) {
+ delta = value - mbr.getMax(d);
+ }
+ if (delta > agg) {
+ agg = delta;
+ }
+ }
+ return agg;
+ }
+
+ private final double doublePreDistanceMBR(SpatialComparable mbr1, SpatialComparable mbr2, int start, int end, double agg) {
+ for (int d = start; d < end; d++) {
+ double delta = mbr2.getMin(d) - mbr1.getMax(d);
+ if (delta < 0.) {
+ delta = mbr1.getMin(d) - mbr2.getMax(d);
+ }
+ if (delta > agg) {
+ agg = delta;
+ }
+ }
+ return agg;
+ }
+
+ private final double doublePreNorm(NumberVector<?> v, int start, int end, double agg) {
+ for (int d = start; d < end; d++) {
final double xd = v.doubleValue(d);
- final double val = (xd >= 0.) ? xd : -xd;
- if (val > agg) {
- agg = val;
+ final double delta = (xd >= 0.) ? xd : -xd;
+ if (delta > agg) {
+ agg = delta;
+ }
+ }
+ return agg;
+ }
+
+ private final double doublePreNormMBR(SpatialComparable mbr, int start, int end, double agg) {
+ for (int d = start; d < end; d++) {
+ double delta = mbr.getMin(d);
+ if (delta < 0.) {
+ delta = -mbr.getMax(d);
+ }
+ if (delta > agg) {
+ agg = delta;
}
}
return agg;
}
@Override
+ public double doubleDistance(NumberVector<?> v1, NumberVector<?> v2) {
+ final int dim1 = v1.getDimensionality(), dim2 = v2.getDimensionality();
+ final int mindim = (dim1 < dim2) ? dim1 : dim2;
+ double agg = doublePreDistance(v1, v2, 0, mindim, 0.);
+ if (dim1 > mindim) {
+ agg = doublePreNorm(v1, mindim, dim1, agg);
+ } else if (dim2 > mindim) {
+ agg = doublePreNorm(v2, mindim, dim2, agg);
+ }
+ return agg;
+ }
+
+ @Override
+ public double doubleNorm(NumberVector<?> v) {
+ return doublePreNorm(v, 0, v.getDimensionality(), 0.);
+ }
+
+ @Override
public double doubleMinDist(SpatialComparable mbr1, SpatialComparable mbr2) {
- // Some optimizations for simpler cases.
- if (mbr1 instanceof NumberVector) {
- if (mbr2 instanceof NumberVector) {
- return doubleDistance((NumberVector<?>) mbr1, (NumberVector<?>) mbr2);
+ final int dim1 = mbr1.getDimensionality(), dim2 = mbr2.getDimensionality();
+ final int mindim = (dim1 < dim2) ? dim1 : dim2;
+
+ final NumberVector<?> v1 = (mbr1 instanceof NumberVector) ? (NumberVector<?>) mbr1 : null;
+ final NumberVector<?> v2 = (mbr2 instanceof NumberVector) ? (NumberVector<?>) mbr2 : null;
+
+ double agg = 0.;
+ if (v1 != null) {
+ if (v2 != null) {
+ agg = doublePreDistance(v1, v2, 0, mindim, agg);
+ } else {
+ agg = doublePreDistanceVM(v1, mbr2, 0, mindim, agg);
+ }
+ } else {
+ if (v2 != null) {
+ agg = doublePreDistanceVM(v2, mbr1, 0, mindim, agg);
+ } else {
+ agg = doublePreDistanceMBR(mbr1, mbr2, 0, mindim, agg);
}
}
- // TODO: add optimization for point to MBR?
- final int dim = dimensionality(mbr1, mbr2);
- double agg = 0.;
- for (int d = 0; d < dim; d++) {
- final double diff;
- final double d1 = mbr2.getMin(d) - mbr1.getMax(d);
- if (d1 > 0.) {
- diff = d1;
+ // first object has more dimensions.
+ if (dim1 > mindim) {
+ if (v1 != null) {
+ agg = doublePreNorm(v1, mindim, dim1, agg);
} else {
- final double d2 = mbr1.getMin(d) - mbr2.getMax(d);
- if (d2 > 0.) {
- diff = d2;
- } else {
- // The objects overlap in this dimension.
- continue;
- }
+ agg = doublePreNormMBR(mbr1, mindim, dim1, agg);
}
- if (diff > agg) {
- agg = diff;
+ }
+ // second object has more dimensions.
+ if (dim2 > mindim) {
+ if (v2 != null) {
+ agg = doublePreNorm(v2, mindim, dim2, agg);
+ } else {
+ agg = doublePreNormMBR(mbr2, mindim, dim2, agg);
}
}
return agg;
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/SparseEuclideanDistanceFunction.java b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/SparseEuclideanDistanceFunction.java
index bf621103..8fcf0c84 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/SparseEuclideanDistanceFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/SparseEuclideanDistanceFunction.java
@@ -23,8 +23,6 @@ package de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.BitSet;
-
import de.lmu.ifi.dbs.elki.data.SparseNumberVector;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
@@ -46,50 +44,56 @@ public class SparseEuclideanDistanceFunction extends SparseLPNormDistanceFunctio
*/
@Deprecated
public SparseEuclideanDistanceFunction() {
- super(2.0);
+ super(2.);
}
@Override
public double doubleDistance(SparseNumberVector<?> v1, SparseNumberVector<?> v2) {
// Get the bit masks
- BitSet b1 = v1.getNotNullMask();
- BitSet b2 = v2.getNotNullMask();
- double accu = 0;
- int i1 = b1.nextSetBit(0);
- int i2 = b2.nextSetBit(0);
- while (true) {
- if (i1 == i2) {
- if (i1 < 0) {
- break;
- }
- // Both vectors have a value.
- double val = v1.doubleValue(i1) - v2.doubleValue(i2);
- accu += val * val;
- i1 = b1.nextSetBit(i1 + 1);
- i2 = b2.nextSetBit(i2 + 1);
- } else if (i2 < 0 || (i1 < i2 && i1 >= 0)) {
+ double accu = 0.;
+ int i1 = v1.iter(), i2 = v2.iter();
+ while(v1.iterValid(i1) && v2.iterValid(i2)) {
+ final int d1 = v1.iterDim(i1), d2 = v2.iterDim(i2);
+ if(d1 < d2) {
// In first only
- double val = v1.doubleValue(i1);
+ final double val = v1.iterDoubleValue(i1);
accu += val * val;
- i1 = b1.nextSetBit(i1 + 1);
- } else {
+ i1 = v1.iterAdvance(i1);
+ }
+ else if(d2 < d1) {
// In second only
- double val = v2.doubleValue(i2);
+ final double val = v2.iterDoubleValue(i2);
+ accu += val * val;
+ i2 = v2.iterAdvance(i2);
+ }
+ else {
+ // Both vectors have a value.
+ final double val = v1.iterDoubleValue(i1) - v2.iterDoubleValue(i2);
accu += val * val;
- i2 = b2.nextSetBit(i2 + 1);
+ i1 = v1.iterAdvance(i1);
+ i2 = v2.iterAdvance(i2);
}
}
+ while(v1.iterValid(i1)) {
+ // In first only
+ final double val = v1.iterDoubleValue(i1);
+ accu += val * val;
+ i1 = v1.iterAdvance(i1);
+ }
+ while(v2.iterValid(i2)) {
+ // In second only
+ final double val = v2.iterDoubleValue(i2);
+ accu += val * val;
+ i2 = v2.iterAdvance(i2);
+ }
return Math.sqrt(accu);
}
@Override
public double doubleNorm(SparseNumberVector<?> v1) {
- double accu = 0;
- // Get the bit masks
- BitSet b1 = v1.getNotNullMask();
- // Set in first only
- for (int i = b1.nextSetBit(0); i >= 0; i = b1.nextSetBit(i + 1)) {
- double val = v1.doubleValue(i);
+ double accu = 0.;
+ for(int it = v1.iter(); v1.iterValid(it); it = v1.iterAdvance(it)) {
+ final double val = v1.iterDoubleValue(it);
accu += val * val;
}
return Math.sqrt(accu);
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/SparseLPNormDistanceFunction.java b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/SparseLPNormDistanceFunction.java
index 53d63ff8..9fbd9f7f 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/SparseLPNormDistanceFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/SparseLPNormDistanceFunction.java
@@ -23,8 +23,6 @@ package de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.BitSet;
-
import de.lmu.ifi.dbs.elki.data.SparseNumberVector;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
@@ -32,7 +30,7 @@ import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractPrimitiveDistanceFu
import de.lmu.ifi.dbs.elki.distance.distancefunction.DoubleNorm;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
@@ -46,59 +44,67 @@ public class SparseLPNormDistanceFunction extends AbstractPrimitiveDistanceFunct
/**
* Keeps the currently set p.
*/
- private double p;
+ private double p, invp;
/**
* Provides a LP-Norm for FeatureVectors.
*/
public SparseLPNormDistanceFunction(double p) {
super();
+ this.p = p;
+ this.invp = 1. / p;
}
@Override
public double doubleDistance(SparseNumberVector<?> v1, SparseNumberVector<?> v2) {
// Get the bit masks
- BitSet b1 = v1.getNotNullMask();
- BitSet b2 = v2.getNotNullMask();
- double accu = 0;
- int i1 = b1.nextSetBit(0);
- int i2 = b2.nextSetBit(0);
- while (true) {
- if (i1 == i2) {
- if (i1 < 0) {
- break;
- }
- // Both vectors have a value.
- double val = Math.abs(v1.doubleValue(i1) - v2.doubleValue(i2));
- accu += Math.pow(val, p);
- i1 = b1.nextSetBit(i1 + 1);
- i2 = b2.nextSetBit(i2 + 1);
- } else if (i2 < 0 || (i1 < i2 && i1 >= 0)) {
+ double accu = 0.;
+ int i1 = v1.iter(), i2 = v2.iter();
+ while(v1.iterValid(i1) && v2.iterValid(i2)) {
+ final int d1 = v1.iterDim(i1), d2 = v2.iterDim(i2);
+ if(d1 < d2) {
// In first only
- double val = Math.abs(v1.doubleValue(i1));
+ final double val = Math.abs(v1.iterDoubleValue(i1));
accu += Math.pow(val, p);
- i1 = b1.nextSetBit(i1 + 1);
- } else {
+ i1 = v1.iterAdvance(i1);
+ }
+ else if(d2 < d1) {
// In second only
- double val = Math.abs(v2.doubleValue(i2));
+ final double val = Math.abs(v2.iterDoubleValue(i2));
+ accu += Math.pow(val, p);
+ i2 = v2.iterAdvance(i2);
+ }
+ else {
+ // Both vectors have a value.
+ final double val = Math.abs(v1.iterDoubleValue(i1) - v2.iterDoubleValue(i2));
accu += Math.pow(val, p);
- i2 = b2.nextSetBit(i2 + 1);
+ i1 = v1.iterAdvance(i1);
+ i2 = v2.iterAdvance(i2);
}
}
- return Math.pow(accu, 1.0 / p);
+ while(v1.iterValid(i1)) {
+ // In first only
+ final double val = Math.abs(v1.iterDoubleValue(i1));
+ accu += Math.pow(val, p);
+ i1 = v1.iterAdvance(i1);
+ }
+ while(v2.iterValid(i2)) {
+ // In second only
+ final double val = Math.abs(v2.iterDoubleValue(i2));
+ accu += Math.pow(val, p);
+ i2 = v2.iterAdvance(i2);
+ }
+ return Math.pow(accu, invp);
}
@Override
public double doubleNorm(SparseNumberVector<?> v1) {
- double sqrDist = 0;
- // Get the bit masks
- BitSet b1 = v1.getNotNullMask();
- // Set in first only
- for(int i = b1.nextSetBit(0); i >= 0; i = b1.nextSetBit(i + 1)) {
- double manhattanI = Math.abs(v1.doubleValue(i));
- sqrDist += Math.pow(manhattanI, p);
+ double accu = 0.;
+ for(int it = v1.iter(); v1.iterValid(it); it = v1.iterAdvance(it)) {
+ final double val = Math.abs(v1.iterDoubleValue(it));
+ accu += Math.pow(val, p);
}
- return Math.pow(sqrDist, 1.0 / p);
+ return Math.pow(accu, invp);
}
@Override
@@ -123,7 +129,7 @@ public class SparseLPNormDistanceFunction extends AbstractPrimitiveDistanceFunct
@Override
public boolean isMetric() {
- return (p >= 1);
+ return (p >= 1.);
}
/**
@@ -137,13 +143,13 @@ public class SparseLPNormDistanceFunction extends AbstractPrimitiveDistanceFunct
/**
* Value for p
*/
- double p = 2.0;
+ double p = 2.;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
DoubleParameter pP = new DoubleParameter(LPNormDistanceFunction.P_ID);
- pP.addConstraint(new GreaterConstraint(0));
+ pP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
if(config.grab(pP)) {
p = pP.getValue();
}
@@ -151,10 +157,10 @@ public class SparseLPNormDistanceFunction extends AbstractPrimitiveDistanceFunct
@Override
protected SparseLPNormDistanceFunction makeInstance() {
- if(p == 2.0) {
+ if(p == 2.) {
return SparseEuclideanDistanceFunction.STATIC;
}
- if(p == 1.0) {
+ if(p == 1.) {
return SparseManhattanDistanceFunction.STATIC;
}
if(p == Double.POSITIVE_INFINITY) {
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/SparseManhattanDistanceFunction.java b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/SparseManhattanDistanceFunction.java
index 4e397e0c..b22aaad7 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/SparseManhattanDistanceFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/SparseManhattanDistanceFunction.java
@@ -1,4 +1,5 @@
package de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski;
+
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
@@ -22,8 +23,6 @@ package de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.BitSet;
-
import de.lmu.ifi.dbs.elki.data.SparseNumberVector;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
@@ -37,7 +36,7 @@ public class SparseManhattanDistanceFunction extends SparseLPNormDistanceFunctio
* Static instance
*/
public static final SparseManhattanDistanceFunction STATIC = new SparseManhattanDistanceFunction();
-
+
/**
* Constructor.
*
@@ -45,52 +44,59 @@ public class SparseManhattanDistanceFunction extends SparseLPNormDistanceFunctio
*/
@Deprecated
public SparseManhattanDistanceFunction() {
- super(1.0);
+ super(1.);
}
@Override
public double doubleDistance(SparseNumberVector<?> v1, SparseNumberVector<?> v2) {
// Get the bit masks
- BitSet b1 = v1.getNotNullMask();
- BitSet b2 = v2.getNotNullMask();
- double accu = 0;
- int i1 = b1.nextSetBit(0);
- int i2 = b2.nextSetBit(0);
- while (true) {
- if (i1 == i2) {
- if (i1 < 0) {
- break;
- }
- // Both vectors have a value.
- double val = Math.abs(v1.doubleValue(i1) - v2.doubleValue(i2));
- accu += val;
- i1 = b1.nextSetBit(i1 + 1);
- i2 = b2.nextSetBit(i2 + 1);
- } else if (i2 < 0 || (i1 < i2 && i1 >= 0)) {
+ double accu = 0.;
+ int i1 = v1.iter(), i2 = v2.iter();
+ while(v1.iterValid(i1) && v2.iterValid(i2)) {
+ final int d1 = v1.iterDim(i1), d2 = v2.iterDim(i2);
+ if(d1 < d2) {
// In first only
- double val = Math.abs(v1.doubleValue(i1));
+ final double val = Math.abs(v1.iterDoubleValue(i1));
accu += val;
- i1 = b1.nextSetBit(i1 + 1);
- } else {
+ i1 = v1.iterAdvance(i1);
+ }
+ else if(d2 < d1) {
// In second only
- double val = Math.abs(v2.doubleValue(i2));
+ final double val = Math.abs(v2.iterDoubleValue(i2));
accu += val;
- i2 = b2.nextSetBit(i2 + 1);
+ i2 = v2.iterAdvance(i2);
}
+ else {
+ // Both vectors have a value.
+ final double val = Math.abs(v1.iterDoubleValue(i1) - v2.iterDoubleValue(i2));
+ accu += val;
+ i1 = v1.iterAdvance(i1);
+ i2 = v2.iterAdvance(i2);
+ }
+ }
+ while(v1.iterValid(i1)) {
+ // In first only
+ final double val = Math.abs(v1.iterDoubleValue(i1));
+ accu += val;
+ i1 = v1.iterAdvance(i1);
+ }
+ while(v2.iterValid(i2)) {
+ // In second only
+ final double val = Math.abs(v2.iterDoubleValue(i2));
+ accu += val;
+ i2 = v2.iterAdvance(i2);
}
return accu;
}
@Override
public double doubleNorm(SparseNumberVector<?> v1) {
- double sqrDist = 0;
- // Get the bit masks
- BitSet b1 = v1.getNotNullMask();
- // Set in first only
- for(int i = b1.nextSetBit(0); i >= 0; i = b1.nextSetBit(i + 1)) {
- sqrDist += Math.abs(v1.doubleValue(i));
+ double accu = 0.;
+ for(int it = v1.iter(); v1.iterValid(it); it = v1.iterAdvance(it)) {
+ final double val = Math.abs(v1.iterDoubleValue(it));
+ accu += val;
}
- return sqrDist;
+ return accu;
}
/**
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/SparseMaximumDistanceFunction.java b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/SparseMaximumDistanceFunction.java
index f01ae32b..4a516f7a 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/SparseMaximumDistanceFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/SparseMaximumDistanceFunction.java
@@ -1,9 +1,5 @@
package de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski;
-import java.util.BitSet;
-
-import de.lmu.ifi.dbs.elki.data.SparseNumberVector;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
@@ -26,6 +22,8 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import de.lmu.ifi.dbs.elki.data.SparseNumberVector;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
/**
* Maximum distance function. Optimized for sparse vectors.
@@ -37,7 +35,7 @@ public class SparseMaximumDistanceFunction extends SparseLPNormDistanceFunction
* Static instance
*/
public static final SparseMaximumDistanceFunction STATIC = new SparseMaximumDistanceFunction();
-
+
/**
* Constructor.
*
@@ -51,46 +49,65 @@ public class SparseMaximumDistanceFunction extends SparseLPNormDistanceFunction
@Override
public double doubleDistance(SparseNumberVector<?> v1, SparseNumberVector<?> v2) {
// Get the bit masks
- BitSet b1 = v1.getNotNullMask();
- BitSet b2 = v2.getNotNullMask();
- double accu = 0;
- int i1 = b1.nextSetBit(0);
- int i2 = b2.nextSetBit(0);
- while (true) {
- if (i1 == i2) {
- if (i1 < 0) {
- break;
- }
- // Both vectors have a value.
- double val = Math.abs(v1.doubleValue(i1) - v2.doubleValue(i2));
- accu = Math.max(accu, val);
- i1 = b1.nextSetBit(i1 + 1);
- i2 = b2.nextSetBit(i2 + 1);
- } else if (i2 < 0 || (i1 < i2 && i1 >= 0)) {
+ double accu = 0.;
+ int i1 = v1.iter(), i2 = v2.iter();
+ while(v1.iterValid(i1) && v2.iterValid(i2)) {
+ final int d1 = v1.iterDim(i1), d2 = v2.iterDim(i2);
+ if(d1 < d2) {
// In first only
- double val = Math.abs(v1.doubleValue(i1));
- accu = Math.max(accu, val);
- i1 = b1.nextSetBit(i1 + 1);
- } else {
+ final double val = Math.abs(v1.iterDoubleValue(i1));
+ if(val > accu) {
+ accu = val;
+ }
+ i1 = v1.iterAdvance(i1);
+ }
+ else if(d2 < d1) {
// In second only
- double val = Math.abs(v2.doubleValue(i2));
- accu = Math.max(accu, val);
- i2 = b2.nextSetBit(i2 + 1);
+ final double val = Math.abs(v2.iterDoubleValue(i2));
+ if(val > accu) {
+ accu = val;
+ }
+ i2 = v2.iterAdvance(i2);
}
+ else {
+ // Both vectors have a value.
+ final double val = Math.abs(v1.iterDoubleValue(i1) - v2.iterDoubleValue(i2));
+ if(val > accu) {
+ accu = val;
+ }
+ i1 = v1.iterAdvance(i1);
+ i2 = v2.iterAdvance(i2);
+ }
+ }
+ while(v1.iterValid(i1)) {
+ // In first only
+ final double val = Math.abs(v1.iterDoubleValue(i1));
+ if(val > accu) {
+ accu = val;
+ }
+ i1 = v1.iterAdvance(i1);
+ }
+ while(v2.iterValid(i2)) {
+ // In second only
+ final double val = Math.abs(v2.iterDoubleValue(i2));
+ if(val > accu) {
+ accu = val;
+ }
+ i2 = v2.iterAdvance(i2);
}
return accu;
}
@Override
public double doubleNorm(SparseNumberVector<?> v1) {
- double sqrDist = 0;
- // Get the bit masks
- BitSet b1 = v1.getNotNullMask();
- // Set in first only
- for(int i = b1.nextSetBit(0); i >= 0; i = b1.nextSetBit(i + 1)) {
- sqrDist = Math.max(sqrDist, Math.abs(v1.doubleValue(i)));
+ double accu = 0.;
+ for(int it = v1.iter(); v1.iterValid(it); it = v1.iterAdvance(it)) {
+ final double val = Math.abs(v1.iterDoubleValue(it));
+ if(val > accu) {
+ accu = val;
+ }
}
- return sqrDist;
+ return accu;
}
/**
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/WeightedEuclideanDistanceFunction.java b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/WeightedEuclideanDistanceFunction.java
index 2bc85aae..4fb4e6a2 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/WeightedEuclideanDistanceFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/WeightedEuclideanDistanceFunction.java
@@ -43,74 +43,143 @@ public class WeightedEuclideanDistanceFunction extends WeightedLPNormDistanceFun
super(2.0, weights);
}
- /**
- * Provides the Euclidean distance between the given two vectors.
- *
- * @return the Euclidean distance between the given two vectors as raw double
- * value
- */
- @Override
- public double doubleDistance(NumberVector<?> v1, NumberVector<?> v2) {
- final int dim = dimensionality(v1, v2, weights.length);
- double agg = 0.;
- for (int d = 0; d < dim; d++) {
- final double delta = (v1.doubleValue(d) - v2.doubleValue(d));
+ private final double doublePreDistance(NumberVector<?> v1, NumberVector<?> v2, final int start, final int end, double agg) {
+ for(int d = start; d < end; d++) {
+ final double xd = v1.doubleValue(d), yd = v2.doubleValue(d);
+ final double delta = xd - yd;
agg += delta * delta * weights[d];
}
- return Math.sqrt(agg);
+ return agg;
+ }
+
+ private final double doublePreDistanceVM(NumberVector<?> v, SpatialComparable mbr, final int start, final int end, double agg) {
+ for(int d = start; d < end; d++) {
+ final double value = v.doubleValue(d), min = mbr.getMin(d);
+ double delta = min - value;
+ if(delta < 0.) {
+ delta = value - mbr.getMax(d);
+ }
+ if(delta > 0.) {
+ agg += delta * delta * weights[d];
+ }
+ }
+ return agg;
+ }
+
+ private final double doublePreDistanceMBR(SpatialComparable mbr1, SpatialComparable mbr2, final int start, final int end, double agg) {
+ for(int d = start; d < end; d++) {
+ double delta = mbr2.getMin(d) - mbr1.getMax(d);
+ if(delta < 0.) {
+ delta = mbr1.getMin(d) - mbr2.getMax(d);
+ }
+ if(delta > 0.) {
+ agg += delta * delta * weights[d];
+ }
+ }
+ return agg;
+ }
+
+ private final double doublePreNorm(NumberVector<?> v, final int start, final int end, double agg) {
+ for(int d = start; d < end; d++) {
+ final double xd = v.doubleValue(d);
+ agg += xd * xd * weights[d];
+ }
+ return agg;
+ }
+
+ private final double doublePreNormMBR(SpatialComparable mbr, final int start, final int end, double agg) {
+ for(int d = start; d < end; d++) {
+ double delta = mbr.getMin(d);
+ if(delta < 0.) {
+ delta = -mbr.getMax(d);
+ }
+ if(delta > 0.) {
+ agg += delta * delta * weights[d];
+ }
+ }
+ return agg;
}
@Override
- public double doubleNorm(NumberVector<?> obj) {
- final int dim = obj.getDimensionality();
- double agg = 0.;
- for (int d = 0; d < dim; d++) {
- final double delta = obj.doubleValue(dim);
- agg += delta * delta * weights[d];
+ public double doubleDistance(NumberVector<?> v1, NumberVector<?> v2) {
+ final int dim1 = v1.getDimensionality(), dim2 = v2.getDimensionality();
+ final int mindim = (dim1 < dim2) ? dim1 : dim2;
+ double agg = doublePreDistance(v1, v2, 0, mindim, 0.);
+ if(dim1 > mindim) {
+ agg = doublePreNorm(v1, mindim, dim1, agg);
+ }
+ else if(dim2 > mindim) {
+ agg = doublePreNorm(v2, mindim, dim2, agg);
}
return Math.sqrt(agg);
}
@Override
+ public double doubleNorm(NumberVector<?> v) {
+ return Math.sqrt(doublePreNorm(v, 0, v.getDimensionality(), 0.));
+ }
+
+ @Override
public double doubleMinDist(SpatialComparable mbr1, SpatialComparable mbr2) {
- // Optimization for the simplest case
- if (mbr1 instanceof NumberVector) {
- if (mbr2 instanceof NumberVector) {
- return doubleDistance((NumberVector<?>) mbr1, (NumberVector<?>) mbr2);
+ final int dim1 = mbr1.getDimensionality(), dim2 = mbr2.getDimensionality();
+ final int mindim = (dim1 < dim2) ? dim1 : dim2;
+
+ final NumberVector<?> v1 = (mbr1 instanceof NumberVector) ? (NumberVector<?>) mbr1 : null;
+ final NumberVector<?> v2 = (mbr2 instanceof NumberVector) ? (NumberVector<?>) mbr2 : null;
+
+ double agg = 0.;
+ if(v1 != null) {
+ if(v2 != null) {
+ agg = doublePreDistance(v1, v2, 0, mindim, agg);
+ }
+ else {
+ agg = doublePreDistanceVM(v1, mbr2, 0, mindim, agg);
+ }
+ }
+ else {
+ if(v2 != null) {
+ agg = doublePreDistanceVM(v2, mbr1, 0, mindim, agg);
+ }
+ else {
+ agg = doublePreDistanceMBR(mbr1, mbr2, 0, mindim, agg);
}
}
- // TODO: optimize for more simpler cases: obj vs. rect?
- final int dim = dimensionality(mbr1, mbr2, weights.length);
- double agg = 0;
- for (int d = 0; d < dim; d++) {
- final double diff;
- if (mbr1.getMax(d) < mbr2.getMin(d)) {
- diff = mbr2.getMin(d) - mbr1.getMax(d);
- } else if (mbr1.getMin(d) > mbr2.getMax(d)) {
- diff = mbr1.getMin(d) - mbr2.getMax(d);
- } else { // The mbrs intersect!
- continue;
- }
- agg += diff * diff * weights[d];
+ // first object has more dimensions.
+ if(dim1 > mindim) {
+ if(v1 != null) {
+ agg = doublePreNorm(v1, mindim, dim1, agg);
+ }
+ else {
+ agg = doublePreNormMBR(v1, mindim, dim1, agg);
+ }
+ }
+ // second object has more dimensions.
+ if(dim2 > mindim) {
+ if(v2 != null) {
+ agg = doublePreNorm(v2, mindim, dim2, agg);
+ }
+ else {
+ agg = doublePreNormMBR(mbr2, mindim, dim2, agg);
+ }
}
return Math.sqrt(agg);
}
@Override
public boolean equals(Object obj) {
- if (this == obj) {
+ if(this == obj) {
return true;
}
- if (obj == null) {
+ if(obj == null) {
return false;
}
- if (!(obj instanceof WeightedEuclideanDistanceFunction)) {
- if (obj.getClass().equals(WeightedLPNormDistanceFunction.class)) {
+ if(!(obj instanceof WeightedEuclideanDistanceFunction)) {
+ if(obj.getClass().equals(WeightedLPNormDistanceFunction.class)) {
return super.equals(obj);
}
- if (obj.getClass().equals(EuclideanDistanceFunction.class)) {
- for (double d : weights) {
- if (d != 1.0) {
+ if(obj.getClass().equals(EuclideanDistanceFunction.class)) {
+ for(double d : weights) {
+ if(d != 1.0) {
return false;
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/WeightedLPNormDistanceFunction.java b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/WeightedLPNormDistanceFunction.java
index 3d49c95a..48a9c5a2 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/WeightedLPNormDistanceFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/WeightedLPNormDistanceFunction.java
@@ -27,13 +27,14 @@ import java.util.Arrays;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable;
+import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
/**
* Weighted version of the Minkowski L_p metrics distance function.
*
* @author Erich Schubert
*/
-// TODO: make parameterizable; add optimized variants
public class WeightedLPNormDistanceFunction extends LPNormDistanceFunction {
/**
* Weight array
@@ -51,49 +52,119 @@ public class WeightedLPNormDistanceFunction extends LPNormDistanceFunction {
this.weights = weights;
}
+ private final double doublePreDistance(NumberVector<?> v1, NumberVector<?> v2, final int start, final int end, double agg) {
+ for (int d = start; d < end; d++) {
+ final double xd = v1.doubleValue(d), yd = v2.doubleValue(d);
+ final double delta = (xd >= yd) ? xd - yd : yd - xd;
+ agg += Math.pow(delta, p) * weights[d];
+ }
+ return agg;
+ }
+
+ private final double doublePreDistanceVM(NumberVector<?> v, SpatialComparable mbr, final int start, final int end, double agg) {
+ for (int d = start; d < end; d++) {
+ final double value = v.doubleValue(d), min = mbr.getMin(d);
+ double delta = min - value;
+ if (delta < 0.) {
+ delta = value - mbr.getMax(d);
+ }
+ if (delta > 0.) {
+ agg += Math.pow(delta, p) * weights[d];
+ }
+ }
+ return agg;
+ }
+
+ private final double doublePreDistanceMBR(SpatialComparable mbr1, SpatialComparable mbr2, final int start, final int end, double agg) {
+ for (int d = start; d < end; d++) {
+ double delta = mbr2.getMin(d) - mbr1.getMax(d);
+ if (delta < 0.) {
+ delta = mbr1.getMin(d) - mbr2.getMax(d);
+ }
+ if (delta > 0.) {
+ agg += Math.pow(delta, p) * weights[d];
+ }
+ }
+ return agg;
+ }
+
+ private final double doublePreNorm(NumberVector<?> v, final int start, final int end, double agg) {
+ for (int d = start; d < end; d++) {
+ final double xd = v.doubleValue(d);
+ final double delta = xd >= 0. ? xd : -xd;
+ agg += Math.pow(delta, p) * weights[d];
+ }
+ return agg;
+ }
+
+ private final double doublePreNormMBR(SpatialComparable mbr, final int start, final int end, double agg) {
+ for (int d = start; d < end; d++) {
+ double delta = mbr.getMin(d);
+ if (delta < 0.) {
+ delta = -mbr.getMax(d);
+ }
+ if (delta > 0.) {
+ agg += Math.pow(delta, p) * weights[d];
+ }
+ }
+ return agg;
+ }
+
@Override
public double doubleDistance(NumberVector<?> v1, NumberVector<?> v2) {
- final int dim = dimensionality(v1, v2, weights.length);
- double agg = 0;
- for (int d = 0; d < dim; d++) {
- final double delta = Math.abs(v1.doubleValue(d) - v2.doubleValue(d));
- agg += Math.pow(delta, p) * weights[d];
+ final int dim1 = v1.getDimensionality(), dim2 = v2.getDimensionality();
+ final int mindim = (dim1 < dim2) ? dim1 : dim2;
+ double agg = doublePreDistance(v1, v2, 0, mindim, 0.);
+ if (dim1 > mindim) {
+ agg = doublePreNorm(v1, mindim, dim1, agg);
+ } else if (dim2 > mindim) {
+ agg = doublePreNorm(v2, mindim, dim2, agg);
}
return Math.pow(agg, invp);
}
@Override
public double doubleNorm(NumberVector<?> v) {
- final int dim = v.getDimensionality();
- double agg = 0;
- for (int d = 0; d < dim; d++) {
- final double delta = Math.abs(v.doubleValue(d));
- agg += Math.pow(delta, p) * weights[d];
- }
- return Math.pow(agg, invp);
+ return Math.pow(doublePreNorm(v, 0, v.getDimensionality(), 0.), invp);
}
@Override
public double doubleMinDist(SpatialComparable mbr1, SpatialComparable mbr2) {
- // Optimization for the simplest case
- if (mbr1 instanceof NumberVector) {
- if (mbr2 instanceof NumberVector) {
- return doubleDistance((NumberVector<?>) mbr1, (NumberVector<?>) mbr2);
+ final int dim1 = mbr1.getDimensionality(), dim2 = mbr2.getDimensionality();
+ final int mindim = (dim1 < dim2) ? dim1 : dim2;
+
+ final NumberVector<?> v1 = (mbr1 instanceof NumberVector) ? (NumberVector<?>) mbr1 : null;
+ final NumberVector<?> v2 = (mbr2 instanceof NumberVector) ? (NumberVector<?>) mbr2 : null;
+
+ double agg = 0.;
+ if (v1 != null) {
+ if (v2 != null) {
+ agg = doublePreDistance(v1, v2, 0, mindim, agg);
+ } else {
+ agg = doublePreDistanceVM(v1, mbr2, 0, mindim, agg);
+ }
+ } else {
+ if (v2 != null) {
+ agg = doublePreDistanceVM(v2, mbr1, 0, mindim, agg);
+ } else {
+ agg = doublePreDistanceMBR(mbr1, mbr2, 0, mindim, agg);
+ }
+ }
+ // first object has more dimensions.
+ if (dim1 > mindim) {
+ if (v1 != null) {
+ agg = doublePreNorm(v1, mindim, dim1, agg);
+ } else {
+ agg = doublePreNormMBR(v1, mindim, dim1, agg);
}
}
- // TODO: optimize for more simpler cases: obj vs. rect?
- final int dim = dimensionality(mbr1, mbr2, weights.length);
- double agg = 0;
- for (int d = 0; d < dim; d++) {
- final double diff;
- if (mbr1.getMax(d) < mbr2.getMin(d)) {
- diff = mbr2.getMin(d) - mbr1.getMax(d);
- } else if (mbr1.getMin(d) > mbr2.getMax(d)) {
- diff = mbr1.getMin(d) - mbr2.getMax(d);
- } else { // The mbrs intersect!
- continue;
+ // second object has more dimensions.
+ if (dim2 > mindim) {
+ if (v2 != null) {
+ agg = doublePreNorm(v2, mindim, dim2, agg);
+ } else {
+ agg = doublePreNormMBR(mbr2, mindim, dim2, agg);
}
- agg += Math.pow(diff, p) * weights[d];
}
return Math.pow(agg, invp);
}
@@ -109,7 +180,7 @@ public class WeightedLPNormDistanceFunction extends LPNormDistanceFunction {
if (!(obj instanceof WeightedLPNormDistanceFunction)) {
if (obj instanceof LPNormDistanceFunction && super.equals(obj)) {
for (double d : weights) {
- if (d != 1.0) {
+ if (d != 1.) {
return false;
}
}
@@ -120,4 +191,9 @@ public class WeightedLPNormDistanceFunction extends LPNormDistanceFunction {
WeightedLPNormDistanceFunction other = (WeightedLPNormDistanceFunction) obj;
return Arrays.equals(this.weights, other.weights);
}
+
+ @Override
+ public SimpleTypeInformation<? super NumberVector<?>> getInputTypeRestriction() {
+ return new VectorFieldTypeInformation<>(NumberVector.class, 0, weights.length);
+ }
}
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/WeightedManhattanDistanceFunction.java b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/WeightedManhattanDistanceFunction.java
index 186f0435..b419f0df 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/WeightedManhattanDistanceFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/WeightedManhattanDistanceFunction.java
@@ -40,52 +40,122 @@ public class WeightedManhattanDistanceFunction extends WeightedLPNormDistanceFun
* @param weights Weight vector
*/
public WeightedManhattanDistanceFunction(double[] weights) {
- super(1.0, weights);
+ super(1., weights);
}
- @Override
- public double doubleDistance(NumberVector<?> v1, NumberVector<?> v2) {
- final int dim = dimensionality(v1, v2, weights.length);
- double agg = 0;
- for (int d = 0; d < dim; d++) {
- final double delta = Math.abs(v1.doubleValue(d) - v2.doubleValue(d));
+ private final double doublePreDistance(NumberVector<?> v1, NumberVector<?> v2, final int start, final int end, double agg) {
+ for (int d = start; d < end; d++) {
+ final double xd = v1.doubleValue(d), yd = v2.doubleValue(d);
+ final double delta = (xd >= yd) ? xd - yd : yd - xd;
agg += delta * weights[d];
}
return agg;
}
- @Override
- public double doubleNorm(NumberVector<?> v) {
- final int dim = v.getDimensionality();
- double agg = 0;
- for (int d = 0; d < dim; d++) {
- final double delta = Math.abs(v.doubleValue(d));
+ private final double doublePreDistanceVM(NumberVector<?> v, SpatialComparable mbr, final int start, final int end, double agg) {
+ for (int d = start; d < end; d++) {
+ final double value = v.doubleValue(d), min = mbr.getMin(d);
+ double delta = min - value;
+ if (delta < 0.) {
+ delta = value - mbr.getMax(d);
+ }
+ if (delta > 0.) {
+ agg += delta * weights[d];
+ }
+ }
+ return agg;
+ }
+
+ private final double doublePreDistanceMBR(SpatialComparable mbr1, SpatialComparable mbr2, final int start, final int end, double agg) {
+ for (int d = start; d < end; d++) {
+ double delta = mbr2.getMin(d) - mbr1.getMax(d);
+ if (delta < 0.) {
+ delta = mbr1.getMin(d) - mbr2.getMax(d);
+ }
+ if (delta > 0.) {
+ agg += delta * weights[d];
+ }
+ }
+ return agg;
+ }
+
+ private final double doublePreNorm(NumberVector<?> v, final int start, final int end, double agg) {
+ for (int d = start; d < end; d++) {
+ final double xd = v.doubleValue(d);
+ final double delta = xd >= 0. ? xd : -xd;
agg += delta * weights[d];
}
return agg;
}
+ private final double doublePreNormMBR(SpatialComparable mbr, final int start, final int end, double agg) {
+ for (int d = start; d < end; d++) {
+ double delta = mbr.getMin(d);
+ if (delta < 0.) {
+ delta = -mbr.getMax(d);
+ }
+ if (delta > 0.) {
+ agg += delta * weights[d];
+ }
+ }
+ return agg;
+ }
+
+ @Override
+ public double doubleDistance(NumberVector<?> v1, NumberVector<?> v2) {
+ final int dim1 = v1.getDimensionality(), dim2 = v2.getDimensionality();
+ final int mindim = (dim1 < dim2) ? dim1 : dim2;
+ double agg = doublePreDistance(v1, v2, 0, mindim, 0.);
+ if (dim1 > mindim) {
+ agg = doublePreNorm(v1, mindim, dim1, agg);
+ } else if (dim2 > mindim) {
+ agg = doublePreNorm(v2, mindim, dim2, agg);
+ }
+ return agg;
+ }
+
+ @Override
+ public double doubleNorm(NumberVector<?> v) {
+ return doublePreNorm(v, 0, v.getDimensionality(), 0.);
+ }
+
@Override
public double doubleMinDist(SpatialComparable mbr1, SpatialComparable mbr2) {
- // Optimization for the simplest case
- if (mbr1 instanceof NumberVector) {
- if (mbr2 instanceof NumberVector) {
- return doubleDistance((NumberVector<?>) mbr1, (NumberVector<?>) mbr2);
+ final int dim1 = mbr1.getDimensionality(), dim2 = mbr2.getDimensionality();
+ final int mindim = (dim1 < dim2) ? dim1 : dim2;
+
+ final NumberVector<?> v1 = (mbr1 instanceof NumberVector) ? (NumberVector<?>) mbr1 : null;
+ final NumberVector<?> v2 = (mbr2 instanceof NumberVector) ? (NumberVector<?>) mbr2 : null;
+
+ double agg = 0.;
+ if (v1 != null) {
+ if (v2 != null) {
+ agg = doublePreDistance(v1, v2, 0, mindim, agg);
+ } else {
+ agg = doublePreDistanceVM(v1, mbr2, 0, mindim, agg);
+ }
+ } else {
+ if (v2 != null) {
+ agg = doublePreDistanceVM(v2, mbr1, 0, mindim, agg);
+ } else {
+ agg = doublePreDistanceMBR(mbr1, mbr2, 0, mindim, agg);
+ }
+ }
+ // first object has more dimensions.
+ if (dim1 > mindim) {
+ if (v1 != null) {
+ agg = doublePreNorm(v1, mindim, dim1, agg);
+ } else {
+ agg = doublePreNormMBR(v1, mindim, dim1, agg);
}
}
- // TODO: optimize for more simpler cases: obj vs. rect?
- final int dim = dimensionality(mbr1, mbr2, weights.length);
- double agg = 0;
- for (int d = 0; d < dim; d++) {
- final double diff;
- if (mbr1.getMax(d) < mbr2.getMin(d)) {
- diff = mbr2.getMin(d) - mbr1.getMax(d);
- } else if (mbr1.getMin(d) > mbr2.getMax(d)) {
- diff = mbr1.getMin(d) - mbr2.getMax(d);
- } else { // The mbrs intersect!
- continue;
+ // second object has more dimensions.
+ if (dim2 > mindim) {
+ if (v2 != null) {
+ agg = doublePreNorm(v2, mindim, dim2, agg);
+ } else {
+ agg = doublePreNormMBR(mbr2, mindim, dim2, agg);
}
- agg += diff * weights[d];
}
return agg;
}
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/WeightedMaximumDistanceFunction.java b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/WeightedMaximumDistanceFunction.java
new file mode 100644
index 00000000..c97848dd
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/minkowski/WeightedMaximumDistanceFunction.java
@@ -0,0 +1,193 @@
+package de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Arrays;
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable;
+
+/**
+ * Weighted version of the Minkowski L_p metrics distance function.
+ *
+ * @author Erich Schubert
+ */
+public class WeightedMaximumDistanceFunction extends WeightedLPNormDistanceFunction {
+ /**
+ * Constructor.
+ *
+ * @param weights Weight vector
+ */
+ public WeightedMaximumDistanceFunction(double[] weights) {
+ super(Double.POSITIVE_INFINITY, weights);
+ }
+
+ private final double doublePreDistance(NumberVector<?> v1, NumberVector<?> v2, final int start, final int end, double agg) {
+ for(int d = start; d < end; d++) {
+ final double xd = v1.doubleValue(d), yd = v2.doubleValue(d);
+ final double delta = ((xd >= yd) ? xd - yd : yd - xd) * weights[d];
+ if(delta > agg) {
+ agg = delta;
+ }
+ }
+ return agg;
+ }
+
+ private final double doublePreDistanceVM(NumberVector<?> v, SpatialComparable mbr, final int start, final int end, double agg) {
+ for(int d = start; d < end; d++) {
+ final double value = v.doubleValue(d), min = mbr.getMin(d);
+ double delta = min - value;
+ if(delta < 0.) {
+ delta = value - mbr.getMax(d);
+ }
+ delta *= weights[d];
+ if(delta > agg) {
+ agg = delta;
+ }
+ }
+ return agg;
+ }
+
+ private final double doublePreDistanceMBR(SpatialComparable mbr1, SpatialComparable mbr2, final int start, final int end, double agg) {
+ for(int d = start; d < end; d++) {
+ double delta = mbr2.getMin(d) - mbr1.getMax(d);
+ if(delta < 0.) {
+ delta = mbr1.getMin(d) - mbr2.getMax(d);
+ }
+ delta *= weights[d];
+ if(delta > agg) {
+ agg = delta;
+ }
+ }
+ return agg;
+ }
+
+ private final double doublePreNorm(NumberVector<?> v, final int start, final int end, double agg) {
+ for(int d = start; d < end; d++) {
+ final double xd = v.doubleValue(d);
+ final double delta = (xd >= 0. ? xd : -xd) * weights[d];
+ if(delta > agg) {
+ agg = delta;
+ }
+ }
+ return agg;
+ }
+
+ private final double doublePreNormMBR(SpatialComparable mbr, final int start, final int end, double agg) {
+ for(int d = start; d < end; d++) {
+ double delta = mbr.getMin(d);
+ if(delta < 0.) {
+ delta = -mbr.getMax(d);
+ }
+ delta *= weights[d];
+ if(delta > agg) {
+ agg = delta;
+ }
+ }
+ return agg;
+ }
+
+ @Override
+ public double doubleDistance(NumberVector<?> v1, NumberVector<?> v2) {
+ final int dim1 = v1.getDimensionality(), dim2 = v2.getDimensionality();
+ final int mindim = (dim1 < dim2) ? dim1 : dim2;
+ double agg = doublePreDistance(v1, v2, 0, mindim, 0.);
+ if(dim1 > mindim) {
+ agg = doublePreNorm(v1, mindim, dim1, agg);
+ }
+ else if(dim2 > mindim) {
+ agg = doublePreNorm(v2, mindim, dim2, agg);
+ }
+ return agg;
+ }
+
+ @Override
+ public double doubleNorm(NumberVector<?> v) {
+ return doublePreNorm(v, 0, v.getDimensionality(), 0.);
+ }
+
+ @Override
+ public double doubleMinDist(SpatialComparable mbr1, SpatialComparable mbr2) {
+ final int dim1 = mbr1.getDimensionality(), dim2 = mbr2.getDimensionality();
+ final int mindim = (dim1 < dim2) ? dim1 : dim2;
+
+ final NumberVector<?> v1 = (mbr1 instanceof NumberVector) ? (NumberVector<?>) mbr1 : null;
+ final NumberVector<?> v2 = (mbr2 instanceof NumberVector) ? (NumberVector<?>) mbr2 : null;
+
+ double agg = 0.;
+ if(v1 != null) {
+ if(v2 != null) {
+ agg = doublePreDistance(v1, v2, 0, mindim, agg);
+ }
+ else {
+ agg = doublePreDistanceVM(v1, mbr2, 0, mindim, agg);
+ }
+ }
+ else {
+ if(v2 != null) {
+ agg = doublePreDistanceVM(v2, mbr1, 0, mindim, agg);
+ }
+ else {
+ agg = doublePreDistanceMBR(mbr1, mbr2, 0, mindim, agg);
+ }
+ }
+ // first object has more dimensions.
+ if(dim1 > mindim) {
+ if(v1 != null) {
+ agg = doublePreNorm(v1, mindim, dim1, agg);
+ }
+ else {
+ agg = doublePreNormMBR(v1, mindim, dim1, agg);
+ }
+ }
+ // second object has more dimensions.
+ if(dim2 > mindim) {
+ if(v2 != null) {
+ agg = doublePreNorm(v2, mindim, dim2, agg);
+ }
+ else {
+ agg = doublePreNormMBR(mbr2, mindim, dim2, agg);
+ }
+ }
+ return agg;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if(this == obj) {
+ return true;
+ }
+ if(obj == null) {
+ return false;
+ }
+ if(!(obj instanceof WeightedMaximumDistanceFunction)) {
+ if(obj instanceof WeightedLPNormDistanceFunction) {
+ return super.equals(obj);
+ }
+ return false;
+ }
+ WeightedMaximumDistanceFunction other = (WeightedMaximumDistanceFunction) obj;
+ return Arrays.equals(this.weights, other.weights);
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/probabilistic/package-info.java b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/probabilistic/package-info.java
index 335c6ae0..7a6f705c 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/probabilistic/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/probabilistic/package-info.java
@@ -3,4 +3,27 @@
*
* @author Erich Schubert
*/
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
package de.lmu.ifi.dbs.elki.distance.distancefunction.probabilistic; \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/subspace/AbstractDimensionsSelectingDoubleDistanceFunction.java b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/subspace/AbstractDimensionsSelectingDoubleDistanceFunction.java
index 25cb6407..052e089c 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/subspace/AbstractDimensionsSelectingDoubleDistanceFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/subspace/AbstractDimensionsSelectingDoubleDistanceFunction.java
@@ -31,8 +31,7 @@ import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunc
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ListEachConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntListParameter;
@@ -87,10 +86,10 @@ public abstract class AbstractDimensionsSelectingDoubleDistanceFunction<V extend
@Override
public boolean equals(Object obj) {
- if (obj == null) {
+ if(obj == null) {
return false;
}
- if (!this.getClass().equals(obj.getClass())) {
+ if(!this.getClass().equals(obj.getClass())) {
return false;
}
return this.dimensions.equals(((AbstractDimensionsSelectingDoubleDistanceFunction<?>) obj).dimensions);
@@ -111,10 +110,10 @@ public abstract class AbstractDimensionsSelectingDoubleDistanceFunction<V extend
super.makeOptions(config);
dimensions = new BitSet();
final IntListParameter dimsP = new IntListParameter(DIMS_ID);
- dimsP.addConstraint(new ListEachConstraint<Integer>(new GreaterEqualConstraint(0)));
+ dimsP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_INT_LIST);
dimsP.setOptional(true);
- if (config.grab(dimsP)) {
- for (int d : dimsP.getValue()) {
+ if(config.grab(dimsP)) {
+ for(int d : dimsP.getValue()) {
dimensions.set(d);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/subspace/AbstractPreferenceVectorBasedCorrelationDistanceFunction.java b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/subspace/AbstractPreferenceVectorBasedCorrelationDistanceFunction.java
index f3a07639..20ee637e 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/subspace/AbstractPreferenceVectorBasedCorrelationDistanceFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/subspace/AbstractPreferenceVectorBasedCorrelationDistanceFunction.java
@@ -34,7 +34,7 @@ import de.lmu.ifi.dbs.elki.distance.distancevalue.PreferenceVectorBasedCorrelati
import de.lmu.ifi.dbs.elki.index.IndexFactory;
import de.lmu.ifi.dbs.elki.index.preprocessed.preference.PreferenceVectorIndex;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
@@ -222,7 +222,7 @@ public abstract class AbstractPreferenceVectorBasedCorrelationDistanceFunction<V
protected void configEpsilon(Parameterization config) {
final DoubleParameter epsilonP = new DoubleParameter(EPSILON_ID, 0.001);
- epsilonP.addConstraint(new GreaterEqualConstraint(0));
+ epsilonP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
if (config.grab(epsilonP)) {
epsilon = epsilonP.doubleValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/subspace/DimensionSelectingDistanceFunction.java b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/subspace/DimensionSelectingDistanceFunction.java
index fe593f0a..5fbe1c73 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/subspace/DimensionSelectingDistanceFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/subspace/DimensionSelectingDistanceFunction.java
@@ -33,7 +33,7 @@ import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractSpatialDoubleDistan
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -75,7 +75,7 @@ public class DimensionSelectingDistanceFunction extends AbstractSpatialDoubleDis
*/
@Override
public double doubleDistance(NumberVector<?> v1, NumberVector<?> v2) {
- if (dim >= v1.getDimensionality() || dim >= v2.getDimensionality() || dim < 0) {
+ if(dim >= v1.getDimensionality() || dim >= v2.getDimensionality() || dim < 0) {
throw new IllegalArgumentException("Specified dimension to be considered " + "is larger that dimensionality of FeatureVectors:" + "\n first argument: " + v1.toString() + "\n second argument: " + v2.toString() + "\n dimension: " + dim);
}
@@ -85,18 +85,20 @@ public class DimensionSelectingDistanceFunction extends AbstractSpatialDoubleDis
@Override
public double doubleMinDist(SpatialComparable mbr1, SpatialComparable mbr2) {
- if (dim >= mbr1.getDimensionality() || dim >= mbr2.getDimensionality() || dim < 0) {
+ if(dim >= mbr1.getDimensionality() || dim >= mbr2.getDimensionality() || dim < 0) {
throw new IllegalArgumentException("Specified dimension to be considered " + "is larger that dimensionality of FeatureVectors:" + "\n first argument: " + mbr1.toString() + "\n second argument: " + mbr2.toString() + "\n dimension: " + dim);
}
double m1, m2;
- if (mbr1.getMax(dim) < mbr2.getMin(dim)) {
+ if(mbr1.getMax(dim) < mbr2.getMin(dim)) {
m1 = mbr1.getMax(dim);
m2 = mbr2.getMin(dim);
- } else if (mbr1.getMin(dim) > mbr2.getMax(dim)) {
+ }
+ else if(mbr1.getMin(dim) > mbr2.getMax(dim)) {
m1 = mbr1.getMin(dim);
m2 = mbr2.getMax(dim);
- } else { // The mbrs intersect!
+ }
+ else { // The mbrs intersect!
m1 = 0;
m2 = 0;
}
@@ -130,10 +132,10 @@ public class DimensionSelectingDistanceFunction extends AbstractSpatialDoubleDis
@Override
public void setSelectedDimensions(BitSet dimensions) {
dim = dimensions.nextSetBit(0);
- if (dim == -1) {
+ if(dim == -1) {
throw new IllegalStateException("No dimension was set.");
}
- if (dimensions.nextSetBit(dim + 1) > 0) {
+ if(dimensions.nextSetBit(dim + 1) > 0) {
throw new IllegalStateException("More than one dimension was set.");
}
}
@@ -150,10 +152,10 @@ public class DimensionSelectingDistanceFunction extends AbstractSpatialDoubleDis
@Override
public boolean equals(Object obj) {
- if (obj == null) {
+ if(obj == null) {
return false;
}
- if (!this.getClass().equals(obj.getClass())) {
+ if(!this.getClass().equals(obj.getClass())) {
return false;
}
return this.dim == ((DimensionSelectingDistanceFunction) obj).dim;
@@ -173,8 +175,8 @@ public class DimensionSelectingDistanceFunction extends AbstractSpatialDoubleDis
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
final IntParameter dimP = new IntParameter(DIM_ID);
- dimP.addConstraint(new GreaterEqualConstraint(0));
- if (config.grab(dimP)) {
+ dimP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_INT);
+ if(config.grab(dimP)) {
dim = dimP.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/subspace/SubspaceLPNormDistanceFunction.java b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/subspace/SubspaceLPNormDistanceFunction.java
index 2fbdf876..6c9579ad 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/subspace/SubspaceLPNormDistanceFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/subspace/SubspaceLPNormDistanceFunction.java
@@ -32,10 +32,11 @@ import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
import de.lmu.ifi.dbs.elki.database.query.distance.SpatialPrimitiveDistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DoubleNorm;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDoubleDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.LPNormDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
@@ -45,7 +46,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
*
* @author Elke Achtert
*/
-public class SubspaceLPNormDistanceFunction extends AbstractDimensionsSelectingDoubleDistanceFunction<NumberVector<?>> implements SpatialPrimitiveDoubleDistanceFunction<NumberVector<?>>, DoubleNorm<NumberVector<?>> {
+public class SubspaceLPNormDistanceFunction extends AbstractDimensionsSelectingDoubleDistanceFunction<NumberVector<?>> implements SpatialPrimitiveDoubleDistanceFunction<NumberVector<?>>, DoubleNorm<NumberVector<?>>, NumberVectorDistanceFunction<DoubleDistance> {
/**
* Value of p
*/
@@ -200,7 +201,7 @@ public class SubspaceLPNormDistanceFunction extends AbstractDimensionsSelectingD
@Override
protected void makeOptions(Parameterization config) {
final DoubleParameter paramP = new DoubleParameter(LPNormDistanceFunction.P_ID);
- paramP.addConstraint(new GreaterConstraint(0));
+ paramP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
if(config.grab(paramP)) {
p = paramP.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/subspace/SubspaceMaximumDistanceFunction.java b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/subspace/SubspaceMaximumDistanceFunction.java
new file mode 100644
index 00000000..60791159
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/subspace/SubspaceMaximumDistanceFunction.java
@@ -0,0 +1,149 @@
+package de.lmu.ifi.dbs.elki.distance.distancefunction.subspace;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.BitSet;
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable;
+
+/**
+ * Provides a distance function that computes the maximum distance between
+ * feature vectors only in specified dimensions (Chebyshev / L-infinity norm).
+ *
+ * @author Elke Achtert
+ */
+public class SubspaceMaximumDistanceFunction extends SubspaceLPNormDistanceFunction {
+ /**
+ * Constructor.
+ *
+ * @param dimensions Selected dimensions
+ */
+ public SubspaceMaximumDistanceFunction(BitSet dimensions) {
+ super(1.0, dimensions);
+ }
+
+ /**
+  * Provides the maximum distance between two given feature vectors in the
+  * selected dimensions.
+  *
+  * @param v1 first feature vector
+  * @param v2 second feature vector
+  * @return the maximum distance between two given feature vectors in the
+  *         selected dimensions
+  */
+ @Override
+ public double doubleDistance(NumberVector<?> v1, NumberVector<?> v2) {
+ if (v1.getDimensionality() != v2.getDimensionality()) {
+ throw new IllegalArgumentException("Different dimensionality of FeatureVectors\n " + "first argument: " + v1 + "\n " + "second argument: " + v2);
+ }
+
+ double agg = 0.;
+ for (int d = dimensions.nextSetBit(0); d >= 0; d = dimensions.nextSetBit(d + 1)) {
+ double v = Math.abs(v1.doubleValue(d) - v2.doubleValue(d));
+ if (v > agg) {
+ agg = v;
+ }
+ }
+ return agg;
+ }
+
+ @Override
+ protected double doubleMinDistObject(SpatialComparable mbr, NumberVector<?> v) {
+ if (mbr.getDimensionality() != v.getDimensionality()) {
+ throw new IllegalArgumentException("Different dimensionality of objects\n " + "first argument: " + mbr.toString() + "\n " + "second argument: " + v.toString());
+ }
+
+ double agg = 0.;
+ for (int d = dimensions.nextSetBit(0); d >= 0; d = dimensions.nextSetBit(d + 1)) {
+ final double value = v.doubleValue(d);
+ final double omin = mbr.getMin(d);
+ final double diff1 = omin - value;
+ if (diff1 > 0.) {
+ if (diff1 > agg) {
+ agg = diff1;
+ }
+ } else {
+ final double omax = mbr.getMax(d);
+ final double diff2 = value - omax;
+ if (diff2 > agg) {
+ agg = diff2;
+ }
+ }
+ }
+ return agg;
+ }
+
+ @Override
+ public double doubleMinDist(SpatialComparable mbr1, SpatialComparable mbr2) {
+ if (mbr1.getDimensionality() != mbr2.getDimensionality()) {
+ throw new IllegalArgumentException("Different dimensionality of objects\n " + "first argument: " + mbr1.toString() + "\n " + "second argument: " + mbr2.toString());
+ }
+ double agg = 0.;
+ for (int d = dimensions.nextSetBit(0); d >= 0; d = dimensions.nextSetBit(d + 1)) {
+ final double max1 = mbr1.getMax(d);
+ final double min2 = mbr2.getMin(d);
+ if (max1 < min2) {
+ double v = min2 - max1;
+ if (v > agg) {
+ agg = v;
+ }
+ } else {
+ final double min1 = mbr1.getMin(d);
+ final double max2 = mbr2.getMax(d);
+ double v = min1 - max2;
+ if (v > agg) {
+ agg = v;
+ }
+ }
+ }
+ return agg;
+ }
+
+ @Override
+ public double doubleNorm(NumberVector<?> obj) {
+ double agg = 0.;
+ for (int d = dimensions.nextSetBit(0); d >= 0; d = dimensions.nextSetBit(d + 1)) {
+ double v = Math.abs(obj.doubleValue(d));
+ if (v > agg) {
+ agg = v;
+ }
+ }
+ return agg;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractDimensionsSelectingDoubleDistanceFunction.Parameterizer {
+ @Override
+ protected SubspaceMaximumDistanceFunction makeInstance() {
+ return new SubspaceMaximumDistanceFunction(dimensions);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/timeseries/AbstractEditDistanceFunction.java b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/timeseries/AbstractEditDistanceFunction.java
index 76630586..c6a35985 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/timeseries/AbstractEditDistanceFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/timeseries/AbstractEditDistanceFunction.java
@@ -29,8 +29,7 @@ import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractVectorDoubleDistanceFunction;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
@@ -68,10 +67,10 @@ public abstract class AbstractEditDistanceFunction extends AbstractVectorDoubleD
@Override
public boolean equals(Object obj) {
- if (obj == null) {
+ if(obj == null) {
return false;
}
- if (!this.getClass().equals(obj.getClass())) {
+ if(!this.getClass().equals(obj.getClass())) {
return false;
}
return this.bandSize == ((AbstractEditDistanceFunction) obj).bandSize;
@@ -91,9 +90,9 @@ public abstract class AbstractEditDistanceFunction extends AbstractVectorDoubleD
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
final DoubleParameter bandSizeP = new DoubleParameter(BANDSIZE_ID, 0.1);
- bandSizeP.addConstraint(new GreaterEqualConstraint(0));
- bandSizeP.addConstraint(new LessEqualConstraint(1));
- if (config.grab(bandSizeP)) {
+ bandSizeP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
+ bandSizeP.addConstraint(CommonConstraints.LESS_EQUAL_ONE_DOUBLE);
+ if(config.grab(bandSizeP)) {
bandSize = bandSizeP.doubleValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/timeseries/EDRDistanceFunction.java b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/timeseries/EDRDistanceFunction.java
index 0e38d8bd..d48a21f0 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/timeseries/EDRDistanceFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/timeseries/EDRDistanceFunction.java
@@ -28,7 +28,7 @@ import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
@@ -82,7 +82,7 @@ public class EDRDistanceFunction extends AbstractEditDistanceFunction {
// features2.length + ", band: " + band);
final double deltaValue = delta;
- for (int i = 0; i < v1.getDimensionality(); i++) {
+ for(int i = 0; i < v1.getDimensionality(); i++) {
// Swap current and prev arrays. We'll just overwrite the new curr.
{
double[] temp = prev;
@@ -90,16 +90,16 @@ public class EDRDistanceFunction extends AbstractEditDistanceFunction {
curr = temp;
}
int l = i - (band + 1);
- if (l < 0) {
+ if(l < 0) {
l = 0;
}
int r = i + (band + 1);
- if (r > (v2.getDimensionality() - 1)) {
+ if(r > (v2.getDimensionality() - 1)) {
r = (v2.getDimensionality() - 1);
}
- for (int j = l; j <= r; j++) {
- if (Math.abs(i - j) <= band) {
+ for(int j = l; j <= r; j++) {
+ if(Math.abs(i - j) <= band) {
// compute squared distance
double val1 = v1.doubleValue(i);
double val2 = v2.doubleValue(j);
@@ -110,23 +110,27 @@ public class EDRDistanceFunction extends AbstractEditDistanceFunction {
final double subcost = (d <= deltaValue) ? 0 : 1;
- if ((i + j) != 0) {
- if ((i == 0) || ((j != 0) && (((prev[j - 1] + subcost) > (curr[j - 1] + 1)) && ((curr[j - 1] + 1) < (prev[j] + 1))))) {
+ if((i + j) != 0) {
+ if((i == 0) || ((j != 0) && (((prev[j - 1] + subcost) > (curr[j - 1] + 1)) && ((curr[j - 1] + 1) < (prev[j] + 1))))) {
// del
cost = curr[j - 1] + 1;
- } else if ((j == 0) || ((i != 0) && (((prev[j - 1] + subcost) > (prev[j] + 1)) && ((prev[j] + 1) < (curr[j - 1] + 1))))) {
+ }
+ else if((j == 0) || ((i != 0) && (((prev[j - 1] + subcost) > (prev[j] + 1)) && ((prev[j] + 1) < (curr[j - 1] + 1))))) {
// ins
cost = prev[j] + 1;
- } else {
+ }
+ else {
// match
cost = prev[j - 1] + subcost;
}
- } else {
+ }
+ else {
cost = 0;
}
curr[j] = cost;
- } else {
+ }
+ else {
curr[j] = Double.POSITIVE_INFINITY; // outside band
}
}
@@ -137,7 +141,7 @@ public class EDRDistanceFunction extends AbstractEditDistanceFunction {
@Override
public boolean equals(Object obj) {
- if (!super.equals(obj)) {
+ if(!super.equals(obj)) {
return false;
}
return this.delta == ((EDRDistanceFunction) obj).delta;
@@ -157,8 +161,8 @@ public class EDRDistanceFunction extends AbstractEditDistanceFunction {
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
final DoubleParameter deltaP = new DoubleParameter(DELTA_ID, 1.0);
- deltaP.addConstraint(new GreaterEqualConstraint(0));
- if (config.grab(deltaP)) {
+ deltaP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
+ if(config.grab(deltaP)) {
delta = deltaP.doubleValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/timeseries/ERPDistanceFunction.java b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/timeseries/ERPDistanceFunction.java
index fd5bb61c..e7d7dd7e 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/timeseries/ERPDistanceFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/timeseries/ERPDistanceFunction.java
@@ -28,7 +28,7 @@ import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
@@ -78,7 +78,7 @@ public class ERPDistanceFunction extends AbstractEditDistanceFunction {
// bandsize is the maximum allowed distance to the diagonal
final int band = (int) Math.ceil(v2.getDimensionality() * bandSize);
- for (int i = 0; i < v1.getDimensionality(); i++) {
+ for(int i = 0; i < v1.getDimensionality(); i++) {
// Swap current and prev arrays. We'll just overwrite the new curr.
{
double[] temp = prev;
@@ -86,16 +86,16 @@ public class ERPDistanceFunction extends AbstractEditDistanceFunction {
curr = temp;
}
int l = i - (band + 1);
- if (l < 0) {
+ if(l < 0) {
l = 0;
}
int r = i + (band + 1);
- if (r > (v2.getDimensionality() - 1)) {
+ if(r > (v2.getDimensionality() - 1)) {
r = (v2.getDimensionality() - 1);
}
- for (int j = l; j <= r; j++) {
- if (Math.abs(i - j) <= band) {
+ for(int j = l; j <= r; j++) {
+ if(Math.abs(i - j) <= band) {
// compute squared distance of feature vectors
double val1 = v1.doubleValue(i);
double val2 = g;
@@ -118,24 +118,28 @@ public class ERPDistanceFunction extends AbstractEditDistanceFunction {
final double cost;
- if ((i + j) != 0) {
- if ((i == 0) || ((j != 0) && (((prev[j - 1] + dist12) > (curr[j - 1] + dist2)) && ((curr[j - 1] + dist2) < (prev[j] + dist1))))) {
+ if((i + j) != 0) {
+ if((i == 0) || ((j != 0) && (((prev[j - 1] + dist12) > (curr[j - 1] + dist2)) && ((curr[j - 1] + dist2) < (prev[j] + dist1))))) {
// del
cost = curr[j - 1] + dist2;
- } else if ((j == 0) || ((i != 0) && (((prev[j - 1] + dist12) > (prev[j] + dist1)) && ((prev[j] + dist1) < (curr[j - 1] + dist2))))) {
+ }
+ else if((j == 0) || ((i != 0) && (((prev[j - 1] + dist12) > (prev[j] + dist1)) && ((prev[j] + dist1) < (curr[j - 1] + dist2))))) {
// ins
cost = prev[j] + dist1;
- } else {
+ }
+ else {
// match
cost = prev[j - 1] + dist12;
}
- } else {
+ }
+ else {
cost = 0;
}
curr[j] = cost;
// steps[i][j] = step;
- } else {
+ }
+ else {
curr[j] = Double.POSITIVE_INFINITY; // outside band
}
}
@@ -146,7 +150,7 @@ public class ERPDistanceFunction extends AbstractEditDistanceFunction {
@Override
public boolean equals(Object obj) {
- if (!super.equals(obj)) {
+ if(!super.equals(obj)) {
return false;
}
return this.g == ((ERPDistanceFunction) obj).g;
@@ -166,8 +170,8 @@ public class ERPDistanceFunction extends AbstractEditDistanceFunction {
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
final DoubleParameter gP = new DoubleParameter(G_ID, 0.0);
- gP.addConstraint(new GreaterEqualConstraint(0));
- if (config.grab(gP)) {
+ gP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
+ if(config.grab(gP)) {
g = gP.doubleValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/timeseries/LCSSDistanceFunction.java b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/timeseries/LCSSDistanceFunction.java
index 2998248d..4f1c0850 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/timeseries/LCSSDistanceFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/timeseries/LCSSDistanceFunction.java
@@ -34,8 +34,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
@@ -219,15 +218,15 @@ public class LCSSDistanceFunction extends AbstractVectorDoubleDistanceFunction {
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
final DoubleParameter pDeltaP = new DoubleParameter(PDELTA_ID, 0.1);
- pDeltaP.addConstraint(new GreaterEqualConstraint(0));
- pDeltaP.addConstraint(new LessEqualConstraint(1));
+ pDeltaP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
+ pDeltaP.addConstraint(CommonConstraints.LESS_EQUAL_ONE_DOUBLE);
if (config.grab(pDeltaP)) {
pDelta = pDeltaP.doubleValue();
}
final DoubleParameter pEpsilonP = new DoubleParameter(PEPSILON_ID, 0.05);
- pEpsilonP.addConstraint(new GreaterEqualConstraint(0));
- pEpsilonP.addConstraint(new LessEqualConstraint(1));
+ pEpsilonP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
+ pEpsilonP.addConstraint(CommonConstraints.LESS_EQUAL_ONE_DOUBLE);
if (config.grab(pEpsilonP)) {
pEpsilon = pEpsilonP.doubleValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancevalue/BitDistance.java b/src/de/lmu/ifi/dbs/elki/distance/distancevalue/BitDistance.java
index 026e90aa..7c257bb5 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/distancevalue/BitDistance.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/distancevalue/BitDistance.java
@@ -48,6 +48,16 @@ public class BitDistance extends NumberDistance<BitDistance, Bit> {
private boolean value;
/**
+ * Distance 0.
+ */
+ public static final BitDistance ZERO = new BitDistance(false);
+
+ /**
+ * Distance 1.
+ */
+ public static final BitDistance ONE = new BitDistance(true);
+
+ /**
* Generated serial version UID
*/
private static final long serialVersionUID = 6514853467081717551L;
@@ -81,7 +91,7 @@ public class BitDistance extends NumberDistance<BitDistance, Bit> {
@Override
public BitDistance fromDouble(double val) {
- return new BitDistance(val > 0);
+ return (val > 0) ? ONE : ZERO;
}
/**
@@ -122,7 +132,7 @@ public class BitDistance extends NumberDistance<BitDistance, Bit> {
@Override
public double doubleValue() {
- return value ? 1.0 : 0.0;
+ return value ? 1. : 0.;
}
@Override
@@ -142,21 +152,24 @@ public class BitDistance extends NumberDistance<BitDistance, Bit> {
@Override
public BitDistance parseString(String val) throws IllegalArgumentException {
- if (testInputPattern(val)) {
- return new BitDistance(Bit.valueOf(val).bitValue());
- } else {
- throw new IllegalArgumentException("Given pattern \"" + val + "\" does not match required pattern \"" + requiredInputPattern() + "\"");
+ int i = Integer.parseInt(val);
+ if(i == 0) {
+ return ZERO;
+ }
+ if(i == 1) {
+ return ONE;
}
+ throw new IllegalArgumentException("Given pattern \"" + val + "\" does not match required pattern \"" + requiredInputPattern() + "\"");
}
@Override
public BitDistance infiniteDistance() {
- return new BitDistance(true);
+ return ONE;
}
@Override
public BitDistance nullDistance() {
- return new BitDistance(false);
+ return ZERO;
}
@Override
@@ -196,13 +209,13 @@ public class BitDistance extends NumberDistance<BitDistance, Bit> {
@Override
public boolean equals(Object obj) {
- if (this == obj) {
+ if(this == obj) {
return true;
}
- if (!super.equals(obj)) {
+ if(!super.equals(obj)) {
return false;
}
- if (getClass() != obj.getClass()) {
+ if(getClass() != obj.getClass()) {
return false;
}
BitDistance other = (BitDistance) obj;
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancevalue/DoubleDistance.java b/src/de/lmu/ifi/dbs/elki/distance/distancevalue/DoubleDistance.java
index 4126374d..c49ba6a3 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/distancevalue/DoubleDistance.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/distancevalue/DoubleDistance.java
@@ -164,7 +164,7 @@ public class DoubleDistance extends NumberDistance<DoubleDistance, Double> {
return infiniteDistance();
}
if (testInputPattern(val)) {
- return new DoubleDistance(Double.parseDouble(val));
+ return new DoubleDistance(FormatUtil.parseDouble(val));
} else {
throw new IllegalArgumentException("Given pattern \"" + val + "\" does not match required pattern \"" + requiredInputPattern() + "\"");
}
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancevalue/PCACorrelationDistance.java b/src/de/lmu/ifi/dbs/elki/distance/distancevalue/PCACorrelationDistance.java
index 69087d59..30026285 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/distancevalue/PCACorrelationDistance.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/distancevalue/PCACorrelationDistance.java
@@ -25,6 +25,8 @@ package de.lmu.ifi.dbs.elki.distance.distancevalue;
import java.util.regex.Pattern;
+import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
+
/**
* The correlation distance is a special Distance that indicates the
@@ -83,7 +85,7 @@ public class PCACorrelationDistance extends CorrelationDistance<PCACorrelationDi
}
if(testInputPattern(val)) {
String[] values = SEPARATOR.split(val);
- return new PCACorrelationDistance(Integer.parseInt(values[0]), Double.parseDouble(values[1]));
+ return new PCACorrelationDistance(Integer.parseInt(values[0]), FormatUtil.parseDouble(values[1]));
}
else {
throw new IllegalArgumentException("Given pattern \"" + val + "\" does not match required pattern \"" + requiredInputPattern() + "\"");
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancevalue/PreferenceVectorBasedCorrelationDistance.java b/src/de/lmu/ifi/dbs/elki/distance/distancevalue/PreferenceVectorBasedCorrelationDistance.java
index 0c82e6b5..cc15eb24 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/distancevalue/PreferenceVectorBasedCorrelationDistance.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/distancevalue/PreferenceVectorBasedCorrelationDistance.java
@@ -29,6 +29,8 @@ import java.io.ObjectOutput;
import java.util.BitSet;
import java.util.regex.Pattern;
+import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
+
/**
* A PreferenceVectorBasedCorrelationDistance holds additionally to the
* CorrelationDistance the common preference vector of the two objects defining
@@ -181,7 +183,7 @@ public class PreferenceVectorBasedCorrelationDistance extends CorrelationDistanc
}
if(testInputPattern(pattern)) {
String[] values = SEPARATOR.split(pattern);
- return new PreferenceVectorBasedCorrelationDistance(-1, Integer.parseInt(values[0]), Double.parseDouble(values[1]), new BitSet());
+ return new PreferenceVectorBasedCorrelationDistance(-1, Integer.parseInt(values[0]), FormatUtil.parseDouble(values[1]), new BitSet());
}
else {
throw new IllegalArgumentException("Given pattern \"" + pattern + "\" does not match required pattern \"" + requiredInputPattern() + "\"");
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancevalue/SubspaceDistance.java b/src/de/lmu/ifi/dbs/elki/distance/distancevalue/SubspaceDistance.java
index bc29b382..6376c040 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/distancevalue/SubspaceDistance.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/distancevalue/SubspaceDistance.java
@@ -28,6 +28,8 @@ import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.util.regex.Pattern;
+import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
+
/**
* The subspace distance is a special distance that indicates the dissimilarity
* between subspaces of equal dimensionality. The subspace distance between two
@@ -215,7 +217,7 @@ public class SubspaceDistance extends AbstractDistance<SubspaceDistance> {
}
if (testInputPattern(val)) {
String[] values = SEPARATOR.split(val);
- return new SubspaceDistance(Double.parseDouble(values[0]), Double.parseDouble(values[1]));
+ return new SubspaceDistance(FormatUtil.parseDouble(values[0]), FormatUtil.parseDouble(values[1]));
} else {
throw new IllegalArgumentException("Given pattern \"" + val + "\" does not match required pattern \"" + requiredInputPattern() + "\"");
}
diff --git a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/AbstractDBIDSimilarityFunction.java b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/AbstractDBIDSimilarityFunction.java
index 22790a01..746f719a 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/AbstractDBIDSimilarityFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/AbstractDBIDSimilarityFunction.java
@@ -49,9 +49,4 @@ public abstract class AbstractDBIDSimilarityFunction<D extends Distance<D>> exte
super();
this.database = database;
}
-
- @Override
- public boolean isSymmetric() {
- return true;
- }
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/AbstractPrimitiveSimilarityFunction.java b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/AbstractPrimitiveSimilarityFunction.java
index 2a9c2f88..2360f66e 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/AbstractPrimitiveSimilarityFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/AbstractPrimitiveSimilarityFunction.java
@@ -63,4 +63,4 @@ public abstract class AbstractPrimitiveSimilarityFunction<O, D extends Distance<
public <T extends O> SimilarityQuery<T, D> instantiate(Relation<T> relation) {
return new PrimitiveSimilarityQuery<>(relation, this);
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/AbstractVectorDoubleSimilarityFunction.java b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/AbstractVectorDoubleSimilarityFunction.java
new file mode 100644
index 00000000..1f04cc98
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/AbstractVectorDoubleSimilarityFunction.java
@@ -0,0 +1,50 @@
+package de.lmu.ifi.dbs.elki.distance.similarityfunction;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
+
+/**
+ * Abstract base class for double-valued primitive similarity functions.
+ *
+ * @author Erich Schubert
+ */
+public abstract class AbstractVectorDoubleSimilarityFunction extends AbstractPrimitiveSimilarityFunction<NumberVector<?>, DoubleDistance> implements PrimitiveDoubleSimilarityFunction<NumberVector<?>> {
+ @Override
+ public DoubleDistance getDistanceFactory() {
+ return DoubleDistance.FACTORY;
+ }
+
+ @Override
+ public DoubleDistance similarity(NumberVector<?> o1, NumberVector<?> o2) {
+ return new DoubleDistance(doubleSimilarity(o1, o2));
+ }
+
+ @Override
+ public SimpleTypeInformation<? super NumberVector<?>> getInputTypeRestriction() {
+ return TypeUtil.NUMBER_VECTOR_FIELD;
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/FractionalSharedNearestNeighborSimilarityFunction.java b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/FractionalSharedNearestNeighborSimilarityFunction.java
index 2e0dd52d..741ece82 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/FractionalSharedNearestNeighborSimilarityFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/FractionalSharedNearestNeighborSimilarityFunction.java
@@ -40,12 +40,14 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz
*
* @author Arthur Zimek
*
- * @apiviz.has de.lmu.ifi.dbs.elki.index.preprocessed.snn.SharedNearestNeighborIndex.Factory
+ * @apiviz.has
+ * de.lmu.ifi.dbs.elki.index.preprocessed.snn.SharedNearestNeighborIndex
+ * .Factory
* @apiviz.has Instance oneway - - «create»
*
* @param <O> object type
*/
-public class FractionalSharedNearestNeighborSimilarityFunction<O> extends AbstractIndexBasedSimilarityFunction<O, SharedNearestNeighborIndex<O>, ArrayDBIDs, DoubleDistance> implements NormalizedSimilarityFunction<O, DoubleDistance> {
+public class FractionalSharedNearestNeighborSimilarityFunction<O> extends AbstractIndexBasedSimilarityFunction<O, SharedNearestNeighborIndex<O>, ArrayDBIDs, DoubleDistance> implements NormalizedSimilarityFunction<O> {
/**
* Constructor.
*
@@ -59,7 +61,7 @@ public class FractionalSharedNearestNeighborSimilarityFunction<O> extends Abstra
@Override
public <T extends O> Instance<T> instantiate(Relation<T> database) {
SharedNearestNeighborIndex<O> indexi = indexFactory.instantiate((Relation<O>) database);
- return (Instance<T>) new Instance<>((Relation<O>) database, indexi);
+ return (Instance<T>) new Instance<>((Relation<O>) database, indexi, this);
}
/**
@@ -73,13 +75,19 @@ public class FractionalSharedNearestNeighborSimilarityFunction<O> extends Abstra
*/
public static class Instance<T> extends AbstractIndexBasedSimilarityFunction.Instance<T, SharedNearestNeighborIndex<T>, ArrayDBIDs, DoubleDistance> {
/**
+ * Similarity function.
+ */
+ private FractionalSharedNearestNeighborSimilarityFunction<? super T> similarityFunction;
+
+ /**
* Constructor.
*
* @param database Database
* @param preprocessor Preprocessor
*/
- public Instance(Relation<T> database, SharedNearestNeighborIndex<T> preprocessor) {
+ public Instance(Relation<T> database, SharedNearestNeighborIndex<T> preprocessor, FractionalSharedNearestNeighborSimilarityFunction<? super T> similarityFunction) {
super(database, preprocessor);
+ this.similarityFunction = similarityFunction;
}
/**
@@ -118,6 +126,11 @@ public class FractionalSharedNearestNeighborSimilarityFunction<O> extends Abstra
}
@Override
+ public SimilarityFunction<? super T, DoubleDistance> getSimilarityFunction() {
+ return similarityFunction;
+ }
+
+ @Override
public DoubleDistance getDistanceFactory() {
return DoubleDistance.FACTORY;
}
diff --git a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/JaccardPrimitiveSimilarityFunction.java b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/JaccardPrimitiveSimilarityFunction.java
new file mode 100644
index 00000000..99fa440e
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/JaccardPrimitiveSimilarityFunction.java
@@ -0,0 +1,205 @@
+package de.lmu.ifi.dbs.elki.distance.similarityfunction;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.data.FeatureVector;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.query.DistanceSimilarityQuery;
+import de.lmu.ifi.dbs.elki.database.query.distance.PrimitiveDistanceSimilarityQuery;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+
+/**
+ * A flexible extension of Jaccard similarity to non-binary vectors.
+ *
+ * Jaccard coefficient is commonly defined as {@code |intersection|/|union|}.
+ *
+ * We can extend this definition as follows:
+ *
+ * {@code |non-zero and equal attributes|/|non-zero attributes|}.
+ *
+ * For binary vectors, this will obviously be the same quantity. However, this
+ * version is more useful for categorical data.
+ *
+ * Reference:
+ * <p>
+ * P. Jaccard<br />
+ * Étude comparative de la distribution florale dans une portion des Alpes et
+ * des Jura<br />
+ * Bulletin del la Société Vaudoise des Sciences Naturelles
+ * </p>
+ *
+ * TODO: add optimized implementations for binary vectors.
+ *
+ * @author Erich Schubert
+ *
+ * @param <O> Vector type
+ */
+@Reference(authors = "P. Jaccard", title = "Étude comparative de la distribution florale dans une portion des Alpes et des Jura", booktitle = "Bulletin del la Société Vaudoise des Sciences Naturelles")
+public class JaccardPrimitiveSimilarityFunction<O extends FeatureVector<?>> extends AbstractPrimitiveSimilarityFunction<O, DoubleDistance> implements NormalizedPrimitiveSimilarityFunction<O>, PrimitiveDoubleDistanceFunction<O> {
+ /**
+ * Constants for checking null.
+ */
+ private static final Integer INTEGER_NULL = Integer.valueOf(0);
+
+ /**
+ * Constants for checking null.
+ */
+ private static final Double DOUBLE_NULL = Double.valueOf(0.);
+
+ /**
+ * Empty string.
+ */
+ private static final String STRING_NULL = "";
+
+ /**
+ * Constructor. No parameters.
+ */
+ public JaccardPrimitiveSimilarityFunction() {
+ super();
+ }
+
+ @Override
+ public double doubleSimilarity(O o1, O o2) {
+ if(o1 instanceof NumberVector && o2 instanceof NumberVector) {
+ return doubleSimilarityNumberVector((NumberVector<?>) o1, (NumberVector<?>) o2);
+ }
+ final int d1 = o1.getDimensionality(), d2 = o2.getDimensionality();
+ int intersection = 0, union = 0;
+ int d = 0;
+ for(; d < d1 && d < d2; d++) {
+ Object v1 = o1.getValue(d), v2 = o2.getValue(d);
+ final boolean n1 = isNull(v1), n2 = isNull(v2);
+ if(v1 instanceof Double && Double.isNaN((Double) v1)) {
+ continue;
+ }
+ if(v2 instanceof Double && Double.isNaN((Double) v2)) {
+ continue;
+ }
+ if(!n1 || !n2) {
+ ++union;
+ if(!n1 && v1.equals(v2)) {
+ ++intersection;
+ }
+ }
+ }
+ for(; d < d1; d++) {
+ if(!isNull(o1.getValue(d))) {
+ ++union;
+ }
+ }
+ for(; d < d2; d++) {
+ if(!isNull(o2.getValue(d))) {
+ ++union;
+ }
+ }
+ return intersection / (double) union;
+ }
+
+ /**
+ * Compute Jaccard similarity for two number vectors.
+ *
+ * @param o1 First vector
+ * @param o2 Second vector
+ * @return Jaccard similarity
+ */
+ public static double doubleSimilarityNumberVector(NumberVector<?> o1, NumberVector<?> o2) {
+ final int d1 = o1.getDimensionality(), d2 = o2.getDimensionality();
+ int intersection = 0, union = 0;
+ int d = 0;
+ for(; d < d1 && d < d2; d++) {
+ double v1 = o1.doubleValue(d), v2 = o2.doubleValue(d);
+ if(v1 != v1 || v2 != v2) { // Skip NaNs.
+ continue;
+ }
+ if(v1 != 0. || v2 != 0) {
+ ++union;
+ if(v1 == v2) {
+ ++intersection;
+ }
+ }
+ }
+ for(; d < d1; d++) {
+ if(o1.doubleValue(d) != 0) {
+ ++union;
+ }
+ }
+ for(; d < d2; d++) {
+ if(o2.doubleValue(d) != 0) {
+ ++union;
+ }
+ }
+ return intersection / (double) union;
+ }
+
+ @Override
+ public DoubleDistance similarity(O o1, O o2) {
+ return new DoubleDistance(doubleSimilarity(o1, o2));
+ }
+
+ /**
+ * Test a value for null.
+ *
+ * TODO: delegate to {@link FeatureVector} instead?
+ *
+ * @param val Value
+ * @return true when null
+ */
+ private static boolean isNull(Object val) {
+ return (val == null) || STRING_NULL.equals(val) || DOUBLE_NULL.equals(val) || INTEGER_NULL.equals(val);
+ }
+
+ @Override
+ public DoubleDistance distance(O o1, O o2) {
+ return new DoubleDistance(1. - doubleSimilarity(o1, o2));
+ }
+
+ @Override
+ public double doubleDistance(O o1, O o2) {
+ return 1. - doubleSimilarity(o1, o2);
+ }
+
+ @Override
+ public boolean isMetric() {
+ return true;
+ }
+
+ @Override
+ public DoubleDistance getDistanceFactory() {
+ return DoubleDistance.FACTORY;
+ }
+
+ @Override
+ public SimpleTypeInformation<? super O> getInputTypeRestriction() {
+ return TypeUtil.FEATURE_VECTORS;
+ }
+
+ @Override
+ public <T extends O> DistanceSimilarityQuery<T, DoubleDistance> instantiate(Relation<T> relation) {
+ return new PrimitiveDistanceSimilarityQuery<>(relation, this, this);
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/Kulczynski1SimilarityFunction.java b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/Kulczynski1SimilarityFunction.java
index 462279d5..d307bec8 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/Kulczynski1SimilarityFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/Kulczynski1SimilarityFunction.java
@@ -24,9 +24,7 @@ package de.lmu.ifi.dbs.elki.distance.similarityfunction;
*/
import de.lmu.ifi.dbs.elki.data.NumberVector;
-import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
-import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractVectorDoubleDistanceFunction;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
@@ -38,11 +36,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
* M.-M. Deza and E. Deza<br />
* Dictionary of distances
* </p>
- *
+ *
* @author Erich Schubert
*/
@Reference(authors = "M.-M. Deza and E. Deza", title = "Dictionary of distances", booktitle = "Dictionary of distances")
-public class Kulczynski1SimilarityFunction extends AbstractPrimitiveSimilarityFunction<NumberVector<?>, DoubleDistance> {
+public class Kulczynski1SimilarityFunction extends AbstractVectorDoubleSimilarityFunction {
/**
* Static instance.
*/
@@ -59,34 +57,10 @@ public class Kulczynski1SimilarityFunction extends AbstractPrimitiveSimilarityFu
}
@Override
- public DoubleDistance getDistanceFactory() {
- return DoubleDistance.FACTORY;
- }
-
- @Override
- public SimpleTypeInformation<? super NumberVector<?>> getInputTypeRestriction() {
- return TypeUtil.NUMBER_VECTOR_FIELD;
- }
-
- @Override
- public DoubleDistance similarity(NumberVector<?> o1, NumberVector<?> o2) {
- return new DoubleDistance(doubleSimilarity(o1, o2));
- }
-
- /**
- * Compute the similarity.
- *
- * @param v1 First vector
- * @param v2 Second vector
- * @return Similarity
- */
public double doubleSimilarity(NumberVector<?> v1, NumberVector<?> v2) {
- final int dim1 = v1.getDimensionality();
- if (dim1 != v2.getDimensionality()) {
- throw new IllegalArgumentException("Different dimensionality of FeatureVectors" + "\n first argument: " + v1.toString() + "\n second argument: " + v2.toString() + "\n" + v1.getDimensionality() + "!=" + v2.getDimensionality());
- }
+ final int dim = AbstractVectorDoubleDistanceFunction.dimensionality(v1, v2);
double sumdiff = 0., summin = 0.;
- for (int i = 0; i < dim1; i++) {
+ for (int i = 0; i < dim; i++) {
double xi = v1.doubleValue(i), yi = v2.doubleValue(i);
sumdiff += Math.abs(xi - yi);
summin += Math.min(xi, yi);
diff --git a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/Kulczynski2SimilarityFunction.java b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/Kulczynski2SimilarityFunction.java
index a59010b8..8c678601 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/Kulczynski2SimilarityFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/Kulczynski2SimilarityFunction.java
@@ -24,9 +24,7 @@ package de.lmu.ifi.dbs.elki.distance.similarityfunction;
*/
import de.lmu.ifi.dbs.elki.data.NumberVector;
-import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
-import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractVectorDoubleDistanceFunction;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
@@ -44,7 +42,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
* @author Erich Schubert
*/
@Reference(authors = "M.-M. Deza and E. Deza", title = "Dictionary of distances", booktitle = "Dictionary of distances")
-public class Kulczynski2SimilarityFunction extends AbstractPrimitiveSimilarityFunction<NumberVector<?>, DoubleDistance> {
+public class Kulczynski2SimilarityFunction extends AbstractVectorDoubleSimilarityFunction {
/**
* Static instance.
*/
@@ -61,40 +59,16 @@ public class Kulczynski2SimilarityFunction extends AbstractPrimitiveSimilarityFu
}
@Override
- public DoubleDistance getDistanceFactory() {
- return DoubleDistance.FACTORY;
- }
-
- @Override
- public SimpleTypeInformation<? super NumberVector<?>> getInputTypeRestriction() {
- return TypeUtil.NUMBER_VECTOR_FIELD;
- }
-
- @Override
- public DoubleDistance similarity(NumberVector<?> o1, NumberVector<?> o2) {
- return new DoubleDistance(doubleSimilarity(o1, o2));
- }
-
- /**
- * Compute the similarity.
- *
- * @param v1 First vector
- * @param v2 Second vector
- * @return Similarity
- */
public double doubleSimilarity(NumberVector<?> v1, NumberVector<?> v2) {
- final int dim1 = v1.getDimensionality();
- if (dim1 != v2.getDimensionality()) {
- throw new IllegalArgumentException("Different dimensionality of FeatureVectors" + "\n first argument: " + v1.toString() + "\n second argument: " + v2.toString() + "\n" + v1.getDimensionality() + "!=" + v2.getDimensionality());
- }
+ final int dim = AbstractVectorDoubleDistanceFunction.dimensionality(v1, v2);
double sumx = 0., sumy = 0., summin = 0.;
- for (int i = 0; i < dim1; i++) {
+ for (int i = 0; i < dim; i++) {
double xi = v1.doubleValue(i), yi = v2.doubleValue(i);
sumx += xi;
sumy += yi;
summin += Math.min(xi, yi);
}
- return dim1 * .5 * (dim1 / sumx + dim1 / sumy) * summin;
+ return dim * .5 * (dim / sumx + dim / sumy) * summin;
}
/**
diff --git a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/NormalizedPrimitiveSimilarityFunction.java b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/NormalizedPrimitiveSimilarityFunction.java
index 6e8bd76a..83a0edfa 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/NormalizedPrimitiveSimilarityFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/NormalizedPrimitiveSimilarityFunction.java
@@ -23,8 +23,6 @@ package de.lmu.ifi.dbs.elki.distance.similarityfunction;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
-
/**
* Marker interface for similarity functions working on primitive objects, and
* limited to the 0-1 value range.
@@ -32,8 +30,7 @@ import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
* @author Erich Schubert
*
* @param <O> Object type
- * @param <D> Distance type
*/
-public interface NormalizedPrimitiveSimilarityFunction<O, D extends Distance<D>> extends PrimitiveSimilarityFunction<O, D>, NormalizedSimilarityFunction<O, D> {
+public interface NormalizedPrimitiveSimilarityFunction<O> extends PrimitiveDoubleSimilarityFunction<O>, NormalizedSimilarityFunction<O> {
// empty marker interface
}
diff --git a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/NormalizedSimilarityFunction.java b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/NormalizedSimilarityFunction.java
index dcfd8e74..91e94498 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/NormalizedSimilarityFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/NormalizedSimilarityFunction.java
@@ -23,7 +23,7 @@ package de.lmu.ifi.dbs.elki.distance.similarityfunction;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
/**
* Marker interface to signal that the similarity function is normalized to
@@ -31,9 +31,8 @@ import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
*
* @author Erich Schubert
* @param <O> object type
- * @param <D> distance type
*
*/
-public interface NormalizedSimilarityFunction<O, D extends Distance<?>> extends SimilarityFunction<O, D> {
+public interface NormalizedSimilarityFunction<O> extends SimilarityFunction<O, DoubleDistance> {
// Empty - marker interface.
}
diff --git a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/PrimitiveDoubleSimilarityFunction.java b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/PrimitiveDoubleSimilarityFunction.java
new file mode 100644
index 00000000..2d886706
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/PrimitiveDoubleSimilarityFunction.java
@@ -0,0 +1,49 @@
+package de.lmu.ifi.dbs.elki.distance.similarityfunction;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
+
+/**
+ * Interface for similarity functions that can provide a raw double value.
+ *
+ * This is for use in performance-critical situations that need to avoid the
+ * boxing/unboxing cost of the regular distance API.
+ *
+ * @author Erich Schubert
+ *
+ * @param <O> Object type
+ */
+public interface PrimitiveDoubleSimilarityFunction<O> extends PrimitiveSimilarityFunction<O, DoubleDistance> {
+ /**
+ * Computes the similarity between two given Objects according to this
+ * similarity function.
+ *
+ * @param o1 first Object
+ * @param o2 second Object
+ * @return the similarity between two given Objects according to this
+ * similarity function
+ */
+ double doubleSimilarity(O o1, O o2);
+}
diff --git a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/PrimitiveSimilarityFunction.java b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/PrimitiveSimilarityFunction.java
index d4d84734..5188e9d9 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/PrimitiveSimilarityFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/PrimitiveSimilarityFunction.java
@@ -38,7 +38,7 @@ import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
* @param <O> object type
* @param <D> distance type
*/
-public interface PrimitiveSimilarityFunction<O, D extends Distance<D>> extends SimilarityFunction<O, D> {
+public interface PrimitiveSimilarityFunction<O, D extends Distance<?>> extends SimilarityFunction<O, D> {
/**
* Computes the similarity between two given DatabaseObjects according to this
* similarity function.
diff --git a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/SharedNearestNeighborSimilarityFunction.java b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/SharedNearestNeighborSimilarityFunction.java
index 5faba431..89661f13 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/SharedNearestNeighborSimilarityFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/SharedNearestNeighborSimilarityFunction.java
@@ -40,7 +40,9 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz
*
* @author Arthur Zimek
*
- * @apiviz.has de.lmu.ifi.dbs.elki.index.preprocessed.snn.SharedNearestNeighborIndex.Factory
+ * @apiviz.has
+ * de.lmu.ifi.dbs.elki.index.preprocessed.snn.SharedNearestNeighborIndex
+ * .Factory
* @apiviz.has Instance oneway - - «create»
*
* @param <O> object type
@@ -72,17 +74,15 @@ public class SharedNearestNeighborSimilarityFunction<O> extends AbstractIndexBas
int intersection = 0;
DBIDIter iter1 = neighbors1.iter();
DBIDIter iter2 = neighbors2.iter();
- while(iter1.valid() && iter2.valid()) {
+ while (iter1.valid() && iter2.valid()) {
final int comp = DBIDUtil.compare(iter1, iter2);
- if(comp == 0) {
+ if (comp == 0) {
intersection++;
iter1.advance();
iter2.advance();
- }
- else if(comp < 0) {
+ } else if (comp < 0) {
iter1.advance();
- }
- else // iter2 < iter1
+ } else // iter2 < iter1
{
iter2.advance();
}
@@ -94,7 +94,7 @@ public class SharedNearestNeighborSimilarityFunction<O> extends AbstractIndexBas
@Override
public <T extends O> Instance<T> instantiate(Relation<T> database) {
SharedNearestNeighborIndex<O> indexi = indexFactory.instantiate((Relation<O>) database);
- return (Instance<T>) new Instance<>((Relation<O>) database, indexi);
+ return (Instance<T>) new Instance<>((Relation<O>) database, indexi, this);
}
/**
@@ -108,13 +108,19 @@ public class SharedNearestNeighborSimilarityFunction<O> extends AbstractIndexBas
*/
public static class Instance<O> extends AbstractIndexBasedSimilarityFunction.Instance<O, SharedNearestNeighborIndex<O>, SetDBIDs, IntegerDistance> {
/**
+ * Similarity function.
+ */
+ private SharedNearestNeighborSimilarityFunction<? super O> similarityFunction;
+
+ /**
* Constructor.
- *
+ *
* @param database Database
* @param preprocessor Index
*/
- public Instance(Relation<O> database, SharedNearestNeighborIndex<O> preprocessor) {
+ public Instance(Relation<O> database, SharedNearestNeighborIndex<O> preprocessor, SharedNearestNeighborSimilarityFunction<? super O> similarityFunction) {
super(database, preprocessor);
+ this.similarityFunction = similarityFunction;
}
@Override
@@ -128,6 +134,11 @@ public class SharedNearestNeighborSimilarityFunction<O> extends AbstractIndexBas
public IntegerDistance getDistanceFactory() {
return IntegerDistance.FACTORY;
}
+
+ @Override
+ public SimilarityFunction<? super O, IntegerDistance> getSimilarityFunction() {
+ return similarityFunction;
+ }
}
/**
@@ -151,4 +162,4 @@ public class SharedNearestNeighborSimilarityFunction<O> extends AbstractIndexBas
return new SharedNearestNeighborSimilarityFunction<>(factory);
}
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/FooKernelFunction.java b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/FooKernelFunction.java
deleted file mode 100644
index d56b3f04..00000000
--- a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/FooKernelFunction.java
+++ /dev/null
@@ -1,137 +0,0 @@
-package de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel;
-
-/*
- This file is part of ELKI:
- Environment for Developing KDD-Applications Supported by Index-Structures
-
- Copyright (C) 2013
- Ludwig-Maximilians-Universität München
- Lehr- und Forschungseinheit für Datenbanksysteme
- ELKI Development Team
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-import de.lmu.ifi.dbs.elki.data.NumberVector;
-import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
-import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
-import de.lmu.ifi.dbs.elki.database.query.DistanceSimilarityQuery;
-import de.lmu.ifi.dbs.elki.database.query.distance.PrimitiveDistanceSimilarityQuery;
-import de.lmu.ifi.dbs.elki.database.relation.Relation;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractPrimitiveDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
-import de.lmu.ifi.dbs.elki.distance.similarityfunction.PrimitiveSimilarityFunction;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-
-/**
- * Provides an experimental KernelDistanceFunction for NumberVectors. Currently
- * only supports 2D data and x1^2 ~ x2 correlations.
- *
- * @author Simon Paradies
- */
-public class FooKernelFunction extends AbstractPrimitiveDistanceFunction<NumberVector<?>, DoubleDistance> implements PrimitiveSimilarityFunction<NumberVector<?>, DoubleDistance> {
- /**
- * The default max_degree.
- */
- public static final int DEFAULT_MAX_DEGREE = 2;
-
- /**
- * Parameter for the maximum degree
- */
- public static final OptionID MAX_DEGREE_ID = new OptionID("fookernel.max_degree", "The max degree of the" + FooKernelFunction.class.getSimpleName() + ". Default: " + DEFAULT_MAX_DEGREE);
-
- /**
- * Degree of the polynomial kernel function
- */
- private int max_degree;
-
- /**
- * Constructor.
- *
- * @param max_degree Maximum degree-
- */
- public FooKernelFunction(int max_degree) {
- super();
- this.max_degree = max_degree;
- }
-
- /**
- * Provides an experimental kernel similarity between the given two vectors.
- *
- * @param o1 first vector
- * @param o2 second vector
- * @return the experimental kernel similarity between the given two vectors as
- * an instance of {@link DoubleDistance DoubleDistance}.
- */
- @Override
- public DoubleDistance similarity(final NumberVector<?> o1, final NumberVector<?> o2) {
- if(o1.getDimensionality() != o2.getDimensionality()) {
- throw new IllegalArgumentException("Different dimensionality of FeatureVectors\n first argument: " + o1.toString() + "\n second argument: " + o2.toString());
- }
- double sim = 0.0;
- // iterate over differently powered dimensions
- for(int degree = 0; degree < max_degree; degree++) {
- sim += Math.pow(o1.doubleValue(degree) * o2.doubleValue(degree), degree);
- }
- return new DoubleDistance(sim);
- }
-
- @Override
- public DoubleDistance distance(final NumberVector<?> fv1, final NumberVector<?> fv2) {
- return new DoubleDistance(Math.sqrt(similarity(fv1, fv1).doubleValue() + similarity(fv2, fv2).doubleValue() - 2 * similarity(fv1, fv2).doubleValue()));
- }
-
- @Override
- public VectorFieldTypeInformation<? super NumberVector<?>> getInputTypeRestriction() {
- return TypeUtil.NUMBER_VECTOR_FIELD;
- }
-
- @Override
- public DoubleDistance getDistanceFactory() {
- return DoubleDistance.FACTORY;
- }
-
- @Override
- public <T extends NumberVector<?>> DistanceSimilarityQuery<T, DoubleDistance> instantiate(Relation<T> database) {
- return new PrimitiveDistanceSimilarityQuery<>(database, this, this);
- }
-
- /**
- * Parameterization class.
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
- */
- public static class Parameterizer extends AbstractParameterizer {
- protected int max_degree = 0;
-
- @Override
- protected void makeOptions(Parameterization config) {
- super.makeOptions(config);
- final IntParameter max_degreeP = new IntParameter(MAX_DEGREE_ID, DEFAULT_MAX_DEGREE);
- if(config.grab(max_degreeP)) {
- max_degree = max_degreeP.getValue();
- }
- }
-
- @Override
- protected FooKernelFunction makeInstance() {
- return new FooKernelFunction(max_degree);
- }
- }
-} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/KernelMatrix.java b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/KernelMatrix.java
index ed3731ba..39fb97a5 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/KernelMatrix.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/KernelMatrix.java
@@ -23,16 +23,18 @@ package de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.Collection;
-import java.util.Iterator;
-import java.util.List;
import java.util.logging.Level;
-import de.lmu.ifi.dbs.elki.data.FeatureVector;
-import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRange;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.query.similarity.SimilarityQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.distance.similarityfunction.PrimitiveSimilarityFunction;
import de.lmu.ifi.dbs.elki.logging.LoggingUtil;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
@@ -43,52 +45,146 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
*
* @author Simon Paradies
*
- * @apiviz.uses de.lmu.ifi.dbs.elki.distance.similarityfunction.PrimitiveSimilarityFunction
+ * @apiviz.uses PrimitiveSimilarityFunction
*/
public class KernelMatrix {
/**
* The kernel matrix
*/
Matrix kernel;
-
+
+ /**
+ * Static mapping from DBIDs to indexes.
+ */
+ DBIDMap idmap;
+
/**
- * Wraps the matrixArray in a KernelMatrix
+ * Map a DBID to its offset
*
- * @param matrixArray two dimensional double array
+ * TODO: move to shared code.
+ *
+ * @author Erich Schubert
+ * @apiviz.exclude
*/
- public KernelMatrix(final double[][] matrixArray) {
- kernel = new Matrix(matrixArray);
+ private static interface DBIDMap {
+ /**
+ * Get the offset of the DBID in the range.
+ *
+ * @param id ID
+ * @return Offset
+ */
+ int getOffset(DBIDRef id);
+
+ /**
+ * Get an array iterator, for scanning.
+ *
+ * @return Array iterator
+ */
+ DBIDArrayIter iter();
+ }
+
+ /**
+ * Map a DBID to an integer offset, DBIDRange version.
+ *
+ * @author Erich Schubert
+ * @apiviz.exclude
+ */
+ private static class RangeMap implements DBIDMap {
+ DBIDRange range;
+
+ public RangeMap(DBIDRange range) {
+ super();
+ this.range = range;
+ }
+
+ @Override
+ public int getOffset(DBIDRef id) {
+ return range.getOffset(id);
+ }
+
+ @Override
+ public DBIDArrayIter iter() {
+ return range.iter();
+ }
+ }
+
+ /**
+ * Map a DBID to an integer offset; version to support arbitrary DBIDs.
+ *
+ * @author Erich Schubert
+ * @apiviz.exclude
+ */
+ private static class SortedArrayMap implements DBIDMap {
+ ArrayModifiableDBIDs ids;
+
+ public SortedArrayMap(DBIDs ids) {
+ super();
+ this.ids = DBIDUtil.newArray(ids);
+ this.ids.sort();
+ }
+
+ @Override
+ public int getOffset(DBIDRef id) {
+ return ids.binarySearch(id);
+ }
+
+ @Override
+ public DBIDArrayIter iter() {
+ return ids.iter();
+ }
}
/**
* Provides a new kernel matrix.
*
* @param kernelFunction the kernel function used to compute the kernel matrix
- * @param database the database for which the kernel matrix is computed
- *
- * @deprecated ID mapping is not reliable!
+ * @param relation the database that holds the objects
+ * @param ids the IDs of those objects for which the kernel matrix is computed
*/
- @Deprecated
- public <O extends FeatureVector<?>> KernelMatrix(final PrimitiveSimilarityFunction<? super O, DoubleDistance> kernelFunction, final Relation<? extends O> database) {
- this(kernelFunction, database, DBIDUtil.ensureArray(database.getDBIDs()));
+ public <O, D extends NumberDistance<?, ?>> KernelMatrix(PrimitiveSimilarityFunction<? super O, D> kernelFunction, final Relation<? extends O> relation, final DBIDs ids) {
+ LoggingUtil.logExpensive(Level.FINER, "Computing kernel matrix");
+ this.kernel = new Matrix(ids.size(), ids.size());
+ if(ids instanceof DBIDRange) {
+ this.idmap = new RangeMap((DBIDRange) ids);
+ }
+ else {
+ this.idmap = new SortedArrayMap(ids);
+ }
+
+ DBIDArrayIter i1 = this.idmap.iter(), i2 = this.idmap.iter();
+ for(i1.seek(0); i1.valid(); i1.advance()) {
+ O o1 = relation.get(i1);
+ for(i2.seek(i1.getOffset()); i2.valid(); i2.advance()) {
+ double value = kernelFunction.similarity(o1, relation.get(i2)).doubleValue();
+ kernel.set(i1.getOffset(), i2.getOffset(), value);
+ kernel.set(i2.getOffset(), i1.getOffset(), value);
+ }
+ }
}
/**
* Provides a new kernel matrix.
*
* @param kernelFunction the kernel function used to compute the kernel matrix
- * @param database the database that holds the objects
+ * @param relation the database that holds the objects
* @param ids the IDs of those objects for which the kernel matrix is computed
*/
- public <O extends FeatureVector<?>> KernelMatrix(final PrimitiveSimilarityFunction<? super O, DoubleDistance> kernelFunction, final Relation<? extends O> database, final ArrayDBIDs ids) {
+ public <O, D extends NumberDistance<?, ?>> KernelMatrix(SimilarityQuery<? super O, D> kernelFunction, final Relation<? extends O> relation, final DBIDs ids) {
LoggingUtil.logExpensive(Level.FINER, "Computing kernel matrix");
kernel = new Matrix(ids.size(), ids.size());
- double value;
- for(int idx = 0; idx < ids.size(); idx++) {
- for(int idy = idx; idy < ids.size(); idy++) {
- value = kernelFunction.similarity(database.get(ids.get(idx)), database.get(ids.get(idy))).doubleValue();
- kernel.set(idx, idy, value);
- kernel.set(idy, idx, value);
+ if(ids instanceof DBIDRange) {
+ this.idmap = new RangeMap((DBIDRange) ids);
+ }
+ else {
+ this.idmap = new SortedArrayMap(ids);
+ }
+ DBIDArrayIter i1 = idmap.iter(), i2 = idmap.iter();
+ for(i1.seek(0); i1.valid(); i1.advance()) {
+ O o1 = relation.get(i1);
+ for(i2.seek(i1.getOffset()); i2.valid(); i2.advance()) {
+ double value = kernelFunction.similarity(o1, i2).doubleValue();
+ kernel.set(i1.getOffset(), i2.getOffset(), value);
+ kernel.set(i2.getOffset(), i1.getOffset(), value);
}
}
}
@@ -109,8 +205,7 @@ public class KernelMatrix {
* @param o2 second ObjectID
* @return the distance between the two objects
*/
- // FIXME: really use objectids!
- public double getDistance(final int o1, final int o2) {
+ public double getDistance(final DBIDRef o1, final DBIDRef o2) {
return Math.sqrt(getSquaredDistance(o1, o2));
}
@@ -124,40 +219,32 @@ public class KernelMatrix {
}
/**
- * Returns the kernel value of object o1 and object o2
- *
- * @param o1 ID of first object
- * @param o2 ID of second object
- * @return the kernel value of object o1 and object o2
- */
- public double getSimilarity(final int o1, final int o2) {
- return kernel.get(o1 - 1, o2 - 1); // correct index shifts.
- }
-
- /**
* Returns the squared kernel distance between the two specified objects.
*
- * @param o1 first ObjectID
- * @param o2 second ObjectID
+ * @param id1 first ObjectID
+ * @param id2 second ObjectID
* @return the distance between the two objects
*/
- public double getSquaredDistance(final int o1, final int o2) {
- return getSimilarity(o1, o1) + getSimilarity(o2, o2) - 2 * getSimilarity(o1, o2);
+ public double getSquaredDistance(final DBIDRef id1, final DBIDRef id2) {
+ final int o1 = idmap.getOffset(id1), o2 = idmap.getOffset(id2);
+ return kernel.get(o1, o1) + kernel.get(o2, o2) - 2 * kernel.get(o1, o2);
}
/**
* Returns the ith kernel matrix column for all objects in ids
*
- * @param i the column which should be returned
+ * @param i1 the column which should be returned
* @param ids the objects
* @return the ith kernel matrix column for all objects in ids
*/
- public Matrix getSubColumn(final int i, final List<Integer> ids) {
+ @Deprecated
+ public Matrix getSubColumn(final DBIDRef i1, final DBIDs ids) {
final int[] ID = new int[1];
- ID[0] = i - 1; // correct index shift
+ ID[0] = idmap.getOffset(i1);
final int[] IDs = new int[ids.size()];
- for(int x = 0; x < IDs.length; x++) {
- IDs[x] = ids.get(x) - 1; // correct index shift
+ int i = 0;
+ for(DBIDIter it = ids.iter(); it.valid(); it.advance(), i++) {
+ IDs[i] = idmap.getOffset(it);
}
return kernel.getMatrix(IDs, ID);
}
@@ -168,17 +255,17 @@ public class KernelMatrix {
* @param ids the objects
* @return a sub kernel matrix for all objects in ids.
*/
- public Matrix getSubMatrix(final Collection<Integer> ids) {
+ public Matrix getSubMatrix(DBIDs ids) {
final int[] IDs = new int[ids.size()];
int i = 0;
- for(Iterator<Integer> it = ids.iterator(); it.hasNext(); i++) {
- IDs[i] = it.next() - 1; // correct index shift
+ for(DBIDIter it = ids.iter(); it.valid(); it.advance(), i++) {
+ IDs[i] = idmap.getOffset(it);
}
return kernel.getMatrix(IDs, IDs);
}
/**
- * Centers the matrix in feature space according to Smola et. Schoelkopf,
+ * Centers the matrix in feature space according to Smola and Schoelkopf,
* Learning with Kernels p. 431 Alters the input matrix. If you still need the
* original matrix, use
* <code>centeredMatrix = centerKernelMatrix(uncenteredMatrix.copy()) {</code>
@@ -187,6 +274,7 @@ public class KernelMatrix {
* @return centered matrix (for convenience)
*/
public static Matrix centerMatrix(final Matrix matrix) {
+ // FIXME: implement more efficiently. Maybe in matrix class itself?
final Matrix normalizingMatrix = new Matrix(matrix.getRowDimensionality(), matrix.getColumnDimensionality(), 1.0 / matrix.getColumnDimensionality());
return matrix.minusEquals(normalizingMatrix.times(matrix)).minusEquals(matrix.times(normalizingMatrix)).plusEquals(normalizingMatrix.times(matrix).times(normalizingMatrix));
}
@@ -208,4 +296,15 @@ public class KernelMatrix {
public static Matrix centerKernelMatrix(final KernelMatrix kernelMatrix) {
return centerMatrix(kernelMatrix.getKernel());
}
+
+ /**
+ * Get the kernel similarity for the given objects.
+ *
+ * The kernel matrix is filled symmetrically on construction (both (i,j) and
+ * (j,i) are set), so the argument order does not matter.
+ *
+ * @param id1 First object
+ * @param id2 Second object
+ * @return Similarity.
+ */
+ public double getSimilarity(DBIDRef id1, DBIDRef id2) {
+ // idmap translates DBIDs to the matrix offsets assigned at construction time.
+ return kernel.get(idmap.getOffset(id1), idmap.getOffset(id2));
+ }
}
diff --git a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/LaplaceKernelFunction.java b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/LaplaceKernelFunction.java
new file mode 100644
index 00000000..2a5f6028
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/LaplaceKernelFunction.java
@@ -0,0 +1,100 @@
+package de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractVectorDoubleDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.similarityfunction.AbstractVectorDoubleSimilarityFunction;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+
+/**
+ * Provides the Laplace / exponential radial basis function kernel.
+ *
+ * @author Erich Schubert
+ */
+public class LaplaceKernelFunction extends AbstractVectorDoubleSimilarityFunction {
+ /**
+ * Scaling factor mgamma (= -1/sigma; negated so it can be used directly as
+ * the coefficient inside the exponential).
+ */
+ private final double mgamma;
+
+ /**
+ * Constructor.
+ *
+ * @param sigma Scaling parameter sigma (as in laplace kernel)
+ */
+ public LaplaceKernelFunction(double sigma) {
+ super();
+ // The Laplace kernel is exp(-||o1 - o2|| / sigma); precompute -1/sigma.
+ // Bug fix: this previously used -.5 / (sigma * sigma), which is the
+ // Gaussian RBF scaling and contradicted both the field documentation
+ // "(= - 1/sigma)" and the sqrt() applied in doubleSimilarity below.
+ this.mgamma = -1. / sigma;
+ }
+
+ @Override
+ public double doubleSimilarity(NumberVector<?> o1, NumberVector<?> o2) {
+ // Accumulate the squared Euclidean distance, then k = exp(mgamma * sqrt(.)),
+ // i.e. exp(-||o1 - o2|| / sigma).
+ final int dim = AbstractVectorDoubleDistanceFunction.dimensionality(o1, o2);
+ double sim = 0.;
+ for(int i = 0; i < dim; i++) {
+ final double v = o1.doubleValue(i) - o2.doubleValue(i);
+ sim += v * v;
+ }
+ return Math.exp(mgamma * Math.sqrt(sim));
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ /**
+ * Sigma parameter: standard deviation.
+ */
+ public static final OptionID SIGMA_ID = new OptionID("kernel.laplace.sigma", "Standard deviation of the laplace RBF kernel.");
+
+ /**
+ * Sigma parameter (default 1, constrained to be greater than zero).
+ */
+ protected double sigma = 1.;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ final DoubleParameter sigmaP = new DoubleParameter(SIGMA_ID, 1.);
+ sigmaP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+ if(config.grab(sigmaP)) {
+ sigma = sigmaP.doubleValue();
+ }
+ }
+
+ @Override
+ protected LaplaceKernelFunction makeInstance() {
+ return new LaplaceKernelFunction(sigma);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/LinearKernelFunction.java b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/LinearKernelFunction.java
index 0ae44b29..a86ad55d 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/LinearKernelFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/LinearKernelFunction.java
@@ -24,74 +24,58 @@ package de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel;
*/
import de.lmu.ifi.dbs.elki.data.NumberVector;
-import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
-import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
-import de.lmu.ifi.dbs.elki.database.query.DistanceSimilarityQuery;
-import de.lmu.ifi.dbs.elki.database.query.distance.PrimitiveDistanceSimilarityQuery;
-import de.lmu.ifi.dbs.elki.database.relation.Relation;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
-import de.lmu.ifi.dbs.elki.distance.similarityfunction.AbstractPrimitiveSimilarityFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractVectorDoubleDistanceFunction;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
/**
* Provides a linear Kernel function that computes a similarity between the two
* feature vectors V1 and V2 defined by V1^T*V2.
*
+ * Note: this is effectively equivalent to using
+ * {@link de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction}
+ *
* @author Simon Paradies
- * @param <O> vector type
*/
-public class LinearKernelFunction<O extends NumberVector<?>> extends AbstractPrimitiveSimilarityFunction<O, DoubleDistance> implements PrimitiveDistanceFunction<O, DoubleDistance> {
+public class LinearKernelFunction extends PolynomialKernelFunction {
/**
- * Provides a linear Kernel function that computes a similarity between the
- * two vectors V1 and V2 defined by V1^T*V2.
+ * Static instance.
*/
- public LinearKernelFunction() {
- super();
- }
+ public static final LinearKernelFunction STATIC = new LinearKernelFunction();
/**
- * Provides a linear Kernel function that computes a similarity between the
- * two feature vectors V1 and V2 definded by V1^T*V2.
- *
- * @param o1 first feature vector
- * @param o2 second feature vector
- * @return the linear kernel similarity between the given two vectors as an
- * instance of {@link DoubleDistance DoubleDistance}.
+ * Linear kernel. Use static instance {@link #STATIC}!
*/
- @Override
- public DoubleDistance similarity(final O o1, final O o2) {
- if(o1.getDimensionality() != o2.getDimensionality()) {
- throw new IllegalArgumentException("Different dimensionality of Feature-Vectors" + "\n first argument: " + o1.toString() + "\n second argument: " + o2.toString());
- }
- double sim = 0;
- for(int i = 0; i < o1.getDimensionality(); i++) {
- sim += o1.doubleValue(i) * o2.doubleValue(i);
- }
- return new DoubleDistance(sim);
- }
-
- @Override
- public DoubleDistance distance(final O fv1, final O fv2) {
- return new DoubleDistance(Math.sqrt(similarity(fv1, fv1).doubleValue() + similarity(fv2, fv2).doubleValue() - 2 * similarity(fv1, fv2).doubleValue()));
- }
-
- @Override
- public VectorFieldTypeInformation<? super O> getInputTypeRestriction() {
- return TypeUtil.NUMBER_VECTOR_FIELD;
+ @Deprecated
+ public LinearKernelFunction() {
+ super(1, 0.);
}
@Override
- public DoubleDistance getDistanceFactory() {
- return DoubleDistance.FACTORY;
+ public double doubleSimilarity(final NumberVector<?> o1, final NumberVector<?> o2) {
+ final int dim = AbstractVectorDoubleDistanceFunction.dimensionality(o1, o2);
+ double sim = 0.;
+ for (int i = 0; i < dim; i++) {
+ sim += o1.doubleValue(i) * o2.doubleValue(i);
+ }
+ return sim;
}
@Override
- public boolean isMetric() {
- return false;
+ public double doubleDistance(final NumberVector<?> fv1, final NumberVector<?> fv2) {
+ return Math.sqrt(doubleSimilarity(fv1, fv1) + doubleSimilarity(fv2, fv2) - 2 * doubleSimilarity(fv1, fv2));
}
- @Override
- public <T extends O> DistanceSimilarityQuery<T, DoubleDistance> instantiate(Relation<T> database) {
- return new PrimitiveDistanceSimilarityQuery<>(database, this, this);
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ @Override
+ protected LinearKernelFunction makeInstance() {
+ return LinearKernelFunction.STATIC;
+ }
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/PolynomialKernelFunction.java b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/PolynomialKernelFunction.java
index 07963962..2b25ba19 100644
--- a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/PolynomialKernelFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/PolynomialKernelFunction.java
@@ -24,18 +24,20 @@ package de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel;
*/
import de.lmu.ifi.dbs.elki.data.NumberVector;
-import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
-import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
import de.lmu.ifi.dbs.elki.database.query.DistanceSimilarityQuery;
import de.lmu.ifi.dbs.elki.database.query.distance.PrimitiveDistanceSimilarityQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractPrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractVectorDoubleDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
-import de.lmu.ifi.dbs.elki.distance.similarityfunction.PrimitiveSimilarityFunction;
+import de.lmu.ifi.dbs.elki.distance.similarityfunction.AbstractVectorDoubleSimilarityFunction;
+import de.lmu.ifi.dbs.elki.math.MathUtil;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
/**
* Provides a polynomial Kernel function that computes a similarity between the
@@ -43,66 +45,66 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
*
* @author Simon Paradies
*/
-public class PolynomialKernelFunction extends AbstractPrimitiveDistanceFunction<NumberVector<?>, DoubleDistance> implements PrimitiveSimilarityFunction<NumberVector<?>, DoubleDistance> {
+public class PolynomialKernelFunction extends AbstractVectorDoubleSimilarityFunction implements PrimitiveDoubleDistanceFunction<NumberVector<?>> {
/**
* The default degree.
*/
- public static final double DEFAULT_DEGREE = 2.0;
+ public static final int DEFAULT_DEGREE = 2;
/**
- * Degree parameter.
+ * Degree of the polynomial kernel function.
*/
- public static final OptionID DEGREE_ID = new OptionID("kernel.degree", "The degree of the polynomial kernel function. Default: " + DEFAULT_DEGREE);
+ private final int degree;
/**
- * Degree of the polynomial kernel function.
+ * Bias of the similarity function.
*/
- private double degree = 0.0;
+ private final double bias;
/**
* Constructor.
*
* @param degree Kernel degree
+ * @param bias Bias offset
*/
- public PolynomialKernelFunction(double degree) {
+ public PolynomialKernelFunction(int degree, double bias) {
super();
this.degree = degree;
+ this.bias = bias;
}
/**
- * Provides the linear kernel similarity between the given two vectors.
+ * Constructor.
*
- * @param o1 first vector
- * @param o2 second vector
- * @return the linear kernel similarity between the given two vectors as an
- * instance of {@link DoubleDistance DoubleDistance}.
+ * @param degree Kernel degree
*/
- @Override
- public DoubleDistance similarity(NumberVector<?> o1, NumberVector<?> o2) {
- if(o1.getDimensionality() != o2.getDimensionality()) {
- throw new IllegalArgumentException("Different dimensionality of Feature-Vectors" + "\n first argument: " + o1.toString() + "\n second argument: " + o2.toString());
- }
+ public PolynomialKernelFunction(int degree) {
+ this(degree, 0.);
+ }
- double sim = 0;
- for(int i = 0; i < o1.getDimensionality(); i++) {
+ @Override
+ public double doubleSimilarity(NumberVector<?> o1, NumberVector<?> o2) {
+ final int dim = AbstractVectorDoubleDistanceFunction.dimensionality(o1, o2);
+ double sim = 0.;
+ for(int i = 0; i < dim; i++) {
sim += o1.doubleValue(i) * o2.doubleValue(i);
}
- return new DoubleDistance(Math.pow(sim, degree));
+ return MathUtil.powi(sim + bias, degree);
}
@Override
public DoubleDistance distance(final NumberVector<?> fv1, final NumberVector<?> fv2) {
- return new DoubleDistance(Math.sqrt(similarity(fv1, fv1).doubleValue() + similarity(fv2, fv2).doubleValue() - 2 * similarity(fv1, fv2).doubleValue()));
+ return new DoubleDistance(doubleDistance(fv1, fv2));
}
@Override
- public VectorFieldTypeInformation<? super NumberVector<?>> getInputTypeRestriction() {
- return TypeUtil.NUMBER_VECTOR_FIELD;
+ public boolean isMetric() {
+ return true;
}
@Override
- public DoubleDistance getDistanceFactory() {
- return DoubleDistance.FACTORY;
+ public double doubleDistance(NumberVector<?> fv1, NumberVector<?> fv2) {
+ return Math.sqrt(doubleSimilarity(fv1, fv1) + doubleSimilarity(fv2, fv2) - 2 * doubleSimilarity(fv1, fv2));
}
@Override
@@ -119,22 +121,46 @@ public class PolynomialKernelFunction extends AbstractPrimitiveDistanceFunction<
*/
public static class Parameterizer extends AbstractParameterizer {
/**
+ * Degree parameter.
+ */
+ public static final OptionID DEGREE_ID = new OptionID("kernel.polynomial.degree", "The degree of the polynomial kernel function. Default: " + DEFAULT_DEGREE);
+
+ /**
+ * Bias parameter.
+ */
+ public static final OptionID BIAS_ID = new OptionID("kernel.polynomial.bias", "The bias of the polynomial kernel, a constant that is added to the scalar product.");
+
+ /**
* Degree of the polynomial kernel function.
*/
- protected double degree = 0;
+ protected int degree = 0;
+
+ /**
+ * Bias parameter.
+ */
+ protected double bias = 0.;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final DoubleParameter degreeP = new DoubleParameter(DEGREE_ID, DEFAULT_DEGREE);
+ final IntParameter degreeP = new IntParameter(DEGREE_ID, DEFAULT_DEGREE);
+ degreeP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
if(config.grab(degreeP)) {
- degree = degreeP.getValue();
+ degree = degreeP.intValue();
+ }
+ final DoubleParameter biasP = new DoubleParameter(BIAS_ID);
+ biasP.setOptional(true);
+ if(config.grab(biasP)) {
+ bias = biasP.doubleValue();
}
}
@Override
protected PolynomialKernelFunction makeInstance() {
- return new PolynomialKernelFunction(degree);
+ if(degree == 1 && (bias == 0.)) {
+ return LinearKernelFunction.STATIC;
+ }
+ return new PolynomialKernelFunction(degree, bias);
}
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/RadialBasisFunctionKernelFunction.java b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/RadialBasisFunctionKernelFunction.java
new file mode 100644
index 00000000..a7613a78
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/RadialBasisFunctionKernelFunction.java
@@ -0,0 +1,102 @@
+package de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractVectorDoubleDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.similarityfunction.AbstractVectorDoubleSimilarityFunction;
+import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+
+/**
+ * Provides the Gaussian radial basis function kernel (RBF Kernel).
+ *
+ * @author Erich Schubert
+ */
+@Alias({ "rbf" })
+public class RadialBasisFunctionKernelFunction extends AbstractVectorDoubleSimilarityFunction {
+ /**
+ * Scaling factor gamma (= -1/(2*sigma^2), precomputed so the similarity is
+ * simply exp(gamma * squaredDistance)).
+ */
+ private final double gamma;
+
+ /**
+ * Constructor.
+ *
+ * @param sigma Scaling parameter sigma (standard deviation of the Gaussian;
+ *        the Parameterizer constrains it to be greater than zero)
+ */
+ public RadialBasisFunctionKernelFunction(double sigma) {
+ super();
+ this.gamma = -.5 / (sigma * sigma);
+ }
+
+ @Override
+ public double doubleSimilarity(NumberVector<?> o1, NumberVector<?> o2) {
+ // Accumulate the squared Euclidean distance, then
+ // k(o1,o2) = exp(-||o1 - o2||^2 / (2*sigma^2)).
+ final int dim = AbstractVectorDoubleDistanceFunction.dimensionality(o1, o2);
+ double sim = 0.;
+ for(int i = 0; i < dim; i++) {
+ final double v = o1.doubleValue(i) - o2.doubleValue(i);
+ sim += v * v;
+ }
+ return Math.exp(gamma * sim);
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ /**
+ * Sigma parameter: standard deviation.
+ */
+ public static final OptionID SIGMA_ID = new OptionID("kernel.rbf.sigma", "Standard deviation of the Gaussian RBF kernel.");
+
+ /**
+ * Sigma parameter (default 1, constrained to be greater than zero).
+ */
+ protected double sigma = 1.;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ final DoubleParameter sigmaP = new DoubleParameter(SIGMA_ID, 1.);
+ sigmaP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+ if(config.grab(sigmaP)) {
+ sigma = sigmaP.doubleValue();
+ }
+ }
+
+ @Override
+ protected RadialBasisFunctionKernelFunction makeInstance() {
+ return new RadialBasisFunctionKernelFunction(sigma);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/RationalQuadraticKernelFunction.java b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/RationalQuadraticKernelFunction.java
new file mode 100644
index 00000000..0a3dc45c
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/RationalQuadraticKernelFunction.java
@@ -0,0 +1,101 @@
+package de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractVectorDoubleDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.similarityfunction.AbstractVectorDoubleSimilarityFunction;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+
+/**
+ * Provides the rational quadratic kernel, a computationally cheaper
+ * approximation of the Gaussian RBF kernel ({@link RadialBasisFunctionKernelFunction}).
+ *
+ * @author Erich Schubert
+ */
+public class RationalQuadraticKernelFunction extends AbstractVectorDoubleSimilarityFunction {
+ /**
+ * Constant term c in the denominator (the Parameterizer constrains it to be
+ * greater than zero).
+ */
+ private final double c;
+
+ /**
+ * Constructor.
+ *
+ * @param c Constant term c.
+ */
+ public RationalQuadraticKernelFunction(double c) {
+ super();
+ this.c = c;
+ }
+
+ @Override
+ public double doubleSimilarity(NumberVector<?> o1, NumberVector<?> o2) {
+ // sim accumulates the squared Euclidean distance d2; the kernel value is
+ // 1 - d2 / (d2 + c), which equals 1 for identical vectors.
+ final int dim = AbstractVectorDoubleDistanceFunction.dimensionality(o1, o2);
+ double sim = 0.;
+ for(int i = 0; i < dim; i++) {
+ final double v = o1.doubleValue(i) - o2.doubleValue(i);
+ sim += v * v;
+ }
+ return 1. - sim / (sim + c);
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ /**
+ * Option ID for the constant term c.
+ */
+ public static final OptionID C_ID = new OptionID("kernel.rationalquadratic.c", "Constant term in the rational quadratic kernel.");
+
+ /**
+ * Constant term c (default 1).
+ */
+ protected double c = 1.;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ final DoubleParameter cP = new DoubleParameter(C_ID, 1.);
+ cP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+ if(config.grab(cP)) {
+ c = cP.doubleValue();
+ }
+ }
+
+ @Override
+ protected RationalQuadraticKernelFunction makeInstance() {
+ return new RationalQuadraticKernelFunction(c);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/SigmoidKernelFunction.java b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/SigmoidKernelFunction.java
new file mode 100644
index 00000000..a88225f9
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/distance/similarityfunction/kernel/SigmoidKernelFunction.java
@@ -0,0 +1,110 @@
+package de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractVectorDoubleDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.similarityfunction.AbstractVectorDoubleSimilarityFunction;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+
+/**
+ * Sigmoid kernel function (aka: hyperbolic tangent kernel, multilayer
+ * perceptron MLP kernel).
+ *
+ * @author Erich Schubert
+ */
+public class SigmoidKernelFunction extends AbstractVectorDoubleSimilarityFunction {
+ /**
+ * Scaling factor c, bias theta.
+ */
+ private final double c, theta;
+
+ /**
+ * Constructor.
+ *
+ * @param c Scaling factor c.
+ * @param theta Bias parameter theta.
+ */
+ public SigmoidKernelFunction(double c, double theta) {
+ super();
+ this.c = c;
+ this.theta = theta;
+ }
+
+ @Override
+ public double doubleSimilarity(NumberVector<?> o1, NumberVector<?> o2) {
+ // k(o1,o2) = tanh(c * <o1,o2> + theta), where <.,.> is the dot product.
+ final int dim = AbstractVectorDoubleDistanceFunction.dimensionality(o1, o2);
+ double sim = 0.;
+ for (int i = 0; i < dim; i++) {
+ final double v = o1.doubleValue(i) * o2.doubleValue(i);
+ sim += v;
+ }
+ return Math.tanh(c * sim + theta);
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ /**
+ * C parameter: scaling
+ */
+ public static final OptionID C_ID = new OptionID("kernel.sigmoid.c", "Sigmoid c parameter (scaling).");
+
+ /**
+ * Theta parameter: bias
+ */
+ public static final OptionID THETA_ID = new OptionID("kernel.sigmoid.theta", "Sigmoid theta parameter (bias).");
+
+ /**
+ * Scaling c (default 1) and bias theta (default 0).
+ */
+ protected double c = 1., theta = 0.;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ final DoubleParameter cP = new DoubleParameter(C_ID, 1.);
+ if (config.grab(cP)) {
+ c = cP.doubleValue();
+ }
+ final DoubleParameter thetaP = new DoubleParameter(THETA_ID, 0.);
+ if (config.grab(thetaP)) {
+ theta = thetaP.doubleValue();
+ }
+ }
+
+ @Override
+ protected SigmoidKernelFunction makeInstance() {
+ return new SigmoidKernelFunction(c, theta);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/PairCounting.java b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/PairCounting.java
index 24aa17ff..ef7935d8 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/PairCounting.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/PairCounting.java
@@ -54,18 +54,16 @@ public class PairCounting {
long inBoth = 0, in1 = 0, in2 = 0, total = 0;
// Process first clustering:
{
- for(int i1 = 0; i1 < table.size1; i1++) {
+ for (int i1 = 0; i1 < table.size1; i1++) {
final int size = table.contingency[i1][table.size2 + 1];
- if(table.breakNoiseClusters && table.noise1.get(i1)) {
- if(table.selfPairing) {
+ if (table.breakNoiseClusters && table.noise1.get(i1)) {
+ if (table.selfPairing) {
in1 += size;
} // else: 0
- }
- else {
- if(table.selfPairing) {
+ } else {
+ if (table.selfPairing) {
in1 += size * size;
- }
- else {
+ } else {
in1 += size * (size - 1);
}
}
@@ -73,37 +71,33 @@ public class PairCounting {
}
// Process second clustering:
{
- for(int i2 = 0; i2 < table.size2; i2++) {
+ for (int i2 = 0; i2 < table.size2; i2++) {
final int size = table.contingency[table.size1 + 1][i2];
- if(table.breakNoiseClusters && table.noise2.get(i2)) {
- if(table.selfPairing) {
+ if (table.breakNoiseClusters && table.noise2.get(i2)) {
+ if (table.selfPairing) {
in2 += size;
} // else: 0
- }
- else {
- if(table.selfPairing) {
+ } else {
+ if (table.selfPairing) {
in2 += size * size;
- }
- else {
+ } else {
in2 += size * (size - 1);
}
}
}
}
// Process combinations
- for(int i1 = 0; i1 < table.size1; i1++) {
- for(int i2 = 0; i2 < table.size2; i2++) {
+ for (int i1 = 0; i1 < table.size1; i1++) {
+ for (int i2 = 0; i2 < table.size2; i2++) {
final int size = table.contingency[i1][i2];
- if(table.breakNoiseClusters && (table.noise1.get(i1) || table.noise2.get(i2))) {
- if(table.selfPairing) {
+ if (table.breakNoiseClusters && (table.noise1.get(i1) || table.noise2.get(i2))) {
+ if (table.selfPairing) {
inBoth += size;
} // else: 0
- }
- else {
- if(table.selfPairing) {
+ } else {
+ if (table.selfPairing) {
inBoth += size * size;
- }
- else {
+ } else {
inBoth += size * (size - 1);
}
}
@@ -111,16 +105,15 @@ public class PairCounting {
}
// The official sum
int tsize = table.contingency[table.size1][table.size2];
- if(table.contingency[table.size1][table.size2 + 1] != tsize || table.contingency[table.size1 + 1][table.size2] != tsize) {
+ if (table.contingency[table.size1][table.size2 + 1] != tsize || table.contingency[table.size1 + 1][table.size2] != tsize) {
LoggingUtil.warning("PairCounting F-Measure is not well defined for overlapping and incomplete clusterings. The number of elements are: " + table.contingency[table.size1][table.size2 + 1] + " != " + table.contingency[table.size1 + 1][table.size2] + " elements.");
}
- if(tsize < 0 || tsize >= MAX_SIZE) {
+ if (tsize < 0 || tsize >= MAX_SIZE) {
LoggingUtil.warning("Your data set size probably is too big for this implementation, which uses only long precision.");
}
- if(table.selfPairing) {
+ if (table.selfPairing) {
total = tsize * tsize;
- }
- else {
+ } else {
total = tsize * (tsize - 1);
}
long inFirst = in1 - inBoth, inSecond = in2 - inBoth;
@@ -210,7 +203,11 @@ public class PairCounting {
final double nom = pairconfuse[0] * pairconfuse[3] - pairconfuse[1] * pairconfuse[2];
final long d1 = (pairconfuse[0] + pairconfuse[1]) * (pairconfuse[1] + pairconfuse[3]);
final long d2 = (pairconfuse[0] + pairconfuse[2]) * (pairconfuse[2] + pairconfuse[3]);
- return 2 * nom / (d1 + d2);
+ if (d1 + d2 > 0) {
+ return 2 * nom / (d1 + d2);
+ } else {
+ return 1.;
+ }
}
/**
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/histogram/ComputeOutlierHistogram.java b/src/de/lmu/ifi/dbs/elki/evaluation/histogram/ComputeOutlierHistogram.java
index cb740e3e..a9c24172 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/histogram/ComputeOutlierHistogram.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/histogram/ComputeOutlierHistogram.java
@@ -47,7 +47,7 @@ import de.lmu.ifi.dbs.elki.utilities.datastructures.histogram.AbstractObjStaticH
import de.lmu.ifi.dbs.elki.utilities.datastructures.histogram.ObjHistogram;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -152,7 +152,7 @@ public class ComputeOutlierHistogram implements Evaluator {
* @return Result
*/
public HistogramResult<DoubleVector> evaluateOutlierResult(Database database, OutlierResult or) {
- if (scaling instanceof OutlierScalingFunction) {
+ if(scaling instanceof OutlierScalingFunction) {
OutlierScalingFunction oscaling = (OutlierScalingFunction) scaling;
oscaling.prepare(or);
}
@@ -164,7 +164,7 @@ public class ComputeOutlierHistogram implements Evaluator {
double min = scaling.getMin();
double max = scaling.getMax();
final ObjHistogram<DoubleDoublePair> hist;
- if (Double.isInfinite(min) || Double.isNaN(min) || Double.isInfinite(max) || Double.isNaN(max)) {
+ if(Double.isInfinite(min) || Double.isNaN(min) || Double.isInfinite(max) || Double.isNaN(max)) {
hist = new AbstractObjDynamicHistogram<DoubleDoublePair>(bins) {
@Override
public DoubleDoublePair aggregate(DoubleDoublePair first, DoubleDoublePair second) {
@@ -186,9 +186,9 @@ public class ComputeOutlierHistogram implements Evaluator {
@Override
protected DoubleDoublePair downsample(Object[] data, int start, int end, int size) {
DoubleDoublePair sum = new DoubleDoublePair(0, 0);
- for (int i = start; i < end; i++) {
+ for(int i = start; i < end; i++) {
DoubleDoublePair p = (DoubleDoublePair) data[i];
- if (p != null) {
+ if(p != null) {
sum.first += p.first;
sum.second += p.second;
}
@@ -196,7 +196,8 @@ public class ComputeOutlierHistogram implements Evaluator {
return sum;
}
};
- } else {
+ }
+ else {
hist = new AbstractObjStaticHistogram<DoubleDoublePair>(bins, min, max) {
@Override
protected DoubleDoublePair makeObject() {
@@ -214,27 +215,28 @@ public class ComputeOutlierHistogram implements Evaluator {
// first fill histogram only with values of outliers
DoubleDoublePair negative, positive;
- if (!splitfreq) {
+ if(!splitfreq) {
negative = new DoubleDoublePair(1. / ids.size(), 0);
positive = new DoubleDoublePair(0, 1. / ids.size());
- } else {
+ }
+ else {
negative = new DoubleDoublePair(1. / (ids.size() - outlierIds.size()), 0);
positive = new DoubleDoublePair(0, 1. / outlierIds.size());
}
ids.removeDBIDs(outlierIds);
// fill histogram with values of each object
- for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
double result = or.getScores().get(iter);
result = scaling.getScaled(result);
hist.putData(result, negative);
}
- for (DBIDIter iter = outlierIds.iter(); iter.valid(); iter.advance()) {
+ for(DBIDIter iter = outlierIds.iter(); iter.valid(); iter.advance()) {
double result = or.getScores().get(iter);
result = scaling.getScaled(result);
hist.putData(result, positive);
}
Collection<DoubleVector> collHist = new ArrayList<>(hist.getNumBins());
- for (ObjHistogram.Iter<DoubleDoublePair> iter = hist.iter(); iter.valid(); iter.advance()) {
+ for(ObjHistogram.Iter<DoubleDoublePair> iter = hist.iter(); iter.valid(); iter.advance()) {
DoubleDoublePair data = iter.getValue();
DoubleVector row = new DoubleVector(new double[] { iter.getCenter(), data.first, data.second });
collHist.add(row);
@@ -246,12 +248,12 @@ public class ComputeOutlierHistogram implements Evaluator {
public void processNewResult(HierarchicalResult baseResult, Result result) {
final Database db = ResultUtil.findDatabase(baseResult);
List<OutlierResult> ors = ResultUtil.filterResults(result, OutlierResult.class);
- if (ors == null || ors.size() <= 0) {
+ if(ors == null || ors.size() <= 0) {
// logger.warning("No outlier results found for "+ComputeOutlierHistogram.class.getSimpleName());
return;
}
- for (OutlierResult or : ors) {
+ for(OutlierResult or : ors) {
db.getHierarchy().add(or, evaluateOutlierResult(db, or));
}
}
@@ -289,23 +291,23 @@ public class ComputeOutlierHistogram implements Evaluator {
super.makeOptions(config);
PatternParameter positiveClassNameP = new PatternParameter(POSITIVE_CLASS_NAME_ID);
positiveClassNameP.setOptional(true);
- if (config.grab(positiveClassNameP)) {
+ if(config.grab(positiveClassNameP)) {
positiveClassName = positiveClassNameP.getValue();
}
IntParameter binsP = new IntParameter(BINS_ID, 50);
- binsP.addConstraint(new GreaterConstraint(1));
- if (config.grab(binsP)) {
+ binsP.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
+ if(config.grab(binsP)) {
bins = binsP.getValue();
}
ObjectParameter<ScalingFunction> scalingP = new ObjectParameter<>(SCALING_ID, ScalingFunction.class, IdentityScaling.class);
- if (config.grab(scalingP)) {
+ if(config.grab(scalingP)) {
scaling = scalingP.instantiateClass(config);
}
Flag splitfreqF = new Flag(SPLITFREQ_ID);
- if (config.grab(splitfreqF)) {
+ if(config.grab(splitfreqF)) {
splitfreq = splitfreqF.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/gui/util/LogPanel.java b/src/de/lmu/ifi/dbs/elki/gui/util/LogPanel.java
index 23bbb3f2..11080cc9 100644
--- a/src/de/lmu/ifi/dbs/elki/gui/util/LogPanel.java
+++ b/src/de/lmu/ifi/dbs/elki/gui/util/LogPanel.java
@@ -39,6 +39,7 @@ import javax.swing.SwingUtilities;
import de.lmu.ifi.dbs.elki.logging.LoggingConfiguration;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
+import de.lmu.ifi.dbs.elki.logging.progress.MutableProgress;
import de.lmu.ifi.dbs.elki.logging.progress.Progress;
import de.lmu.ifi.dbs.elki.logging.progress.ProgressLogRecord;
import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
@@ -159,6 +160,10 @@ public class LogPanel extends JPanel {
pbar.setIndeterminate(true);
pbar.setStringPainted(true);
}
+ else if(prog instanceof MutableProgress) {
+ pbar = new JProgressBar(0, ((MutableProgress) prog).getTotal());
+ pbar.setStringPainted(true);
+ }
else {
throw new RuntimeException("Unsupported progress record");
}
@@ -189,6 +194,11 @@ public class LogPanel extends JPanel {
pbar.setValue(((IndefiniteProgress) prog).getProcessed());
pbar.setString(((IndefiniteProgress) prog).toString());
}
+ else if(prog instanceof MutableProgress) {
+ pbar.setValue(((MutableProgress) prog).getProcessed());
+ pbar.setMaximum(((MutableProgress) prog).getProcessed());
+ pbar.setString(((MutableProgress) prog).toString());
+ }
else {
throw new RuntimeException("Unsupported progress record");
}
diff --git a/src/de/lmu/ifi/dbs/elki/index/lsh/InMemoryLSHIndex.java b/src/de/lmu/ifi/dbs/elki/index/lsh/InMemoryLSHIndex.java
index c1bd43be..8ae3cd69 100644
--- a/src/de/lmu/ifi/dbs/elki/index/lsh/InMemoryLSHIndex.java
+++ b/src/de/lmu/ifi/dbs/elki/index/lsh/InMemoryLSHIndex.java
@@ -55,7 +55,7 @@ import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
@@ -159,15 +159,15 @@ public class InMemoryLSHIndex<V> implements IndexFactory<V, InMemoryLSHIndex<V>.
public void initialize() {
final int numhash = hashfunctions.size();
hashtables = new ArrayList<>(numhash);
- for (int i = 0; i < numhash; i++) {
+ for(int i = 0; i < numhash; i++) {
hashtables.add(new TIntObjectHashMap<DBIDs>(numberOfBuckets));
}
FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Building LSH index.", relation.size(), LOG) : null;
int expect = Math.max(2, (int) Math.ceil(relation.size() / (double) numberOfBuckets));
- for (DBIDIter iter = relation.getDBIDs().iter(); iter.valid(); iter.advance()) {
+ for(DBIDIter iter = relation.getDBIDs().iter(); iter.valid(); iter.advance()) {
V obj = relation.get(iter);
- for (int i = 0; i < numhash; i++) {
+ for(int i = 0; i < numhash; i++) {
final TIntObjectMap<DBIDs> table = hashtables.get(i);
final LocalitySensitiveHashFunction<? super V> hashfunc = hashfunctions.get(i);
// Get the initial (unbounded) hash code:
@@ -175,35 +175,37 @@ public class InMemoryLSHIndex<V> implements IndexFactory<V, InMemoryLSHIndex<V>.
// Reduce to hash table size
int bucket = hash % numberOfBuckets;
DBIDs cur = table.get(bucket);
- if (cur == null) {
+ if(cur == null) {
table.put(bucket, DBIDUtil.deref(iter));
- } else if (cur.size() > 1) {
+ }
+ else if(cur.size() > 1) {
((ModifiableDBIDs) cur).add(iter);
- } else {
+ }
+ else {
ModifiableDBIDs newbuck = DBIDUtil.newArray(expect);
newbuck.addDBIDs(cur);
newbuck.add(iter);
table.put(bucket, newbuck);
}
}
- if (progress != null) {
+ if(progress != null) {
progress.incrementProcessed(LOG);
}
}
- if (progress != null) {
+ if(progress != null) {
progress.ensureCompleted(LOG);
}
- if (LOG.isStatistics()) {
+ if(LOG.isStatistics()) {
int min = Integer.MAX_VALUE, max = 0;
- for (int i = 0; i < numhash; i++) {
+ for(int i = 0; i < numhash; i++) {
final TIntObjectMap<DBIDs> table = hashtables.get(i);
- for (TIntObjectIterator<DBIDs> iter = table.iterator(); iter.hasNext();) {
+ for(TIntObjectIterator<DBIDs> iter = table.iterator(); iter.hasNext();) {
iter.advance();
int size = iter.value().size();
- if (size < min) {
+ if(size < min) {
min = size;
}
- if (size > max) {
+ if(size > max) {
max = size;
}
}
@@ -221,13 +223,13 @@ public class InMemoryLSHIndex<V> implements IndexFactory<V, InMemoryLSHIndex<V>.
@Override
public <D extends Distance<D>> KNNQuery<V, D> getKNNQuery(DistanceQuery<V, D> distanceQuery, Object... hints) {
- for (Object hint : hints) {
- if (DatabaseQuery.HINT_EXACT.equals(hint)) {
+ for(Object hint : hints) {
+ if(DatabaseQuery.HINT_EXACT.equals(hint)) {
return null;
}
}
DistanceFunction<? super V, D> df = distanceQuery.getDistanceFunction();
- if (!family.isCompatible(df)) {
+ if(!family.isCompatible(df)) {
return null;
}
return (KNNQuery<V, D>) new LSHKNNQuery<>(distanceQuery);
@@ -235,13 +237,13 @@ public class InMemoryLSHIndex<V> implements IndexFactory<V, InMemoryLSHIndex<V>.
@Override
public <D extends Distance<D>> RangeQuery<V, D> getRangeQuery(DistanceQuery<V, D> distanceQuery, Object... hints) {
- for (Object hint : hints) {
- if (DatabaseQuery.HINT_EXACT.equals(hint)) {
+ for(Object hint : hints) {
+ if(DatabaseQuery.HINT_EXACT.equals(hint)) {
return null;
}
}
DistanceFunction<? super V, D> df = distanceQuery.getDistanceFunction();
- if (!family.isCompatible(df)) {
+ if(!family.isCompatible(df)) {
return null;
}
return (RangeQuery<V, D>) new LSHRangeQuery<>(distanceQuery);
@@ -270,7 +272,7 @@ public class InMemoryLSHIndex<V> implements IndexFactory<V, InMemoryLSHIndex<V>.
public KNNList<D> getKNNForObject(V obj, int k) {
ModifiableDBIDs candidates = null;
final int numhash = hashtables.size();
- for (int i = 0; i < numhash; i++) {
+ for(int i = 0; i < numhash; i++) {
final TIntObjectMap<DBIDs> table = hashtables.get(i);
final LocalitySensitiveHashFunction<? super V> hashfunc = hashfunctions.get(i);
// Get the initial (unbounded) hash code:
@@ -278,23 +280,23 @@ public class InMemoryLSHIndex<V> implements IndexFactory<V, InMemoryLSHIndex<V>.
// Reduce to hash table size
int bucket = hash % numberOfBuckets;
DBIDs cur = table.get(bucket);
- if (cur != null) {
- if (candidates == null) {
+ if(cur != null) {
+ if(candidates == null) {
candidates = DBIDUtil.newHashSet(cur.size() * numhash + k);
}
candidates.addDBIDs(cur);
}
}
- if (candidates == null) {
+ if(candidates == null) {
candidates = DBIDUtil.newArray();
}
// Refine.
KNNHeap<D> heap = DBIDUtil.newHeap(distanceQuery.getDistanceFactory(), k);
- for (DBIDIter iter = candidates.iter(); iter.valid(); iter.advance()) {
+ for(DBIDIter iter = candidates.iter(); iter.valid(); iter.advance()) {
final D dist = distanceQuery.distance(obj, iter);
super.incRefinements(1);
- heap.add(dist, iter);
+ heap.insert(dist, iter);
}
return heap.toKNNList();
}
@@ -323,7 +325,7 @@ public class InMemoryLSHIndex<V> implements IndexFactory<V, InMemoryLSHIndex<V>.
public DistanceDBIDList<D> getRangeForObject(V obj, D range) {
ModifiableDBIDs candidates = DBIDUtil.newHashSet();
final int numhash = hashtables.size();
- for (int i = 0; i < numhash; i++) {
+ for(int i = 0; i < numhash; i++) {
final TIntObjectMap<DBIDs> table = hashtables.get(i);
final LocalitySensitiveHashFunction<? super V> hashfunc = hashfunctions.get(i);
// Get the initial (unbounded) hash code:
@@ -331,17 +333,17 @@ public class InMemoryLSHIndex<V> implements IndexFactory<V, InMemoryLSHIndex<V>.
// Reduce to hash table size
int bucket = hash % numberOfBuckets;
DBIDs cur = table.get(bucket);
- if (cur != null) {
+ if(cur != null) {
candidates.addDBIDs(cur);
}
}
// Refine.
GenericDistanceDBIDList<D> result = new GenericDistanceDBIDList<>();
- for (DBIDIter iter = candidates.iter(); iter.valid(); iter.advance()) {
+ for(DBIDIter iter = candidates.iter(); iter.valid(); iter.advance()) {
final D dist = distanceQuery.distance(obj, iter);
super.incRefinements(1);
- if (range.compareTo(dist) >= 0) {
+ if(range.compareTo(dist) >= 0) {
result.add(dist, iter);
}
}
@@ -392,20 +394,20 @@ public class InMemoryLSHIndex<V> implements IndexFactory<V, InMemoryLSHIndex<V>.
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
ObjectParameter<LocalitySensitiveHashFunctionFamily<? super V>> familyP = new ObjectParameter<>(FAMILY_ID, LocalitySensitiveHashFunctionFamily.class);
- if (config.grab(familyP)) {
+ if(config.grab(familyP)) {
family = familyP.instantiateClass(config);
}
IntParameter lP = new IntParameter(L_ID);
- lP.addConstraint(new GreaterConstraint(0));
- if (config.grab(lP)) {
+ lP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(lP)) {
l = lP.intValue();
}
IntParameter bucketsP = new IntParameter(BUCKETS_ID);
bucketsP.setDefaultValue(7919); // Primes work best, apparently.
- bucketsP.addConstraint(new GreaterConstraint(1));
- if (config.grab(bucketsP)) {
+ bucketsP.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
+ if(config.grab(bucketsP)) {
numberOfBuckets = bucketsP.intValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/index/lsh/hashfamilies/AbstractHashFunctionFamily.java b/src/de/lmu/ifi/dbs/elki/index/lsh/hashfamilies/AbstractHashFunctionFamily.java
index 1ed350f0..1c5502eb 100644
--- a/src/de/lmu/ifi/dbs/elki/index/lsh/hashfamilies/AbstractHashFunctionFamily.java
+++ b/src/de/lmu/ifi/dbs/elki/index/lsh/hashfamilies/AbstractHashFunctionFamily.java
@@ -37,7 +37,7 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.randomprojections.RandomProjection
import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -89,8 +89,8 @@ public abstract class AbstractHashFunctionFamily implements LocalitySensitiveHas
public ArrayList<? extends LocalitySensitiveHashFunction<? super NumberVector<?>>> generateHashFunctions(Relation<? extends NumberVector<?>> relation, int l) {
int dim = RelationUtil.dimensionality(relation);
ArrayList<LocalitySensitiveHashFunction<? super NumberVector<?>>> ps = new ArrayList<>(l);
- final Random rnd = random.getRandom();
- for (int i = 0; i < l; i++) {
+ final Random rnd = random.getSingleThreadedRandom();
+ for(int i = 0; i < l; i++) {
RandomProjectionFamily.Projection mat = proj.generateProjection(dim, k);
ps.add(new MultipleProjectionsLocalitySensitiveHashFunction(mat, width, rnd));
}
@@ -144,19 +144,19 @@ public abstract class AbstractHashFunctionFamily implements LocalitySensitiveHas
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
RandomParameter randP = new RandomParameter(RANDOM_ID, RandomFactory.DEFAULT);
- if (config.grab(randP)) {
+ if(config.grab(randP)) {
random = randP.getValue();
}
DoubleParameter widthP = new DoubleParameter(WIDTH_ID);
- widthP.addConstraint(new GreaterConstraint(0.0));
- if (config.grab(widthP)) {
+ widthP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+ if(config.grab(widthP)) {
width = widthP.doubleValue();
}
IntParameter lP = new IntParameter(NUMPROJ_ID);
- lP.addConstraint(new GreaterConstraint(0));
- if (config.grab(lP)) {
+ lP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(lP)) {
k = lP.intValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/AbstractMaterializeKNNPreprocessor.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/AbstractMaterializeKNNPreprocessor.java
index 570bd660..927678c2 100644
--- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/AbstractMaterializeKNNPreprocessor.java
+++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/AbstractMaterializeKNNPreprocessor.java
@@ -41,7 +41,7 @@ import de.lmu.ifi.dbs.elki.index.KNNIndex;
import de.lmu.ifi.dbs.elki.index.preprocessed.AbstractPreprocessorIndex;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
@@ -280,7 +280,7 @@ public abstract class AbstractMaterializeKNNPreprocessor<O, D extends Distance<D
super.makeOptions(config);
// number of neighbors
final IntParameter kP = new IntParameter(K_ID);
- kP.addConstraint(new GreaterConstraint(1));
+ kP.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
if(config.grab(kP)) {
k = kP.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MaterializeKNNAndRKNNPreprocessor.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MaterializeKNNAndRKNNPreprocessor.java
index 06375d51..fba2b168 100644
--- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MaterializeKNNAndRKNNPreprocessor.java
+++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MaterializeKNNAndRKNNPreprocessor.java
@@ -204,7 +204,7 @@ public class MaterializeKNNAndRKNNPreprocessor<O, D extends Distance<D>> extends
if(heap == null) {
heap = DBIDUtil.newHeap(oldkNNs);
}
- heap.add(dist, newid);
+ heap.insert(dist, newid);
}
}
// kNNs for oldid have changed:
diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MaterializeKNNPreprocessor.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MaterializeKNNPreprocessor.java
index 3f79d748..50ef21ed 100644
--- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MaterializeKNNPreprocessor.java
+++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MaterializeKNNPreprocessor.java
@@ -236,7 +236,7 @@ public class MaterializeKNNPreprocessor<O, D extends Distance<D>> extends Abstra
if (heap == null) {
heap = DBIDUtil.newHeap(kNNs);
}
- heap.add(dist, iter2);
+ heap.insert(dist, iter2);
}
}
if (heap != null) {
diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MetricalIndexApproximationMaterializeKNNPreprocessor.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MetricalIndexApproximationMaterializeKNNPreprocessor.java
index 1f17e6b4..43798fbc 100644
--- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MetricalIndexApproximationMaterializeKNNPreprocessor.java
+++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MetricalIndexApproximationMaterializeKNNPreprocessor.java
@@ -121,12 +121,12 @@ public class MetricalIndexApproximationMaterializeKNNPreprocessor<O extends Numb
D d = cache.remove(key);
if(d != null) {
// consume the previous result.
- kNN.add(d, id2);
+ kNN.insert(d, id2);
}
else {
// compute new and store the previous result.
d = distanceQuery.distance(id, id2);
- kNN.add(d, id2);
+ kNN.insert(d, id2);
// put it into the cache, but with the keys reversed
key = DBIDUtil.newPair(id2, id);
cache.put(key, d);
diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/PartitionApproximationMaterializeKNNPreprocessor.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/PartitionApproximationMaterializeKNNPreprocessor.java
index 1d2a2a1b..b6cc5103 100644
--- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/PartitionApproximationMaterializeKNNPreprocessor.java
+++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/PartitionApproximationMaterializeKNNPreprocessor.java
@@ -27,8 +27,7 @@ import java.util.HashMap;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
-import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDPair;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
@@ -45,7 +44,7 @@ import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
@@ -99,45 +98,31 @@ public class PartitionApproximationMaterializeKNNPreprocessor<O, D extends Dista
DistanceQuery<O, D> distanceQuery = relation.getDatabase().getDistanceQuery(relation, distanceFunction);
storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, KNNList.class);
MeanVariance ksize = new MeanVariance();
- if (LOG.isVerbose()) {
+ if(LOG.isVerbose()) {
LOG.verbose("Approximating nearest neighbor lists to database objects");
}
// Produce a random shuffling of the IDs:
- ArrayModifiableDBIDs aids = DBIDUtil.newArray(relation.getDBIDs());
- DBIDUtil.randomShuffle(aids, rnd);
- int minsize = (int) Math.floor(aids.size() / partitions);
+ ArrayDBIDs[] parts = DBIDUtil.randomSplit(relation.getDBIDs(), partitions, rnd);
FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Processing partitions.", partitions, LOG) : null;
- for (int part = 0; part < partitions; part++) {
- int size = (partitions * minsize + part >= aids.size()) ? minsize : minsize + 1;
- // Collect the ids in this node.
- ArrayModifiableDBIDs ids = DBIDUtil.newArray(size);
- { // TODO: this is a bit overly complicated. The code dates back to when
- // we were not shuffling the array beforehand. Right now, we could just
- // compute the proper partition sizes and split it directly. But
- // ArrayDBIDs does not have a "sublist" function yet, anyway.
- DBIDArrayIter iter = aids.iter();
- // Offset - really cheap on array iterators.
- iter.seek(part);
- // Seek in steps of "partitions". Also just a += instead of ++ op!
- for (; iter.valid(); iter.advance(partitions)) {
- ids.add(iter);
- }
- }
+ for(int part = 0; part < partitions; part++) {
+ final ArrayDBIDs ids = parts[part];
+ final int size = ids.size();
HashMap<DBIDPair, D> cache = new HashMap<>((size * size * 3) >> 3);
- for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
KNNHeap<D> kNN = DBIDUtil.newHeap(distanceFunction.getDistanceFactory(), k);
- for (DBIDIter iter2 = ids.iter(); iter2.valid(); iter2.advance()) {
+ for(DBIDIter iter2 = ids.iter(); iter2.valid(); iter2.advance()) {
DBIDPair key = DBIDUtil.newPair(iter, iter2);
D d = cache.remove(key);
- if (d != null) {
+ if(d != null) {
// consume the previous result.
- kNN.add(d, iter2);
- } else {
+ kNN.insert(d, iter2);
+ }
+ else {
// compute new and store the previous result.
d = distanceQuery.distance(iter, iter2);
- kNN.add(d, iter2);
+ kNN.insert(d, iter2);
// put it into the cache, but with the keys reversed
key = DBIDUtil.newPair(iter2, iter);
cache.put(key, d);
@@ -146,19 +131,19 @@ public class PartitionApproximationMaterializeKNNPreprocessor<O, D extends Dista
ksize.put(kNN.size());
storage.put(iter, kNN.toKNNList());
}
- if (LOG.isDebugging()) {
- if (cache.size() > 0) {
+ if(LOG.isDebugging()) {
+ if(cache.size() > 0) {
LOG.warning("Cache should be empty after each run, but still has " + cache.size() + " elements.");
}
}
- if (progress != null) {
+ if(progress != null) {
progress.incrementProcessed(LOG);
}
}
- if (progress != null) {
+ if(progress != null) {
progress.ensureCompleted(LOG);
}
- if (LOG.isVerbose()) {
+ if(LOG.isVerbose()) {
LOG.verbose("On average, " + ksize.getMean() + " +- " + ksize.getSampleStddev() + " neighbors returned.");
}
}
@@ -265,12 +250,12 @@ public class PartitionApproximationMaterializeKNNPreprocessor<O, D extends Dista
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
final IntParameter partitionsP = new IntParameter(PARTITIONS_ID);
- partitionsP.addConstraint(new GreaterConstraint(1));
- if (config.grab(partitionsP)) {
+ partitionsP.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
+ if(config.grab(partitionsP)) {
partitions = partitionsP.getValue();
}
RandomParameter rndP = new RandomParameter(SEED_ID);
- if (config.grab(rndP)) {
+ if(config.grab(rndP)) {
rnd = rndP.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/RandomSampleKNNPreprocessor.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/RandomSampleKNNPreprocessor.java
index cb3f1638..87b5a3c0 100644
--- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/RandomSampleKNNPreprocessor.java
+++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/RandomSampleKNNPreprocessor.java
@@ -40,8 +40,7 @@ import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
@@ -104,22 +103,22 @@ public class RandomSampleKNNPreprocessor<O, D extends Distance<D>> extends Abstr
final ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
final int samplesize = (int) (ids.size() * share);
- for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
KNNHeap<D> kNN = DBIDUtil.newHeap(distanceFunction.getDistanceFactory(), k);
DBIDs rsamp = DBIDUtil.randomSample(ids, samplesize, rnd);
- for (DBIDIter iter2 = rsamp.iter(); iter2.valid(); iter2.advance()) {
+ for(DBIDIter iter2 = rsamp.iter(); iter2.valid(); iter2.advance()) {
D dist = distanceQuery.distance(iter, iter2);
- kNN.add(dist, iter2);
+ kNN.insert(dist, iter2);
}
storage.put(iter, kNN.toKNNList());
- if (progress != null) {
+ if(progress != null) {
progress.incrementProcessed(getLogger());
}
}
- if (progress != null) {
+ if(progress != null) {
progress.ensureCompleted(getLogger());
}
}
@@ -230,13 +229,13 @@ public class RandomSampleKNNPreprocessor<O, D extends Distance<D>> extends Abstr
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
DoubleParameter shareP = new DoubleParameter(SHARE_ID);
- shareP.addConstraint(new GreaterConstraint(0.0));
- shareP.addConstraint(new LessConstraint(1.0));
- if (config.grab(shareP)) {
+ shareP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+ shareP.addConstraint(CommonConstraints.LESS_THAN_ONE_DOUBLE);
+ if(config.grab(shareP)) {
share = shareP.getValue();
}
RandomParameter rndP = new RandomParameter(SEED_ID);
- if (config.grab(rndP)) {
+ if(config.grab(rndP)) {
rnd = rndP.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/SpatialApproximationMaterializeKNNPreprocessor.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/SpatialApproximationMaterializeKNNPreprocessor.java
index 739dcc7c..83f8f6d8 100644
--- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/SpatialApproximationMaterializeKNNPreprocessor.java
+++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/SpatialApproximationMaterializeKNNPreprocessor.java
@@ -126,12 +126,12 @@ public class SpatialApproximationMaterializeKNNPreprocessor<O extends NumberVect
D d = cache.remove(key);
if(d != null) {
// consume the previous result.
- kNN.add(d, id2);
+ kNN.insert(d, id2);
}
else {
// compute new and store the previous result.
d = distanceQuery.distance(id, id2);
- kNN.add(d, id2);
+ kNN.insert(d, id2);
// put it into the cache, but with the keys reversed
key = DBIDUtil.newPair(id2, id);
cache.put(key, d);
diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/localpca/KNNQueryFilteredPCAIndex.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/localpca/KNNQueryFilteredPCAIndex.java
index e6320833..89c10bf2 100644
--- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/localpca/KNNQueryFilteredPCAIndex.java
+++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/localpca/KNNQueryFilteredPCAIndex.java
@@ -37,7 +37,7 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredRunner;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -86,7 +86,7 @@ public class KNNQueryFilteredPCAIndex<NV extends NumberVector<?>> extends Abstra
this.k = k;
// Sanity check:
int dim = RelationUtil.dimensionality(relation);
- if (dim > 0 && k <= dim) {
+ if(dim > 0 && k <= dim) {
LOG.warning("PCA results with k < dim are meaningless. Choose k much larger than the dimensionality.");
}
}
@@ -178,8 +178,8 @@ public class KNNQueryFilteredPCAIndex<NV extends NumberVector<?>> extends Abstra
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
final IntParameter kP = new IntParameter(K_ID);
- kP.addConstraint(new GreaterConstraint(0));
- if (config.grab(kP)) {
+ kP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(kP)) {
k = kP.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/DiSHPreferenceVectorIndex.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/DiSHPreferenceVectorIndex.java
index 1d1662a7..99a13a23 100644
--- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/DiSHPreferenceVectorIndex.java
+++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/DiSHPreferenceVectorIndex.java
@@ -63,7 +63,7 @@ import de.lmu.ifi.dbs.elki.utilities.exceptions.UnableToComplyException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.WrongParameterValueException;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleListParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.EnumParameter;
@@ -255,11 +255,11 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?>> extends Abstra
boolean allFalse = true;
for(int d = 0; d < dimensionality; d++) {
if(neighborIDs[d].contains(it)) {
- bits[d] = new Bit(true);
+ bits[d] = Bit.TRUE;
allFalse = false;
}
else {
- bits[d] = new Bit(false);
+ bits[d] = Bit.FALSE;
}
}
if(!allFalse) {
@@ -576,7 +576,7 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?>> extends Abstra
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
final IntParameter minptsP = new IntParameter(MINPTS_ID);
- minptsP.addConstraint(new GreaterConstraint(0));
+ minptsP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
if(config.grab(minptsP)) {
minpts = minptsP.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/HiSCPreferenceVectorIndex.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/HiSCPreferenceVectorIndex.java
index e5dcf2f4..8ead8458 100644
--- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/HiSCPreferenceVectorIndex.java
+++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/HiSCPreferenceVectorIndex.java
@@ -49,8 +49,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -98,7 +97,7 @@ public class HiSCPreferenceVectorIndex<V extends NumberVector<?>> extends Abstra
@Override
public void initialize() {
- if (relation == null || relation.size() <= 0) {
+ if(relation == null || relation.size() <= 0) {
throw new IllegalArgumentException(ExceptionMessages.DATABASE_EMPTY);
}
@@ -111,8 +110,8 @@ public class HiSCPreferenceVectorIndex<V extends NumberVector<?>> extends Abstra
KNNQuery<V, DoubleDistance> knnQuery = QueryUtil.getKNNQuery(relation, EuclideanDistanceFunction.STATIC, k);
- for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
- if (LOG.isDebugging()) {
+ for(DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
+ if(LOG.isDebugging()) {
msg.append("\n\nid = ").append(DBIDUtil.toString(it));
// /msg.append(" ").append(database.getObjectLabelQuery().get(id));
msg.append("\n knns: ");
@@ -122,21 +121,21 @@ public class HiSCPreferenceVectorIndex<V extends NumberVector<?>> extends Abstra
BitSet preferenceVector = determinePreferenceVector(relation, it, knns, msg);
storage.put(it, preferenceVector);
- if (progress != null) {
+ if(progress != null) {
progress.incrementProcessed(LOG);
}
}
- if (progress != null) {
+ if(progress != null) {
progress.ensureCompleted(LOG);
}
- if (LOG.isDebugging()) {
+ if(LOG.isDebugging()) {
LOG.debugFine(msg.toString());
}
long end = System.currentTimeMillis();
// TODO: re-add timing code!
- if (LOG.isVerbose()) {
+ if(LOG.isVerbose()) {
long elapsedTime = end - start;
LOG.verbose(this.getClass().getName() + " runtime: " + elapsedTime + " milliseconds.");
}
@@ -158,13 +157,13 @@ public class HiSCPreferenceVectorIndex<V extends NumberVector<?>> extends Abstra
// preference vector
BitSet preferenceVector = new BitSet(variances.length);
- for (int d = 0; d < variances.length; d++) {
- if (variances[d] < alpha) {
+ for(int d = 0; d < variances.length; d++) {
+ if(variances[d] < alpha) {
preferenceVector.set(d);
}
}
- if (msg != null && LOG.isDebugging()) {
+ if(msg != null && LOG.isDebugging()) {
msg.append("\nalpha ").append(alpha);
msg.append("\nvariances ");
msg.append(FormatUtil.format(variances, ", ", 4));
@@ -261,9 +260,10 @@ public class HiSCPreferenceVectorIndex<V extends NumberVector<?>> extends Abstra
@Override
public HiSCPreferenceVectorIndex<V> instantiate(Relation<V> relation) {
final int usek;
- if (k == null) {
+ if(k == null) {
usek = 3 * RelationUtil.dimensionality(relation);
- } else {
+ }
+ else {
usek = k;
}
return new HiSCPreferenceVectorIndex<>(relation, alpha, usek);
@@ -291,16 +291,16 @@ public class HiSCPreferenceVectorIndex<V extends NumberVector<?>> extends Abstra
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
final DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, DEFAULT_ALPHA);
- alphaP.addConstraint(new GreaterConstraint(0.0));
- alphaP.addConstraint(new LessConstraint(1.0));
- if (config.grab(alphaP)) {
+ alphaP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+ alphaP.addConstraint(CommonConstraints.LESS_THAN_ONE_DOUBLE);
+ if(config.grab(alphaP)) {
alpha = alphaP.doubleValue();
}
final IntParameter kP = new IntParameter(K_ID);
- kP.addConstraint(new GreaterConstraint(0));
+ kP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
kP.setOptional(true);
- if (config.grab(kP)) {
+ if(config.grab(kP)) {
k = kP.intValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/snn/SharedNearestNeighborPreprocessor.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/snn/SharedNearestNeighborPreprocessor.java
index 9b1aee0f..650e2169 100644
--- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/snn/SharedNearestNeighborPreprocessor.java
+++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/snn/SharedNearestNeighborPreprocessor.java
@@ -47,7 +47,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
@@ -114,7 +114,7 @@ public class SharedNearestNeighborPreprocessor<O, D extends Distance<D>> extends
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
ArrayModifiableDBIDs neighbors = DBIDUtil.newArray(numberOfNeighbors);
KNNList<D> kNN = knnquery.getKNNForDBID(iditer, numberOfNeighbors);
- for (DBIDIter iter = kNN.iter(); iter.valid(); iter.advance()) {
+ for(DBIDIter iter = kNN.iter(); iter.valid(); iter.advance()) {
// if(!id.equals(nid)) {
neighbors.add(iter);
// }
@@ -248,7 +248,7 @@ public class SharedNearestNeighborPreprocessor<O, D extends Distance<D>> extends
public TypeInformation getInputTypeRestriction() {
return distanceFunction.getInputTypeRestriction();
}
-
+
/**
* Parameterization class.
*
@@ -271,7 +271,7 @@ public class SharedNearestNeighborPreprocessor<O, D extends Distance<D>> extends
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
final IntParameter numberOfNeighborsP = new IntParameter(NUMBER_OF_NEIGHBORS_ID);
- numberOfNeighborsP.addConstraint(new GreaterEqualConstraint(1));
+ numberOfNeighborsP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
if(config.grab(numberOfNeighborsP)) {
numberOfNeighbors = numberOfNeighborsP.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/subspaceproj/AbstractSubspaceProjectionIndex.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/subspaceproj/AbstractSubspaceProjectionIndex.java
index 1bad7db0..a219444e 100644
--- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/subspaceproj/AbstractSubspaceProjectionIndex.java
+++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/subspaceproj/AbstractSubspaceProjectionIndex.java
@@ -48,7 +48,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DistanceParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -84,7 +84,7 @@ public abstract class AbstractSubspaceProjectionIndex<NV extends NumberVector<?>
/**
* Constructor.
- *
+ *
* @param relation Relation
* @param epsilon Maximum Epsilon
* @param rangeQueryDistanceFunction range query
@@ -262,7 +262,7 @@ public abstract class AbstractSubspaceProjectionIndex<NV extends NumberVector<?>
protected void configMinPts(Parameterization config) {
IntParameter minptsP = new IntParameter(AbstractProjectedDBSCAN.MINPTS_ID);
- minptsP.addConstraint(new GreaterConstraint(0));
+ minptsP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
if(config.grab(minptsP)) {
minpts = minptsP.intValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/subspaceproj/FourCSubspaceIndex.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/subspaceproj/FourCSubspaceIndex.java
index a6e59f3f..35aaa40c 100644
--- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/subspaceproj/FourCSubspaceIndex.java
+++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/subspaceproj/FourCSubspaceIndex.java
@@ -44,9 +44,8 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.WrongParameterValueException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GlobalParameterConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessEqualConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ParameterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ParameterFlagGlobalConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization;
@@ -95,12 +94,12 @@ public class FourCSubspaceIndex<V extends NumberVector<?>, D extends Distance<D>
@Override
protected PCAFilteredResult computeProjection(DBIDRef id, DistanceDBIDList<D> neighbors, Relation<V> database) {
ModifiableDBIDs ids = DBIDUtil.newArray(neighbors.size());
- for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ for(DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
ids.add(neighbor);
}
PCAFilteredResult pcares = pca.processIds(ids, database);
- if (LOG.isDebugging()) {
+ if(LOG.isDebugging()) {
StringBuilder msg = new StringBuilder();
msg.append(id).append(' '); // .append(database.getObjectLabelQuery().get(id));
msg.append("\ncorrDim ").append(pcares.getCorrelationDimension());
@@ -188,19 +187,19 @@ public class FourCSubspaceIndex<V extends NumberVector<?>, D extends Distance<D>
// flag absolute
boolean absolute = false;
Flag absoluteF = new Flag(LimitEigenPairFilter.EIGENPAIR_FILTER_ABSOLUTE);
- if (config.grab(absoluteF)) {
+ if(config.grab(absoluteF)) {
absolute = absoluteF.isTrue();
}
// Parameter delta
double delta = 0.0;
DoubleParameter deltaP = new DoubleParameter(LimitEigenPairFilter.EIGENPAIR_FILTER_DELTA, DEFAULT_DELTA);
- deltaP.addConstraint(new GreaterEqualConstraint(0));
- if (config.grab(deltaP)) {
+ deltaP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
+ if(config.grab(deltaP)) {
delta = deltaP.doubleValue();
}
// Absolute flag doesn't have a sensible default value for delta.
- if (absolute && deltaP.tookDefaultValue()) {
+ if(absolute && deltaP.tookDefaultValue()) {
config.reportError(new WrongParameterValueException("Illegal parameter setting: " + "Flag " + absoluteF.getName() + " is set, " + "but no value for " + deltaP.getName() + " is specified."));
}
@@ -226,7 +225,7 @@ public class FourCSubspaceIndex<V extends NumberVector<?>, D extends Distance<D>
// eigen pair filter
pcaParameters.addParameter(PCAFilteredRunner.PCA_EIGENPAIR_FILTER, LimitEigenPairFilter.class.getName());
// abs
- if (absolute) {
+ if(absolute) {
pcaParameters.addFlag(LimitEigenPairFilter.EIGENPAIR_FILTER_ABSOLUTE);
}
// delta
@@ -237,7 +236,7 @@ public class FourCSubspaceIndex<V extends NumberVector<?>, D extends Distance<D>
pcaParameters.addParameter(PCAFilteredRunner.SMALL_ID, 1);
Class<PCAFilteredRunner<V>> cls = ClassGenericsUtil.uglyCastIntoSubclass(PCAFilteredRunner.class);
pca = pcaParameters.tryInstantiate(cls);
- for (ParameterException e : pcaParameters.getErrors()) {
+ for(ParameterException e : pcaParameters.getErrors()) {
LoggingUtil.warning("Error in internal parameterization: " + e.getMessage());
}
@@ -245,8 +244,8 @@ public class FourCSubspaceIndex<V extends NumberVector<?>, D extends Distance<D>
// TODO: this constraint is already set in the parameter itself, since
// it
// also applies to the relative case, right? -- erich
- // deltaCons.add(new GreaterEqualConstraint(0));
- deltaCons.add(new LessEqualConstraint(1));
+ // deltaCons.add(CommonConstraints.NONNEGATIVE_DOUBLE);
+ deltaCons.add(CommonConstraints.LESS_EQUAL_ONE_DOUBLE);
GlobalParameterConstraint gpc = new ParameterFlagGlobalConstraint<>(deltaP, deltaCons, absoluteF, false);
config.checkConstraint(gpc);
diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/subspaceproj/PreDeConSubspaceIndex.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/subspaceproj/PreDeConSubspaceIndex.java
index 69882c50..ca322224 100644
--- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/subspaceproj/PreDeConSubspaceIndex.java
+++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/subspaceproj/PreDeConSubspaceIndex.java
@@ -37,8 +37,7 @@ import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
@@ -92,20 +91,20 @@ public class PreDeConSubspaceIndex<V extends NumberVector<?>, D extends Distance
int referenceSetSize = neighbors.size();
V obj = database.get(id);
- if (getLogger().isDebugging()) {
+ if(getLogger().isDebugging()) {
msg = new StringBuilder();
msg.append("referenceSetSize = ").append(referenceSetSize);
msg.append("\ndelta = ").append(delta);
}
- if (referenceSetSize == 0) {
+ if(referenceSetSize == 0) {
throw new RuntimeException("Reference Set Size = 0. This should never happen!");
}
// prepare similarity matrix
int dim = obj.getDimensionality();
Matrix simMatrix = new Matrix(dim, dim, 0);
- for (int i = 0; i < dim; i++) {
+ for(int i = 0; i < dim; i++) {
simMatrix.set(i, i, 1);
}
@@ -114,35 +113,37 @@ public class PreDeConSubspaceIndex<V extends NumberVector<?>, D extends Distance
// start variance analysis
double[] sum = new double[dim];
- for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ for(DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
V o = database.get(neighbor);
- for (int d = 0; d < dim; d++) {
- sum[d] += Math.pow(obj.doubleValue(d) - o.doubleValue(d), 2.0);
+ for(int d = 0; d < dim; d++) {
+ final double diff = obj.doubleValue(d) - o.doubleValue(d);
+ sum[d] += diff * diff;
}
}
- for (int d = 0; d < dim; d++) {
- if (Math.sqrt(sum[d]) / referenceSetSize <= delta) {
- if (msg != null) {
+ for(int d = 0; d < dim; d++) {
+ if(Math.sqrt(sum[d]) / referenceSetSize <= delta) {
+ if(msg != null) {
msg.append("\nsum[").append(d).append("]= ").append(sum[d]);
msg.append("\n Math.sqrt(sum[d]) / referenceSetSize)= ").append(Math.sqrt(sum[d]) / referenceSetSize);
}
// projDim++;
simMatrix.set(d, d, kappa);
- } else {
+ }
+ else {
// bug in paper?
projDim++;
}
}
- if (projDim == 0) {
- if (msg != null) {
+ if(projDim == 0) {
+ if(msg != null) {
// msg.append("\nprojDim == 0!");
}
projDim = dim;
}
- if (msg != null) {
+ if(msg != null) {
msg.append("\nprojDim ");
// .append(database.getObjectLabelQuery().get(id));
msg.append(": ").append(projDim);
@@ -237,9 +238,9 @@ public class PreDeConSubspaceIndex<V extends NumberVector<?>, D extends Distance
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
DoubleParameter deltaP = new DoubleParameter(DELTA_ID, DEFAULT_DELTA);
- deltaP.addConstraint(new GreaterConstraint(0.0));
- deltaP.addConstraint(new LessConstraint(1.0));
- if (config.grab(deltaP)) {
+ deltaP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+ deltaP.addConstraint(CommonConstraints.LESS_THAN_ONE_DOUBLE);
+ if(config.grab(deltaP)) {
delta = deltaP.doubleValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/index/projected/PINN.java b/src/de/lmu/ifi/dbs/elki/index/projected/PINN.java
index bfb18fa8..b3e617b8 100644
--- a/src/de/lmu/ifi/dbs/elki/index/projected/PINN.java
+++ b/src/de/lmu/ifi/dbs/elki/index/projected/PINN.java
@@ -31,7 +31,7 @@ import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -52,7 +52,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
*
* @author Erich Schubert
*
- * @apiviz.composedOf AchlioptasRandomProjection
+ * @apiviz.composedOf AchlioptasRandomProjectionFamily
*
* @param <O> Object type
*/
@@ -130,30 +130,30 @@ public class PINN<O extends NumberVector<?>> extends ProjectedIndex.Factory<O, O
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
ObjectParameter<IndexFactory<O, ?>> innerP = new ObjectParameter<>(ProjectedIndex.Factory.Parameterizer.INDEX_ID, IndexFactory.class);
- if (config.grab(innerP)) {
+ if(config.grab(innerP)) {
inner = innerP.instantiateClass(config);
}
IntParameter tP = new IntParameter(T_ID);
- tP.addConstraint(new GreaterEqualConstraint(1));
- if (config.grab(tP)) {
+ tP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(tP)) {
t = tP.intValue();
}
DoubleParameter sP = new DoubleParameter(S_ID, 1.);
- sP.addConstraint(new GreaterEqualConstraint(1.));
- if (config.grab(sP)) {
+ sP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_DOUBLE);
+ if(config.grab(sP)) {
s = sP.doubleValue();
}
DoubleParameter hP = new DoubleParameter(H_ID, 3.);
- hP.addConstraint(new GreaterEqualConstraint(1.));
- if (config.grab(hP)) {
+ hP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_DOUBLE);
+ if(config.grab(hP)) {
h = hP.doubleValue();
}
RandomParameter randomP = new RandomParameter(RANDOM_ID);
- if (config.grab(randomP)) {
+ if(config.grab(randomP)) {
random = randomP.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/index/projected/ProjectedIndex.java b/src/de/lmu/ifi/dbs/elki/index/projected/ProjectedIndex.java
index d5dc86f9..f71ced70 100644
--- a/src/de/lmu/ifi/dbs/elki/index/projected/ProjectedIndex.java
+++ b/src/de/lmu/ifi/dbs/elki/index/projected/ProjectedIndex.java
@@ -47,6 +47,7 @@ import de.lmu.ifi.dbs.elki.database.ids.generic.GenericDistanceDBIDList;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.query.range.AbstractDistanceRangeQuery;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.query.rknn.RKNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
@@ -65,7 +66,7 @@ import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.statistics.Counter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
@@ -306,9 +307,9 @@ public class ProjectedIndex<O, I> implements KNNIndex<O>, RKNNIndex<O>, RangeInd
KNNList<D> ilist = inner.getKNNForObject(pobj, (int) Math.ceil(k * kmulti));
if(distq.getDistanceFunction() instanceof PrimitiveDoubleDistanceFunction) {
PrimitiveDoubleDistanceFunction<? super O> df = (PrimitiveDoubleDistanceFunction<? super O>) distq.getDistanceFunction();
- DoubleDistanceKNNHeap heap = (DoubleDistanceKNNHeap) DBIDUtil.newHeap(DoubleDistance.FACTORY, k);
+ DoubleDistanceKNNHeap heap = DBIDUtil.newDoubleDistanceHeap(k);
for(DistanceDBIDListIter<D> iter = ilist.iter(); iter.valid(); iter.advance()) {
- heap.add(df.doubleDistance(obj, distq.getRelation().get(iter)), iter);
+ heap.insert(df.doubleDistance(obj, distq.getRelation().get(iter)), iter);
countRefinement();
}
return (KNNList<D>) heap.toKNNList();
@@ -316,7 +317,7 @@ public class ProjectedIndex<O, I> implements KNNIndex<O>, RKNNIndex<O>, RangeInd
else {
KNNHeap<D> heap = DBIDUtil.newHeap(distq.getDistanceFactory(), k);
for(DistanceDBIDListIter<D> iter = ilist.iter(); iter.valid(); iter.advance()) {
- heap.add(distq.distance(obj, iter), iter);
+ heap.insert(distq.distance(obj, iter), iter);
countRefinement();
}
return heap.toKNNList();
@@ -331,26 +332,20 @@ public class ProjectedIndex<O, I> implements KNNIndex<O>, RKNNIndex<O>, RangeInd
*
* @param <D> Distance type
*/
- class ProjectedRangeQuery<D extends Distance<D>> implements RangeQuery<O, D> {
+ class ProjectedRangeQuery<D extends Distance<D>> extends AbstractDistanceRangeQuery<O, D> {
/**
* Inner range query.
*/
RangeQuery<I, D> inner;
/**
- * Distance query for refinement.
- */
- DistanceQuery<O, D> distq;
-
- /**
* Constructor.
*
* @param inner Inner range query.
*/
public ProjectedRangeQuery(DistanceQuery<O, D> distanceQuery, RangeQuery<I, D> inner) {
- super();
+ super(distanceQuery);
this.inner = inner;
- this.distq = distanceQuery;
}
@Override
@@ -367,12 +362,12 @@ public class ProjectedIndex<O, I> implements KNNIndex<O>, RKNNIndex<O>, RangeInd
if(norefine) {
return ilist;
}
- if(distq.getDistanceFunction() instanceof PrimitiveDoubleDistanceFunction) {
- PrimitiveDoubleDistanceFunction<? super O> df = (PrimitiveDoubleDistanceFunction<? super O>) distq.getDistanceFunction();
+ if(distanceQuery.getDistanceFunction() instanceof PrimitiveDoubleDistanceFunction) {
+ PrimitiveDoubleDistanceFunction<? super O> df = (PrimitiveDoubleDistanceFunction<? super O>) distanceQuery.getDistanceFunction();
double drange = ((DoubleDistance) range).doubleValue();
ModifiableDoubleDistanceDBIDList olist = new DoubleDistanceDBIDPairList(ilist.size());
for(DistanceDBIDListIter<D> iter = ilist.iter(); iter.valid(); iter.advance()) {
- final double dist = df.doubleDistance(obj, distq.getRelation().get(iter));
+ final double dist = df.doubleDistance(obj, distanceQuery.getRelation().get(iter));
countRefinement();
if(dist <= drange) {
olist.add(dist, iter);
@@ -383,7 +378,7 @@ public class ProjectedIndex<O, I> implements KNNIndex<O>, RKNNIndex<O>, RangeInd
else {
ModifiableDistanceDBIDList<D> olist = new GenericDistanceDBIDList<>(ilist.size());
for(DistanceDBIDListIter<D> iter = ilist.iter(); iter.valid(); iter.advance()) {
- D dist = distq.distance(obj, iter);
+ D dist = distanceQuery.distance(obj, iter);
countRefinement();
if(range.compareTo(dist) <= 0) {
olist.add(dist, iter);
@@ -632,7 +627,7 @@ public class ProjectedIndex<O, I> implements KNNIndex<O>, RKNNIndex<O>, RangeInd
if(!norefine) {
DoubleParameter kmultP = new DoubleParameter(K_MULTIPLIER_ID);
kmultP.setDefaultValue(1.0);
- kmultP.addConstraint(new GreaterEqualConstraint(1.0));
+ kmultP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_DOUBLE);
if(config.grab(kmultP)) {
kmulti = kmultP.doubleValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/AbstractMkTreeUnifiedFactory.java b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/AbstractMkTreeUnifiedFactory.java
index fe408035..996b2438 100644
--- a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/AbstractMkTreeUnifiedFactory.java
+++ b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/AbstractMkTreeUnifiedFactory.java
@@ -30,7 +30,7 @@ import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.AbstractMTreeNode;
import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.MTreeEntry;
import de.lmu.ifi.dbs.elki.persistent.PageFileFactory;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -80,9 +80,9 @@ public abstract class AbstractMkTreeUnifiedFactory<O, D extends NumberDistance<D
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
IntParameter k_maxP = new IntParameter(K_MAX_ID);
- k_maxP.addConstraint(new GreaterConstraint(0));
+ k_maxP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
- if (config.grab(k_maxP)) {
+ if(config.grab(k_maxP)) {
settings.k_max = k_maxP.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkapp/MkAppTreeFactory.java b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkapp/MkAppTreeFactory.java
index 45a2e85f..1d4b7fe4 100644
--- a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkapp/MkAppTreeFactory.java
+++ b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkapp/MkAppTreeFactory.java
@@ -30,7 +30,7 @@ import de.lmu.ifi.dbs.elki.persistent.PageFile;
import de.lmu.ifi.dbs.elki.persistent.PageFileFactory;
import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -94,19 +94,19 @@ public class MkAppTreeFactory<O, D extends NumberDistance<D, ?>> extends Abstrac
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
IntParameter kP = new IntParameter(K_ID);
- kP.addConstraint(new GreaterConstraint(0));
- if (config.grab(kP)) {
+ kP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(kP)) {
settings.k_max = kP.getValue();
}
IntParameter pP = new IntParameter(P_ID);
- pP.addConstraint(new GreaterConstraint(0));
- if (config.grab(pP)) {
+ pP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(pP)) {
settings.p = pP.getValue();
}
Flag nologF = new Flag(NOLOG_ID);
- if (config.grab(nologF)) {
+ if(config.grab(nologF)) {
settings.log = !nologF.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkapp/PolynomialApproximation.java b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkapp/PolynomialApproximation.java
index dc28b7c3..f156c607 100644
--- a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkapp/PolynomialApproximation.java
+++ b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkapp/PolynomialApproximation.java
@@ -28,6 +28,7 @@ import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
+import de.lmu.ifi.dbs.elki.math.MathUtil;
import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
/**
@@ -91,18 +92,19 @@ public class PolynomialApproximation implements Externalizable {
}
/**
- * Returns the function value of the polynoial approximation
+ * Returns the function value of the polynomial approximation
* at the specified k.
*
- * @param k the value for which the polynoial approximation should be returned
- * @return the function value of the polynoial approximation
+ * @param k the value for which the polynomial approximation should be returned
+ * @return the function value of the polynomial approximation
* at the specified k
*/
public double getValueAt(int k) {
- double result = 0;
- double log_k = Math.log(k);
+ double result = 0.;
+ double log_k = Math.log(k), acc = 1.;
for (int p = 0; p < b.length; p++) {
- result += b[p] * Math.pow(log_k, p);
+ result += b[p] * acc;
+ acc *= log_k;
}
return result;
}
diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkcop/MkCopTreeFactory.java b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkcop/MkCopTreeFactory.java
index 6a39e92f..fa7afbe2 100644
--- a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkcop/MkCopTreeFactory.java
+++ b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkcop/MkCopTreeFactory.java
@@ -31,7 +31,7 @@ import de.lmu.ifi.dbs.elki.persistent.PageFile;
import de.lmu.ifi.dbs.elki.persistent.PageFileFactory;
import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -84,8 +84,8 @@ public class MkCopTreeFactory<O, D extends NumberDistance<D, ?>> extends Abstrac
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
IntParameter k_maxP = new IntParameter(K_ID);
- k_maxP.addConstraint(new GreaterConstraint(0));
- if (config.grab(k_maxP)) {
+ k_maxP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(k_maxP)) {
settings.k_max = k_maxP.intValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkmax/MkMaxTree.java b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkmax/MkMaxTree.java
index c1f87f20..e3e4f71f 100644
--- a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkmax/MkMaxTree.java
+++ b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkmax/MkMaxTree.java
@@ -224,12 +224,11 @@ public class MkMaxTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree
// p is nearer to q than the farthest kNN-candidate of q
// ==> p becomes a knn-candidate
if (dist_pq.doubleValue() <= knnDist_q) {
- knns_q.add(dist_pq, p.getRoutingObjectID());
+ knns_q.insert(dist_pq, p.getRoutingObjectID());
if (knns_q.size() >= getKmax()) {
knnDist_q = knns_q.getKNNDistance().doubleValue();
q.setKnnDistance(knnDist_q);
}
-
}
// p is nearer to q than to its farthest knn-candidate
// q becomes knn of p
diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/query/DoubleDistanceMetricalIndexKNNQuery.java b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/query/DoubleDistanceMetricalIndexKNNQuery.java
index 9eb72178..b55b9fd0 100644
--- a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/query/DoubleDistanceMetricalIndexKNNQuery.java
+++ b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/query/DoubleDistanceMetricalIndexKNNQuery.java
@@ -78,7 +78,7 @@ public class DoubleDistanceMetricalIndexKNNQuery<O> extends AbstractDistanceKNNQ
}
index.statistics.countKNNQuery();
- DoubleDistanceKNNHeap knnList = (DoubleDistanceKNNHeap) DBIDUtil.newHeap(DoubleDistance.FACTORY, k);
+ DoubleDistanceKNNHeap knnList = DBIDUtil.newDoubleDistanceHeap(k);
double d_k = Double.POSITIVE_INFINITY;
final ComparableMinHeap<DoubleMTreeDistanceSearchCandidate> pq = new ComparableMinHeap<>();
@@ -131,7 +131,7 @@ public class DoubleDistanceMetricalIndexKNNQuery<O> extends AbstractDistanceKNNQ
double d3 = distf.doubleDistance(o_i, q);
index.statistics.countDistanceCalculation();
if (d3 <= d_k) {
- knnList.add(d3, id_i);
+ knnList.insert(d3, id_i);
d_k = knnList.doubleKNNDistance();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/query/MetricalIndexKNNQuery.java b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/query/MetricalIndexKNNQuery.java
index f40e001b..f21bac82 100644
--- a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/query/MetricalIndexKNNQuery.java
+++ b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/query/MetricalIndexKNNQuery.java
@@ -127,7 +127,7 @@ public class MetricalIndexKNNQuery<O, D extends NumberDistance<D, ?>> extends Ab
D d3 = distanceQuery.distance(o_j, q);
index.statistics.countDistanceCalculation();
if (d3.compareTo(d_k) <= 0) {
- knnList.add(d3, o_j);
+ knnList.insert(d3, o_j);
d_k = knnList.getKNNDistance();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/strategies/split/RandomSplit.java b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/strategies/split/RandomSplit.java
index faf2acc2..ca54f51a 100644
--- a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/strategies/split/RandomSplit.java
+++ b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/strategies/split/RandomSplit.java
@@ -71,7 +71,7 @@ public class RandomSplit<O, D extends NumberDistance<D, ?>, N extends AbstractMT
*/
public RandomSplit(RandomFactory rnd) {
super();
- this.random = rnd.getRandom();
+ this.random = rnd.getSingleThreadedRandom();
}
/**
diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/kd/MinimalisticMemoryKDTree.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/kd/MinimalisticMemoryKDTree.java
index 28f19a25..ce1da63c 100644
--- a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/kd/MinimalisticMemoryKDTree.java
+++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/kd/MinimalisticMemoryKDTree.java
@@ -245,7 +245,7 @@ public class MinimalisticMemoryKDTree<O extends NumberVector<?>> extends Abstrac
@Override
public KNNList<DoubleDistance> getKNNForObject(O obj, int k) {
- final DoubleDistanceKNNHeap knns = (DoubleDistanceKNNHeap) DBIDUtil.newHeap(DoubleDistance.FACTORY, k);
+ final DoubleDistanceKNNHeap knns = DBIDUtil.newDoubleDistanceHeap(k);
kdKNNSearch(0, sorted.size(), 0, obj, knns, sorted.iter(), Double.POSITIVE_INFINITY);
return knns.toKNNList();
}
@@ -284,7 +284,7 @@ public class MinimalisticMemoryKDTree<O extends NumberVector<?>> extends Abstrac
countDistanceComputation();
if(dist <= maxdist) {
iter.seek(middle);
- knns.add(dist, iter);
+ knns.insert(dist, iter);
maxdist = knns.doubleKNNDistance();
}
if(left < middle) {
@@ -305,7 +305,7 @@ public class MinimalisticMemoryKDTree<O extends NumberVector<?>> extends Abstrac
countDistanceComputation();
if(dist <= maxdist) {
iter.seek(middle);
- knns.add(dist, iter);
+ knns.insert(dist, iter);
maxdist = knns.doubleKNNDistance();
}
}
@@ -323,7 +323,7 @@ public class MinimalisticMemoryKDTree<O extends NumberVector<?>> extends Abstrac
countDistanceComputation();
if(dist <= maxdist) {
iter.seek(middle);
- knns.add(dist, iter);
+ knns.insert(dist, iter);
maxdist = knns.doubleKNNDistance();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/AbstractRStarTreeFactory.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/AbstractRStarTreeFactory.java
index 48b84302..70e9f747 100644
--- a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/AbstractRStarTreeFactory.java
+++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/AbstractRStarTreeFactory.java
@@ -38,8 +38,7 @@ import de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.split.Spl
import de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.split.TopologicalSplitter;
import de.lmu.ifi.dbs.elki.persistent.PageFileFactory;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
@@ -119,9 +118,9 @@ public abstract class AbstractRStarTreeFactory<O extends NumberVector<?>, N exte
* Tree settings
*/
protected S settings;
-
+
/**
- * Create the settings object
+ * Create the settings object
*
* @return Settings instance.
*/
@@ -132,21 +131,21 @@ public abstract class AbstractRStarTreeFactory<O extends NumberVector<?>, N exte
super.makeOptions(config);
settings = createSettings();
ObjectParameter<InsertionStrategy> insertionStrategyP = new ObjectParameter<>(INSERTION_STRATEGY_ID, InsertionStrategy.class, CombinedInsertionStrategy.class);
- if (config.grab(insertionStrategyP)) {
+ if(config.grab(insertionStrategyP)) {
settings.insertionStrategy = insertionStrategyP.instantiateClass(config);
}
ObjectParameter<SplitStrategy> splitStrategyP = new ObjectParameter<>(SPLIT_STRATEGY_ID, SplitStrategy.class, TopologicalSplitter.class);
- if (config.grab(splitStrategyP)) {
+ if(config.grab(splitStrategyP)) {
settings.nodeSplitter = splitStrategyP.instantiateClass(config);
}
DoubleParameter minimumFillP = new DoubleParameter(MINIMUM_FILL_ID, 0.4);
- minimumFillP.addConstraint(new GreaterConstraint(0.0));
- minimumFillP.addConstraint(new LessConstraint(0.5));
- if (config.grab(minimumFillP)) {
+ minimumFillP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+ minimumFillP.addConstraint(CommonConstraints.LESS_THAN_HALF_DOUBLE);
+ if(config.grab(minimumFillP)) {
settings.relativeMinFill = minimumFillP.getValue();
}
ObjectParameter<OverflowTreatment> overflowP = new ObjectParameter<>(OVERFLOW_STRATEGY_ID, OverflowTreatment.class, LimitedReinsertOverflowTreatment.class);
- if (config.grab(overflowP)) {
+ if(config.grab(overflowP)) {
settings.setOverflowTreatment(overflowP.instantiateClass(config));
}
configBulkLoad(config);
@@ -159,7 +158,7 @@ public abstract class AbstractRStarTreeFactory<O extends NumberVector<?>, N exte
*/
protected void configBulkLoad(Parameterization config) {
ObjectParameter<BulkSplit> bulkSplitP = new ObjectParameter<>(BULK_SPLIT_ID, BulkSplit.class, true);
- if (config.grab(bulkSplitP)) {
+ if(config.grab(bulkSplitP)) {
settings.bulkSplitter = bulkSplitP.instantiateClass(config);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/query/DoubleDistanceRStarTreeKNNQuery.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/query/DoubleDistanceRStarTreeKNNQuery.java
index b7769d45..472e4b57 100644
--- a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/query/DoubleDistanceRStarTreeKNNQuery.java
+++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/query/DoubleDistanceRStarTreeKNNQuery.java
@@ -39,6 +39,7 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNHeap;
import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList;
+import de.lmu.ifi.dbs.elki.database.ids.integer.DoubleDistanceIntegerDBIDKNNHeap;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.AbstractDistanceKNNQuery;
import de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDoubleDistanceFunction;
@@ -46,7 +47,9 @@ import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.index.tree.DirectoryEntry;
import de.lmu.ifi.dbs.elki.index.tree.LeafEntry;
import de.lmu.ifi.dbs.elki.index.tree.query.DoubleDistanceSearchCandidate;
+import de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialDirectoryEntry;
import de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialEntry;
+import de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialPointLeafEntry;
import de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.AbstractRStarTree;
import de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.AbstractRStarTreeNode;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparableMinHeap;
@@ -92,31 +95,29 @@ public class DoubleDistanceRStarTreeKNNQuery<O extends SpatialComparable> extend
this.distanceFunction = distanceFunction;
}
- /**
- * Performs a k-nearest neighbor query for the given NumberVector with the
- * given parameter k and the according distance function. The query result is
- * in ascending order to the distance to the query object.
- *
- * @param object the query object
- * @param knnList the knn list containing the result
- */
- protected void doKNNQuery(O object, DoubleDistanceKNNHeap knnList) {
- final ComparableMinHeap<DoubleDistanceSearchCandidate> pq = new ComparableMinHeap<>(Math.min(knnList.getK() << 1, 21));
+ @Override
+ public DoubleDistanceKNNList getKNNForObject(O obj, int k) {
+ if(k < 1) {
+ throw new IllegalArgumentException("At least one neighbor has to be requested!");
+ }
tree.statistics.countKNNQuery();
- // push root
- pq.add(new DoubleDistanceSearchCandidate(0.0, tree.getRootID()));
- double maxDist = Double.MAX_VALUE;
+ final DoubleDistanceKNNHeap knnList = new DoubleDistanceIntegerDBIDKNNHeap(k);
+ final ComparableMinHeap<DoubleDistanceSearchCandidate> pq = new ComparableMinHeap<>(Math.min(knnList.getK() << 1, 21));
+
+ // expand root
+ double maxDist = expandNode(obj, knnList, pq, Double.MAX_VALUE, tree.getRootID());
// search in tree
while(!pq.isEmpty()) {
DoubleDistanceSearchCandidate pqNode = pq.poll();
if(pqNode.mindist > maxDist) {
- return;
+ break;
}
- maxDist = expandNode(object, knnList, pq, maxDist, pqNode.nodeID);
+ maxDist = expandNode(obj, knnList, pq, maxDist, pqNode.nodeID);
}
+ return knnList.toKNNList();
}
private double expandNode(O object, DoubleDistanceKNNHeap knnList, final ComparableMinHeap<DoubleDistanceSearchCandidate> pq, double maxDist, final int nodeID) {
@@ -124,28 +125,27 @@ public class DoubleDistanceRStarTreeKNNQuery<O extends SpatialComparable> extend
// data node
if(node.isLeaf()) {
for(int i = 0; i < node.getNumEntries(); i++) {
- SpatialEntry entry = node.getEntry(i);
+ SpatialPointLeafEntry entry = (SpatialPointLeafEntry) node.getEntry(i);
double distance = distanceFunction.doubleMinDist(entry, object);
tree.statistics.countDistanceCalculation();
if(distance <= maxDist) {
- knnList.add(distance, ((LeafEntry) entry).getDBID());
- maxDist = knnList.doubleKNNDistance();
+ maxDist = knnList.insert(distance, entry.getDBID());
}
}
}
// directory node
else {
for(int i = 0; i < node.getNumEntries(); i++) {
- SpatialEntry entry = node.getEntry(i);
+ SpatialDirectoryEntry entry = (SpatialDirectoryEntry) node.getEntry(i);
double distance = distanceFunction.doubleMinDist(entry, object);
tree.statistics.countDistanceCalculation();
// Greedy expand, bypassing the queue
if(distance <= 0) {
- expandNode(object, knnList, pq, maxDist, ((DirectoryEntry) entry).getPageID());
+ expandNode(object, knnList, pq, maxDist, entry.getPageID());
}
else {
if(distance <= maxDist) {
- pq.add(new DoubleDistanceSearchCandidate(distance, ((DirectoryEntry) entry).getPageID()));
+ pq.add(new DoubleDistanceSearchCandidate(distance, entry.getPageID()));
}
}
}
@@ -174,7 +174,7 @@ public class DoubleDistanceRStarTreeKNNQuery<O extends SpatialComparable> extend
double dist_pq = distanceFunction.doubleDistance(relation.get(pid), relation.get(q));
tree.statistics.countDistanceCalculation();
if(dist_pq <= knn_q_maxDist) {
- knns_q.add(dist_pq, pid);
+ knns_q.insert(dist_pq, pid);
}
}
}
@@ -264,27 +264,17 @@ public class DoubleDistanceRStarTreeKNNQuery<O extends SpatialComparable> extend
}
@Override
- public DoubleDistanceKNNList getKNNForObject(O obj, int k) {
- if(k < 1) {
- throw new IllegalArgumentException("At least one enumeration has to be requested!");
- }
-
- final DoubleDistanceKNNHeap knnList = (DoubleDistanceKNNHeap) DBIDUtil.newHeap(DoubleDistance.FACTORY, k);
- doKNNQuery(obj, knnList);
- return knnList.toKNNList();
- }
-
- @Override
public List<DoubleDistanceKNNList> getKNNForBulkDBIDs(ArrayDBIDs ids, int k) {
if(k < 1) {
throw new IllegalArgumentException("At least one enumeration has to be requested!");
}
// While this works, it seems to be slow at least for large sets!
+ // TODO: use a DataStore instead of a map.
final Map<DBID, DoubleDistanceKNNHeap> knnLists = new HashMap<>(ids.size());
for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
DBID id = DBIDUtil.deref(iter);
- knnLists.put(id, (DoubleDistanceKNNHeap) DBIDUtil.newHeap(DoubleDistance.FACTORY, k));
+ knnLists.put(id, new DoubleDistanceIntegerDBIDKNNHeap(k));
}
batchNN(tree.getRoot(), knnLists);
diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/query/DoubleDistanceRStarTreeRangeQuery.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/query/DoubleDistanceRStarTreeRangeQuery.java
index eb85574f..4fe2719e 100644
--- a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/query/DoubleDistanceRStarTreeRangeQuery.java
+++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/query/DoubleDistanceRStarTreeRangeQuery.java
@@ -23,20 +23,20 @@ package de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.query;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import java.util.Arrays;
+
import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable;
import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList;
import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDList;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDPairList;
+import de.lmu.ifi.dbs.elki.database.ids.integer.DoubleDistanceIntegerDBIDList;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.range.AbstractDistanceRangeQuery;
import de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDoubleDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
-import de.lmu.ifi.dbs.elki.index.tree.DirectoryEntry;
-import de.lmu.ifi.dbs.elki.index.tree.LeafEntry;
-import de.lmu.ifi.dbs.elki.index.tree.query.DoubleDistanceSearchCandidate;
+import de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialDirectoryEntry;
+import de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialPointLeafEntry;
import de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.AbstractRStarTree;
import de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.AbstractRStarTreeNode;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparableMinHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
/**
@@ -89,33 +89,38 @@ public class DoubleDistanceRStarTreeRangeQuery<O extends SpatialComparable> exte
*/
protected DoubleDistanceDBIDList doRangeQuery(O object, double epsilon) {
tree.statistics.countRangeQuery();
- final DoubleDistanceDBIDPairList result = new DoubleDistanceDBIDPairList();
- final ComparableMinHeap<DoubleDistanceSearchCandidate> pq = new ComparableMinHeap<>();
+ final DoubleDistanceIntegerDBIDList result = new DoubleDistanceIntegerDBIDList();
- // push root
- pq.add(new DoubleDistanceSearchCandidate(0.0, tree.getRootID()));
+ // Processing queue.
+ int[] pq = new int[101];
+ int ps = 0;
+ pq[ps++] = tree.getRootID();
// search in tree
- while(!pq.isEmpty()) {
- DoubleDistanceSearchCandidate pqNode = pq.poll();
- if(pqNode.mindist > epsilon) {
- break;
- }
-
- AbstractRStarTreeNode<?, ?> node = tree.getNode(pqNode.nodeID);
+ while(ps > 0) {
+ int pqNode = pq[--ps]; // Pop last.
+ AbstractRStarTreeNode<?, ?> node = tree.getNode(pqNode);
final int numEntries = node.getNumEntries();
- for(int i = 0; i < numEntries; i++) {
- double distance = distanceFunction.doubleMinDist(object, node.getEntry(i));
- tree.statistics.countDistanceCalculation();
- if(distance <= epsilon) {
- if(node.isLeaf()) {
- LeafEntry entry = (LeafEntry) node.getEntry(i);
+ if(node.isLeaf()) {
+ for(int i = 0; i < numEntries; i++) {
+ SpatialPointLeafEntry entry = (SpatialPointLeafEntry) node.getEntry(i);
+ double distance = distanceFunction.doubleMinDist(object, entry);
+ tree.statistics.countDistanceCalculation();
+ if(distance <= epsilon) {
result.add(distance, entry.getDBID());
}
- else {
- DirectoryEntry entry = (DirectoryEntry) node.getEntry(i);
- pq.add(new DoubleDistanceSearchCandidate(distance, entry.getEntryID()));
+ }
+ }
+ else {
+ for(int i = 0; i < numEntries; i++) {
+ SpatialDirectoryEntry entry = (SpatialDirectoryEntry) node.getEntry(i);
+ double distance = distanceFunction.doubleMinDist(object, entry);
+ if(distance <= epsilon) {
+ if(ps == pq.length) {
+ pq = Arrays.copyOf(pq, pq.length + (pq.length >>> 1));
+ }
+ pq[ps++] = entry.getEntryID();
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/query/GenericRStarTreeKNNQuery.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/query/GenericRStarTreeKNNQuery.java
index f520d52f..229758ea 100644
--- a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/query/GenericRStarTreeKNNQuery.java
+++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/query/GenericRStarTreeKNNQuery.java
@@ -94,76 +94,16 @@ public class GenericRStarTreeKNNQuery<O extends SpatialComparable, D extends Dis
}
/**
- * Performs a k-nearest neighbor query for the given NumberVector with the
- * given parameter k and the according distance function. The query result is
- * in ascending order to the distance to the query object.
- *
- * @param object the query object
- * @param knnList the knn list containing the result
- */
- protected void doKNNQuery(O object, KNNHeap<D> knnList) {
- final ComparableMinHeap<GenericDistanceSearchCandidate<D>> pq = new ComparableMinHeap<>(Math.min(knnList.getK() << 1, 20));
- tree.statistics.countKNNQuery();
-
- // push root
- pq.add(new GenericDistanceSearchCandidate<>(distanceFunction.getDistanceFactory().nullDistance(), tree.getRootID()));
- D maxDist = distanceFunction.getDistanceFactory().infiniteDistance();
-
- // search in tree
- while (!pq.isEmpty()) {
- GenericDistanceSearchCandidate<D> pqNode = pq.poll();
-
- if (pqNode.mindist.compareTo(maxDist) > 0) {
- return;
- }
- maxDist = expandNode(object, knnList, pq, maxDist, pqNode.nodeID);
- }
- }
-
- private D expandNode(O object, KNNHeap<D> knnList, final ComparableMinHeap<GenericDistanceSearchCandidate<D>> pq, D maxDist, final int nodeID) {
- AbstractRStarTreeNode<?, ?> node = tree.getNode(nodeID);
- // data node
- if (node.isLeaf()) {
- for (int i = 0; i < node.getNumEntries(); i++) {
- SpatialEntry entry = node.getEntry(i);
- D distance = distanceFunction.minDist(entry, object);
- tree.statistics.countDistanceCalculation();
- if (distance.compareTo(maxDist) <= 0) {
- knnList.add(distance, ((LeafEntry) entry).getDBID());
- maxDist = knnList.getKNNDistance();
- }
- }
- }
- // directory node
- else {
- for (int i = 0; i < node.getNumEntries(); i++) {
- SpatialEntry entry = node.getEntry(i);
- D distance = distanceFunction.minDist(entry, object);
- tree.statistics.countDistanceCalculation();
- // Greedy expand, bypassing the queue
- if (distance.isNullDistance()) {
- expandNode(object, knnList, pq, maxDist, ((DirectoryEntry) entry).getPageID());
- } else {
- if (distance.compareTo(maxDist) <= 0) {
- pq.add(new GenericDistanceSearchCandidate<>(distance, ((DirectoryEntry) entry).getPageID()));
- }
- }
- }
- }
- return maxDist;
- }
-
- /**
* Performs a batch knn query.
*
* @param node the node for which the query should be performed
* @param knnLists a map containing the knn lists for each query objects
*/
protected void batchNN(AbstractRStarTreeNode<?, ?> node, Map<DBID, KNNHeap<D>> knnLists) {
- if (node.isLeaf()) {
- for (int i = 0; i < node.getNumEntries(); i++) {
+ if(node.isLeaf()) {
+ for(int i = 0; i < node.getNumEntries(); i++) {
SpatialEntry p = node.getEntry(i);
- for (Entry<DBID, KNNHeap<D>> ent : knnLists.entrySet()) {
+ for(Entry<DBID, KNNHeap<D>> ent : knnLists.entrySet()) {
final DBID q = ent.getKey();
final KNNHeap<D> knns_q = ent.getValue();
D knn_q_maxDist = knns_q.getKNNDistance();
@@ -172,24 +112,25 @@ public class GenericRStarTreeKNNQuery<O extends SpatialComparable, D extends Dis
// FIXME: objects are NOT accessible by DBID in a plain rtree context!
D dist_pq = distanceQuery.distance(pid, q);
tree.statistics.countDistanceCalculation();
- if (dist_pq.compareTo(knn_q_maxDist) <= 0) {
- knns_q.add(dist_pq, pid);
+ if(dist_pq.compareTo(knn_q_maxDist) <= 0) {
+ knns_q.insert(dist_pq, pid);
}
}
}
- } else {
+ }
+ else {
ModifiableDBIDs ids = DBIDUtil.newArray(knnLists.size());
- for (DBID id : knnLists.keySet()) {
+ for(DBID id : knnLists.keySet()) {
ids.add(id);
}
List<FCPair<D, SpatialEntry>> entries = getSortedEntries(node, ids);
- for (FCPair<D, SpatialEntry> distEntry : entries) {
+ for(FCPair<D, SpatialEntry> distEntry : entries) {
D minDist = distEntry.first;
- for (Entry<DBID, KNNHeap<D>> ent : knnLists.entrySet()) {
+ for(Entry<DBID, KNNHeap<D>> ent : knnLists.entrySet()) {
final KNNHeap<D> knns_q = ent.getValue();
D knn_q_maxDist = knns_q.getKNNDistance();
- if (minDist.compareTo(knn_q_maxDist) <= 0) {
+ if(minDist.compareTo(knn_q_maxDist) <= 0) {
SpatialEntry entry = distEntry.second;
AbstractRStarTreeNode<?, ?> child = tree.getNode(((DirectoryEntry) entry).getPageID().intValue());
batchNN(child, knnLists);
@@ -211,10 +152,10 @@ public class GenericRStarTreeKNNQuery<O extends SpatialComparable, D extends Dis
protected List<FCPair<D, SpatialEntry>> getSortedEntries(AbstractRStarTreeNode<?, ?> node, DBIDs ids) {
List<FCPair<D, SpatialEntry>> result = new ArrayList<>();
- for (int i = 0; i < node.getNumEntries(); i++) {
+ for(int i = 0; i < node.getNumEntries(); i++) {
SpatialEntry entry = node.getEntry(i);
D minMinDist = distanceQuery.getDistanceFactory().infiniteDistance();
- for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
D minDist = distanceFunction.minDist(entry, relation.get(iter));
tree.statistics.countDistanceCalculation();
minMinDist = DistanceUtil.min(minDist, minMinDist);
@@ -229,22 +170,71 @@ public class GenericRStarTreeKNNQuery<O extends SpatialComparable, D extends Dis
@Override
public KNNList<D> getKNNForObject(O obj, int k) {
final KNNHeap<D> knnList = DBIDUtil.newHeap(distanceFunction.getDistanceFactory(), k);
- doKNNQuery(obj, knnList);
+ final ComparableMinHeap<GenericDistanceSearchCandidate<D>> pq = new ComparableMinHeap<>(Math.min(knnList.getK() << 1, 20));
+ tree.statistics.countKNNQuery();
+
+ // push root
+ pq.add(new GenericDistanceSearchCandidate<>(distanceFunction.getDistanceFactory().nullDistance(), tree.getRootID()));
+ D maxDist = distanceFunction.getDistanceFactory().infiniteDistance();
+
+ // search in tree
+ while(!pq.isEmpty()) {
+ GenericDistanceSearchCandidate<D> pqNode = pq.poll();
+
+ if(pqNode.mindist.compareTo(maxDist) > 0) {
+ break;
+ }
+ maxDist = expandNode(obj, knnList, pq, maxDist, pqNode.nodeID);
+ }
return knnList.toKNNList();
}
+ private D expandNode(O object, KNNHeap<D> knnList, final ComparableMinHeap<GenericDistanceSearchCandidate<D>> pq, D maxDist, final int nodeID) {
+ AbstractRStarTreeNode<?, ?> node = tree.getNode(nodeID);
+ // data node
+ if(node.isLeaf()) {
+ for(int i = 0; i < node.getNumEntries(); i++) {
+ SpatialEntry entry = node.getEntry(i);
+ D distance = distanceFunction.minDist(entry, object);
+ tree.statistics.countDistanceCalculation();
+ if(distance.compareTo(maxDist) <= 0) {
+ knnList.insert(distance, ((LeafEntry) entry).getDBID());
+ maxDist = knnList.getKNNDistance();
+ }
+ }
+ }
+ // directory node
+ else {
+ for(int i = 0; i < node.getNumEntries(); i++) {
+ SpatialEntry entry = node.getEntry(i);
+ D distance = distanceFunction.minDist(entry, object);
+ tree.statistics.countDistanceCalculation();
+ // Greedy expand, bypassing the queue
+ if(distance.isNullDistance()) {
+ expandNode(object, knnList, pq, maxDist, ((DirectoryEntry) entry).getPageID());
+ }
+ else {
+ if(distance.compareTo(maxDist) <= 0) {
+ pq.add(new GenericDistanceSearchCandidate<>(distance, ((DirectoryEntry) entry).getPageID()));
+ }
+ }
+ }
+ }
+ return maxDist;
+ }
+
@Override
public List<KNNList<D>> getKNNForBulkDBIDs(ArrayDBIDs ids, int k) {
// While this works, it seems to be slow at least for large sets!
final Map<DBID, KNNHeap<D>> knnLists = new HashMap<>(ids.size());
- for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
knnLists.put(DBIDUtil.deref(iter), DBIDUtil.newHeap(distanceFunction.getDistanceFactory(), k));
}
batchNN(tree.getRoot(), knnLists);
List<KNNList<D>> result = new ArrayList<>();
- for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
tree.statistics.countKNNQuery();
result.add(knnLists.get(DBIDUtil.deref(iter)).toKNNList());
}
diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/insert/ApproximativeLeastOverlapInsertionStrategy.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/insert/ApproximativeLeastOverlapInsertionStrategy.java
index 5b4e7a56..01dde189 100644
--- a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/insert/ApproximativeLeastOverlapInsertionStrategy.java
+++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/insert/ApproximativeLeastOverlapInsertionStrategy.java
@@ -1,4 +1,5 @@
package de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.insert;
+
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
@@ -32,7 +33,7 @@ import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleIntPair;
@@ -155,7 +156,7 @@ public class ApproximativeLeastOverlapInsertionStrategy extends LeastOverlapInse
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
IntParameter insertionCandidatesP = new IntParameter(INSERTION_CANDIDATES_ID, numCandidates);
- insertionCandidatesP.addConstraint(new GreaterConstraint(0));
+ insertionCandidatesP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
if(config.grab(insertionCandidatesP)) {
numCandidates = insertionCandidatesP.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/reinsert/AbstractPartialReinsert.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/reinsert/AbstractPartialReinsert.java
index cf447985..f73699ea 100644
--- a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/reinsert/AbstractPartialReinsert.java
+++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/reinsert/AbstractPartialReinsert.java
@@ -27,8 +27,7 @@ import de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDoubleDista
import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
@@ -94,13 +93,13 @@ public abstract class AbstractPartialReinsert implements ReinsertStrategy {
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
DoubleParameter reinsertAmountP = new DoubleParameter(REINSERT_AMOUNT_ID, 0.3);
- reinsertAmountP.addConstraint(new GreaterConstraint(0.0));
- reinsertAmountP.addConstraint(new LessConstraint(0.5));
- if (config.grab(reinsertAmountP)) {
+ reinsertAmountP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+ reinsertAmountP.addConstraint(CommonConstraints.LESS_THAN_HALF_DOUBLE);
+ if(config.grab(reinsertAmountP)) {
reinsertAmount = reinsertAmountP.getValue();
}
ObjectParameter<SpatialPrimitiveDoubleDistanceFunction<?>> distanceP = new ObjectParameter<>(REINSERT_DISTANCE_ID, SpatialPrimitiveDoubleDistanceFunction.class, SquaredEuclideanDistanceFunction.class);
- if (config.grab(distanceP)) {
+ if(config.grab(distanceP)) {
distanceFunction = distanceP.instantiateClass(config);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/index/vafile/PartialVAFile.java b/src/de/lmu/ifi/dbs/elki/index/vafile/PartialVAFile.java
index 0cbb7c56..e66b4011 100644
--- a/src/de/lmu/ifi/dbs/elki/index/vafile/PartialVAFile.java
+++ b/src/de/lmu/ifi/dbs/elki/index/vafile/PartialVAFile.java
@@ -66,7 +66,7 @@ import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.DoubleMaxHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair;
@@ -769,7 +769,7 @@ public class PartialVAFile<V extends NumberVector<?>> extends AbstractRefiningIn
}
protected DoubleDistanceKNNList retrieveAccurateDistances(List<PartialVACandidate> sortedCandidates, int k, BitSet subspace, V query) {
- DoubleDistanceKNNHeap result = (DoubleDistanceKNNHeap) DBIDUtil.newHeap(DoubleDistance.FACTORY, k);
+ DoubleDistanceKNNHeap result = DBIDUtil.newDoubleDistanceHeap(k);
for(PartialVACandidate va : sortedCandidates) {
double stopdist = result.doubleKNNDistance();
DBID currentID = va.getId();
@@ -777,7 +777,7 @@ public class PartialVAFile<V extends NumberVector<?>> extends AbstractRefiningIn
DoubleDistance dist = refine(currentID, query);
stats.incrementRefinements();
if(dist.doubleValue() < stopdist) {
- result.add(dist.doubleValue(), currentID);
+ result.insert(dist.doubleValue(), currentID);
}
}
}
@@ -877,12 +877,12 @@ public class PartialVAFile<V extends NumberVector<?>> extends AbstractRefiningIn
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
IntParameter pagesizeP = new IntParameter(AbstractPageFileFactory.Parameterizer.PAGE_SIZE_ID, 1024);
- pagesizeP.addConstraint(new GreaterConstraint(0));
+ pagesizeP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
if(config.grab(pagesizeP)) {
pagesize = pagesizeP.getValue();
}
IntParameter partitionsP = new IntParameter(Factory.PARTITIONS_ID);
- partitionsP.addConstraint(new GreaterConstraint(2));
+ partitionsP.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
if(config.grab(partitionsP)) {
numpart = partitionsP.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/index/vafile/VAFile.java b/src/de/lmu/ifi/dbs/elki/index/vafile/VAFile.java
index e6da2ec9..42651b15 100644
--- a/src/de/lmu/ifi/dbs/elki/index/vafile/VAFile.java
+++ b/src/de/lmu/ifi/dbs/elki/index/vafile/VAFile.java
@@ -58,7 +58,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair;
@@ -136,7 +136,7 @@ public class VAFile<V extends NumberVector<?>> extends AbstractRefiningIndex<V>
@Override
public void initialize() {
setPartitions(relation);
- for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
+ for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
DBID id = DBIDUtil.deref(iter);
vectorApprox.add(calculateApproximation(id, relation.get(id)));
}
@@ -149,7 +149,7 @@ public class VAFile<V extends NumberVector<?>> extends AbstractRefiningIndex<V>
* @throws IllegalArgumentException
*/
public void setPartitions(Relation<V> relation) throws IllegalArgumentException {
- if ((Math.log(partitions) / Math.log(2)) != (int) (Math.log(partitions) / Math.log(2))) {
+ if((Math.log(partitions) / Math.log(2)) != (int) (Math.log(partitions) / Math.log(2))) {
throw new IllegalArgumentException("Number of partitions must be a power of 2!");
}
@@ -157,16 +157,16 @@ public class VAFile<V extends NumberVector<?>> extends AbstractRefiningIndex<V>
final int size = relation.size();
splitPositions = new double[dimensions][partitions + 1];
- for (int d = 0; d < dimensions; d++) {
+ for(int d = 0; d < dimensions; d++) {
double[] tempdata = new double[size];
int j = 0;
- for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
tempdata[j] = relation.get(iditer).doubleValue(d);
j += 1;
}
Arrays.sort(tempdata);
- for (int b = 0; b < partitions; b++) {
+ for(int b = 0; b < partitions; b++) {
int start = (int) (b * size / (double) partitions);
splitPositions[d][b] = tempdata[start];
}
@@ -184,20 +184,20 @@ public class VAFile<V extends NumberVector<?>> extends AbstractRefiningIndex<V>
*/
public VectorApproximation calculateApproximation(DBID id, V dv) {
int approximation[] = new int[dv.getDimensionality()];
- for (int d = 0; d < splitPositions.length; d++) {
+ for(int d = 0; d < splitPositions.length; d++) {
final double val = dv.doubleValue(d);
final int lastBorderIndex = splitPositions[d].length - 1;
// Value is below data grid
- if (val < splitPositions[d][0]) {
+ if(val < splitPositions[d][0]) {
approximation[d] = 0;
- if (id != null) {
+ if(id != null) {
LOG.warning("Vector outside of VAFile grid!");
}
} // Value is above data grid
- else if (val > splitPositions[d][lastBorderIndex]) {
+ else if(val > splitPositions[d][lastBorderIndex]) {
approximation[d] = lastBorderIndex - 1;
- if (id != null) {
+ if(id != null) {
LOG.warning("Vector outside of VAFile grid!");
}
} // normal case
@@ -247,14 +247,14 @@ public class VAFile<V extends NumberVector<?>> extends AbstractRefiningIndex<V>
@SuppressWarnings("unchecked")
@Override
public <D extends Distance<D>> KNNQuery<V, D> getKNNQuery(DistanceQuery<V, D> distanceQuery, Object... hints) {
- for (Object hint : hints) {
- if (hint == DatabaseQuery.HINT_BULK) {
+ for(Object hint : hints) {
+ if(hint == DatabaseQuery.HINT_BULK) {
// FIXME: support bulk?
return null;
}
}
DistanceFunction<? super V, ?> df = distanceQuery.getDistanceFunction();
- if (df instanceof LPNormDistanceFunction) {
+ if(df instanceof LPNormDistanceFunction) {
double p = ((LPNormDistanceFunction) df).getP();
DistanceQuery<V, ?> ddq = (DistanceQuery<V, ?>) distanceQuery;
KNNQuery<V, ?> dq = new VAFileKNNQuery((DistanceQuery<V, DoubleDistance>) ddq, p);
@@ -268,7 +268,7 @@ public class VAFile<V extends NumberVector<?>> extends AbstractRefiningIndex<V>
@Override
public <D extends Distance<D>> RangeQuery<V, D> getRangeQuery(DistanceQuery<V, D> distanceQuery, Object... hints) {
DistanceFunction<? super V, ?> df = distanceQuery.getDistanceFunction();
- if (df instanceof LPNormDistanceFunction) {
+ if(df instanceof LPNormDistanceFunction) {
double p = ((LPNormDistanceFunction) df).getP();
DistanceQuery<V, ?> ddq = (DistanceQuery<V, ?>) distanceQuery;
RangeQuery<V, ?> dq = new VAFileRangeQuery((DistanceQuery<V, DoubleDistance>) ddq, p);
@@ -315,11 +315,11 @@ public class VAFile<V extends NumberVector<?>> extends AbstractRefiningIndex<V>
DoubleDistanceDBIDPairList result = new DoubleDistanceDBIDPairList();
// Approximation step
- for (int i = 0; i < vectorApprox.size(); i++) {
+ for(int i = 0; i < vectorApprox.size(); i++) {
VectorApproximation va = vectorApprox.get(i);
double minDist = vadist.getMinDist(va);
- if (minDist > eps) {
+ if(minDist > eps) {
continue;
}
@@ -328,7 +328,7 @@ public class VAFile<V extends NumberVector<?>> extends AbstractRefiningIndex<V>
// refine the next element
final double dist = refine(va.id, query).doubleValue();
- if (dist <= eps) {
+ if(dist <= eps) {
result.add(dist, va.id);
}
}
@@ -377,20 +377,20 @@ public class VAFile<V extends NumberVector<?>> extends AbstractRefiningIndex<V>
scans += 1;
// Approximation step
- for (int i = 0; i < vectorApprox.size(); i++) {
+ for(int i = 0; i < vectorApprox.size(); i++) {
VectorApproximation va = vectorApprox.get(i);
double minDist = vadist.getMinDist(va);
double maxDist = vadist.getMaxDist(va);
// Skip excess candidate generation:
- if (minDist > minMaxDist) {
+ if(minDist > minMaxDist) {
continue;
}
candidates.add(new DoubleObjPair<>(minDist, va.id));
// Update candidate pruning heap
minMaxHeap.add(maxDist, k);
- if (minMaxHeap.size() >= k) {
+ if(minMaxHeap.size() >= k) {
minMaxDist = minMaxHeap.peek();
}
}
@@ -398,24 +398,24 @@ public class VAFile<V extends NumberVector<?>> extends AbstractRefiningIndex<V>
Collections.sort(candidates);
// refinement step
- DoubleDistanceKNNHeap result = (DoubleDistanceKNNHeap) DBIDUtil.newHeap(DoubleDistance.FACTORY, k);
+ DoubleDistanceKNNHeap result = DBIDUtil.newDoubleDistanceHeap(k);
// log.fine("candidates size " + candidates.size());
// retrieve accurate distances
- for (DoubleObjPair<DBID> va : candidates) {
+ for(DoubleObjPair<DBID> va : candidates) {
// Stop when we are sure to have all elements
- if (result.size() >= k) {
+ if(result.size() >= k) {
double kDist = result.doubleKNNDistance();
- if (va.first > kDist) {
+ if(va.first > kDist) {
break;
}
}
// refine the next element
final double dist = refine(va.second, query).doubleValue();
- result.add(dist, va.second);
+ result.insert(dist, va.second);
}
- if (LOG.isDebuggingFinest()) {
+ if(LOG.isDebuggingFinest()) {
LOG.finest("query = (" + query + ")");
LOG.finest("database: " + vectorApprox.size() + ", candidates: " + candidates.size() + ", results: " + result.size());
}
@@ -498,13 +498,13 @@ public class VAFile<V extends NumberVector<?>> extends AbstractRefiningIndex<V>
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
IntParameter pagesizeP = new IntParameter(AbstractPageFileFactory.Parameterizer.PAGE_SIZE_ID, 1024);
- pagesizeP.addConstraint(new GreaterConstraint(0));
- if (config.grab(pagesizeP)) {
+ pagesizeP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(pagesizeP)) {
pagesize = pagesizeP.getValue();
}
IntParameter partitionsP = new IntParameter(Factory.PARTITIONS_ID);
- partitionsP.addConstraint(new GreaterConstraint(2));
- if (config.grab(partitionsP)) {
+ partitionsP.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
+ if(config.grab(partitionsP)) {
numpart = partitionsP.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/logging/progress/MutableProgress.java b/src/de/lmu/ifi/dbs/elki/logging/progress/MutableProgress.java
new file mode 100644
index 00000000..a89d7b43
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/logging/progress/MutableProgress.java
@@ -0,0 +1,106 @@
+package de.lmu.ifi.dbs.elki.logging.progress;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.logging.Logging;
+
+/**
+ * Progress class with a moving target.
+ *
+ * @author Erich Schubert
+ */
+public class MutableProgress extends AbstractProgress {
+ /**
+ * The overall number of items to process.
+ */
+ private int total;
+
+ /**
+ * Constructor with logging.
+ *
+ * @param task Task name.
+ * @param total Initial value of total.
+ * @param logger Logger to report to
+ */
+ public MutableProgress(String task, int total, Logging logger) {
+ super(task);
+ this.total = total;
+ logger.progress(this);
+ }
+
+ /**
+ * Serialize 'indefinite' progress.
+ */
+ @Override
+ public StringBuilder appendToBuffer(StringBuilder buf) {
+ int percentage = (int) (getProcessed() * 100.0 / total);
+ buf.append(getTask());
+ buf.append(": ");
+ buf.append(getProcessed());
+ buf.append("/");
+ buf.append(total);
+ buf.append(" [");
+ if (percentage < 100) {
+ buf.append(' ');
+ }
+ if (percentage < 10) {
+ buf.append(' ');
+ }
+ buf.append(percentage);
+ buf.append("%]");
+ return buf;
+ }
+
+ /**
+ * Return whether the progress is complete
+ *
+ * @return Completion status.
+ */
+ @Override
+ public boolean isComplete() {
+ return getProcessed() == total;
+ }
+
+ /**
+ * Modify the total value.
+ *
+ * @param total
+ * @throws IllegalArgumentException
+ */
+ public void setTotal(int total) throws IllegalArgumentException {
+ if (getProcessed() > total) {
+ throw new IllegalArgumentException(getProcessed() + " exceeds total: " + total);
+ }
+ this.total = total;
+ }
+
+ /**
+ * Get the current value of total.
+ *
+ * @return total
+ */
+ public int getTotal() {
+ return total;
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/MathUtil.java b/src/de/lmu/ifi/dbs/elki/math/MathUtil.java
index 27de942e..9a7d4770 100644
--- a/src/de/lmu/ifi/dbs/elki/math/MathUtil.java
+++ b/src/de/lmu/ifi/dbs/elki/math/MathUtil.java
@@ -227,7 +227,6 @@ public final class MathUtil {
*/
public static double mahalanobisDistance(Matrix weightMatrix, Vector o1_minus_o2) {
double sqrDist = o1_minus_o2.transposeTimesTimes(weightMatrix, o1_minus_o2);
-
if (sqrDist < 0 && Math.abs(sqrDist) < 0.000000001) {
sqrDist = Math.abs(sqrDist);
}
@@ -243,7 +242,6 @@ public final class MathUtil {
*/
public static double mahalanobisDistance(double[][] weightMatrix, double[] o1_minus_o2) {
double sqrDist = VMath.transposeTimesTimes(o1_minus_o2, weightMatrix, o1_minus_o2);
-
if (sqrDist < 0 && Math.abs(sqrDist) < 0.000000001) {
sqrDist = Math.abs(sqrDist);
}
@@ -546,17 +544,26 @@ public final class MathUtil {
* @return Angle
*/
public static double angle(double[] v1, double[] v2) {
+    final int mindim = (v1.length <= v2.length) ? v1.length : v2.length; // shorter length; longer vector's excess dims only add to its own norm below
// Essentially, we want to compute this:
// v1.transposeTimes(v2) / (v1.euclideanLength() * v2.euclideanLength());
// We can just compute all three in parallel.
double s = 0, e1 = 0, e2 = 0;
- for (int k = 0; k < v1.length; k++) {
+ for (int k = 0; k < mindim; k++) {
final double r1 = v1[k];
final double r2 = v2[k];
s += r1 * r2;
e1 += r1 * r1;
e2 += r2 * r2;
}
+ for (int k = mindim; k < v1.length; k++) {
+ final double r1 = v1[k];
+ e1 += r1 * r1;
+ }
+ for (int k = mindim; k < v2.length; k++) {
+ final double r2 = v2[k];
+ e2 += r2 * r2;
+ }
return Math.sqrt((s / e1) * (s / e2));
}
@@ -581,18 +588,30 @@ public final class MathUtil {
* @return Angle
*/
public static double angle(double[] v1, double[] v2, double[] o) {
+    final int mindim = (v1.length <= v2.length) ? v1.length : v2.length; // shorter length; longer vector's excess dims only add to its own norm below
// Essentially, we want to compute this:
// v1' = v1 - o, v2' = v2 - o
// v1'.transposeTimes(v2') / (v1'.euclideanLength()*v2'.euclideanLength());
// We can just compute all three in parallel.
double s = 0, e1 = 0, e2 = 0;
- for (int k = 0; k < v1.length; k++) {
- final double r1 = v1[k] - o[k];
- final double r2 = v2[k] - o[k];
+ for (int k = 0; k < mindim; k++) {
+ final double ok = (k < o.length) ? o[k] : 0;
+ final double r1 = v1[k] - ok;
+ final double r2 = v2[k] - ok;
s += r1 * r2;
e1 += r1 * r1;
e2 += r2 * r2;
}
+ for (int k = mindim; k < v1.length; k++) {
+ final double ok = (k < o.length) ? o[k] : 0;
+ final double r1 = v1[k] - ok;
+ e1 += r1 * r1;
+ }
+ for (int k = mindim; k < v2.length; k++) {
+ final double ok = (k < o.length) ? o[k] : 0;
+ final double r2 = v2[k] - ok;
+ e2 += r2 * r2;
+ }
return Math.sqrt((s / e1) * (s / e2));
}
@@ -850,4 +869,47 @@ public final class MathUtil {
public static double log1mexp(double x) {
return (x > -LOG2) ? Math.log(-Math.expm1(x)) : Math.log1p(-Math.exp(x));
}
+
+ /**
+ * Fast loop for computing {@code Math.pow(x, p)} for p >= 0 integer.
+ *
+ * @param x Base
+ * @param p Exponent
+ * @return {@code Math.pow(x, p)}
+ */
+ public static double powi(double x, int p) {
+ if (p < 0) { // Fallback for negative integers.
+ return Math.pow(x, p);
+ }
+ double ret = 1.;
+ for (; p > 0; p >>= 1) {
+ if ((p & 1) == 1) {
+ ret *= x;
+ }
+ x *= x;
+ }
+ return ret;
+ }
+
+ /**
+ * Fast loop for computing {@code Math.pow(x, p)} for p >= 0 integer and x
+ * integer.
+ *
+ * @param x Base
+ * @param p Exponent
+ * @return {@code Math.pow(x, p)}
+ */
+ public static int ipowi(int x, int p) {
+ if (p < 0) { // Fallback for negative integers.
+ return (int) Math.pow(x, p);
+ }
+ int ret = 1;
+ for (; p > 0; p >>= 1) {
+ if ((p & 1) == 1) {
+ ret *= x;
+ }
+ x *= x;
+ }
+ return ret;
+ }
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/Mean.java b/src/de/lmu/ifi/dbs/elki/math/Mean.java
index 5b943a2f..5e70938f 100644
--- a/src/de/lmu/ifi/dbs/elki/math/Mean.java
+++ b/src/de/lmu/ifi/dbs/elki/math/Mean.java
@@ -55,7 +55,7 @@ public class Mean {
/**
* Mean of values - first moment.
*/
- protected double m1 = 0.0;
+ protected double m1 = 0.;
/**
* Weight sum (number of samples).
@@ -147,7 +147,7 @@ public class Mean {
*/
public static Mean[] newArray(int dimensionality) {
Mean[] arr = new Mean[dimensionality];
- for(int i = 0; i < dimensionality; i++) {
+ for (int i = 0; i < dimensionality; i++) {
arr[i] = new Mean();
}
return arr;
@@ -165,4 +165,19 @@ public class Mean {
m1 = 0;
n = 0;
}
-} \ No newline at end of file
+
+ /**
+ * Static helper function.
+ *
+ * @param data Data to compute the mean for.
+ * @return Mean
+ */
+ public static double of(double[] data) {
+ // FIXME: what is numerically best. Kahan summation?
+ double sum = 0.;
+ for (double v : data) {
+ sum += v;
+ }
+ return sum / data.length;
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/MeanVariance.java b/src/de/lmu/ifi/dbs/elki/math/MeanVariance.java
index 7723daa4..b229d5d8 100644
--- a/src/de/lmu/ifi/dbs/elki/math/MeanVariance.java
+++ b/src/de/lmu/ifi/dbs/elki/math/MeanVariance.java
@@ -240,7 +240,7 @@ public class MeanVariance extends Mean {
@Override
public String toString() {
- return "MeanVariance(mean=" + getMean() + ",var=" + getSampleVariance() + ")";
+ return "MeanVariance(mean=" + getMean() + ",var=" + ((n > 1.) ? getSampleVariance() : "n/a") + ")";
}
@Override
diff --git a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/HiCSDimensionSimilarity.java b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/HiCSDimensionSimilarity.java
index efc12b5c..9168fe7e 100644
--- a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/HiCSDimensionSimilarity.java
+++ b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/HiCSDimensionSimilarity.java
@@ -44,7 +44,7 @@ import de.lmu.ifi.dbs.elki.math.statistics.tests.KolmogorovSmirnovTest;
import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -112,16 +112,16 @@ public class HiCSDimensionSimilarity implements DimensionSimilarity<NumberVector
@Override
public void computeDimensionSimilarites(Database database, Relation<? extends NumberVector<?>> relation, DBIDs subset, DimensionSimilarityMatrix matrix) {
- final Random random = rnd.getRandom();
+ final Random random = rnd.getSingleThreadedRandom();
final int dim = matrix.size();
// FIXME: only compute indexes necessary.
ArrayList<ArrayDBIDs> subspaceIndex = buildOneDimIndexes(relation, subset, matrix);
// compute two-element sets of subspaces
- for (int x = 0; x < dim; x++) {
+ for(int x = 0; x < dim; x++) {
final int i = matrix.dim(x);
- for (int y = x + 1; y < dim; y++) {
+ for(int y = x + 1; y < dim; y++) {
final int j = matrix.dim(y);
matrix.set(x, y, calculateContrast(relation, subset, subspaceIndex.get(x), subspaceIndex.get(y), i, j, random));
}
@@ -143,7 +143,7 @@ public class HiCSDimensionSimilarity implements DimensionSimilarity<NumberVector
ArrayList<ArrayDBIDs> subspaceIndex = new ArrayList<>(dim);
SortDBIDsBySingleDimension comp = new VectorUtil.SortDBIDsBySingleDimension(relation);
- for (int i = 0; i < dim; i++) {
+ for(int i = 0; i < dim; i++) {
ArrayModifiableDBIDs amDBIDs = DBIDUtil.newArray(ids);
comp.setDimension(matrix.dim(i));
amDBIDs.sort(comp);
@@ -166,21 +166,22 @@ public class HiCSDimensionSimilarity implements DimensionSimilarity<NumberVector
* @return Contrast
*/
private double calculateContrast(Relation<? extends NumberVector<?>> relation, DBIDs subset, ArrayDBIDs subspaceIndex1, ArrayDBIDs subspaceIndex2, int dim1, int dim2, Random random) {
- final double alpha1 = Math.pow(alpha, .5);
+ final double alpha1 = Math.sqrt(alpha);
final int windowsize = (int) (relation.size() * alpha1);
// TODO: speed up by keeping marginal distributions prepared.
// Instead of doing the random switch, do half-half.
double deviationSum = 0.0;
- for (int i = 0; i < m; i++) {
+ for(int i = 0; i < m; i++) {
// Randomly switch dimensions
final int cdim1;
ArrayDBIDs cindex1, cindex2;
- if (random.nextDouble() > .5) {
+ if(random.nextDouble() > .5) {
cdim1 = dim1;
cindex1 = subspaceIndex1;
cindex2 = subspaceIndex2;
- } else {
+ }
+ else {
cdim1 = dim2;
cindex1 = subspaceIndex2;
cindex2 = subspaceIndex1;
@@ -189,7 +190,7 @@ public class HiCSDimensionSimilarity implements DimensionSimilarity<NumberVector
DBIDArrayIter iter = cindex2.iter();
HashSetModifiableDBIDs conditionalSample = DBIDUtil.newHashSet();
iter.seek(random.nextInt(subset.size() - windowsize));
- for (int k = 0; k < windowsize && iter.valid(); k++, iter.advance()) {
+ for(int k = 0; k < windowsize && iter.valid(); k++, iter.advance()) {
conditionalSample.add(iter);
}
// Project the data
@@ -198,10 +199,10 @@ public class HiCSDimensionSimilarity implements DimensionSimilarity<NumberVector
{
int l = 0, s = 0;
// Note: we use the sorted index sets.
- for (DBIDIter id = cindex1.iter(); id.valid(); id.advance(), l++) {
+ for(DBIDIter id = cindex1.iter(); id.valid(); id.advance(), l++) {
final double val = relation.get(id).doubleValue(cdim1);
fullValues[l] = val;
- if (conditionalSample.contains(id)) {
+ if(conditionalSample.contains(id)) {
sampleValues[s] = val;
s++;
}
@@ -209,7 +210,7 @@ public class HiCSDimensionSimilarity implements DimensionSimilarity<NumberVector
assert (s == conditionalSample.size());
}
double contrast = statTest.deviation(fullValues, sampleValues);
- if (Double.isNaN(contrast)) {
+ if(Double.isNaN(contrast)) {
i--;
continue;
}
@@ -254,24 +255,24 @@ public class HiCSDimensionSimilarity implements DimensionSimilarity<NumberVector
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
final IntParameter mP = new IntParameter(HiCS.Parameterizer.M_ID, 50);
- mP.addConstraint(new GreaterConstraint(1));
- if (config.grab(mP)) {
+ mP.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
+ if(config.grab(mP)) {
m = mP.intValue();
}
final DoubleParameter alphaP = new DoubleParameter(HiCS.Parameterizer.ALPHA_ID, 0.1);
- alphaP.addConstraint(new GreaterConstraint(0));
- if (config.grab(alphaP)) {
+ alphaP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+ if(config.grab(alphaP)) {
alpha = alphaP.doubleValue();
}
final ObjectParameter<GoodnessOfFitTest> testP = new ObjectParameter<>(HiCS.Parameterizer.TEST_ID, GoodnessOfFitTest.class, KolmogorovSmirnovTest.class);
- if (config.grab(testP)) {
+ if(config.grab(testP)) {
statTest = testP.instantiateClass(config);
}
final RandomParameter rndP = new RandomParameter(HiCS.Parameterizer.SEED_ID);
- if (config.grab(rndP)) {
+ if(config.grab(rndP)) {
rnd = rndP.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/geometry/PrimsMinimumSpanningTree.java b/src/de/lmu/ifi/dbs/elki/math/geometry/PrimsMinimumSpanningTree.java
index de31f92e..887b5012 100644
--- a/src/de/lmu/ifi/dbs/elki/math/geometry/PrimsMinimumSpanningTree.java
+++ b/src/de/lmu/ifi/dbs/elki/math/geometry/PrimsMinimumSpanningTree.java
@@ -43,7 +43,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
*
* @author Erich Schubert
*
- * @apiviz.uses Adapter
+ * @apiviz.composedOf Adapter
*/
@Reference(authors = "R. C. Prim", title = "Shortest connection networks and some generalizations", booktitle = "Bell System Technical Journal, 36 (1957)")
public class PrimsMinimumSpanningTree {
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/CovarianceMatrix.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/CovarianceMatrix.java
index 69023328..345f2a42 100644
--- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/CovarianceMatrix.java
+++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/CovarianceMatrix.java
@@ -97,19 +97,19 @@ public class CovarianceMatrix {
assert (val.length == mean.length);
final double nwsum = wsum + 1.0;
// Compute new means
- for(int i = 0; i < mean.length; i++) {
+ for (int i = 0; i < mean.length; i++) {
final double delta = val[i] - mean[i];
nmea[i] = mean[i] + delta / nwsum;
}
// Update covariance matrix
- for(int i = 0; i < mean.length; i++) {
- for(int j = i; j < mean.length; j++) {
+ for (int i = 0; i < mean.length; i++) {
+ for (int j = i; j < mean.length; j++) {
// We DO want to use the new mean once and the old mean once!
// It does not matter which one is which.
double delta = (val[i] - nmea[i]) * (val[j] - mean[j]);
elements[i][j] = elements[i][j] + delta;
// Optimize via symmetry
- if(i != j) {
+ if (i != j) {
elements[j][i] = elements[j][i] + delta;
}
}
@@ -130,20 +130,20 @@ public class CovarianceMatrix {
assert (val.length == mean.length);
final double nwsum = wsum + weight;
// Compute new means
- for(int i = 0; i < mean.length; i++) {
+ for (int i = 0; i < mean.length; i++) {
final double delta = val[i] - mean[i];
final double rval = delta * weight / nwsum;
nmea[i] = mean[i] + rval;
}
// Update covariance matrix
- for(int i = 0; i < mean.length; i++) {
- for(int j = i; j < mean.length; j++) {
+ for (int i = 0; i < mean.length; i++) {
+ for (int j = i; j < mean.length; j++) {
// We DO want to use the new mean once and the old mean once!
// It does not matter which one is which.
double delta = (val[i] - nmea[i]) * (val[j] - mean[j]) * weight;
elements[i][j] = elements[i][j] + delta;
// Optimize via symmetry
- if(i != j) {
+ if (i != j) {
elements[j][i] = elements[j][i] + delta;
}
}
@@ -182,19 +182,19 @@ public class CovarianceMatrix {
assert (val.getDimensionality() == mean.length);
final double nwsum = wsum + 1.0;
// Compute new means
- for(int i = 0; i < mean.length; i++) {
+ for (int i = 0; i < mean.length; i++) {
final double delta = val.doubleValue(i) - mean[i];
nmea[i] = mean[i] + delta / nwsum;
}
// Update covariance matrix
- for(int i = 0; i < mean.length; i++) {
- for(int j = i; j < mean.length; j++) {
+ for (int i = 0; i < mean.length; i++) {
+ for (int j = i; j < mean.length; j++) {
// We DO want to use the new mean once and the old mean once!
// It does not matter which one is which.
double delta = (val.doubleValue(i) - nmea[i]) * (val.doubleValue(j) - mean[j]);
elements[i][j] = elements[i][j] + delta;
// Optimize via symmetry
- if(i != j) {
+ if (i != j) {
elements[j][i] = elements[j][i] + delta;
}
}
@@ -205,6 +205,16 @@ public class CovarianceMatrix {
}
/**
+ * Get the weight sum, to test whether the covariance matrix can be
+ * materialized.
+ *
+ * @return Weight sum.
+ */
+ public double getWeight() {
+ return wsum;
+ }
+
+ /**
* Add data with a given weight.
*
* @param val data
@@ -214,20 +224,20 @@ public class CovarianceMatrix {
assert (val.getDimensionality() == mean.length);
final double nwsum = wsum + weight;
// Compute new means
- for(int i = 0; i < mean.length; i++) {
+ for (int i = 0; i < mean.length; i++) {
final double delta = val.doubleValue(i) - mean[i];
final double rval = delta * weight / nwsum;
nmea[i] = mean[i] + rval;
}
// Update covariance matrix
- for(int i = 0; i < mean.length; i++) {
- for(int j = i; j < mean.length; j++) {
+ for (int i = 0; i < mean.length; i++) {
+ for (int j = i; j < mean.length; j++) {
// We DO want to use the new mean once and the old mean once!
// It does not matter which one is which.
double delta = (val.doubleValue(i) - nmea[i]) * (val.doubleValue(j) - mean[j]) * weight;
elements[i][j] = elements[i][j] + delta;
// Optimize via symmetry
- if(i != j) {
+ if (i != j) {
elements[j][i] = elements[j][i] + delta;
}
}
@@ -268,7 +278,7 @@ public class CovarianceMatrix {
* @return New matrix
*/
public Matrix makeSampleMatrix() {
- if(wsum <= 1.0) {
+ if (wsum <= 1.0) {
throw new IllegalStateException(ERR_TOO_LITTLE_WEIGHT);
}
Matrix mat = new Matrix(elements);
@@ -286,7 +296,7 @@ public class CovarianceMatrix {
* @return New matrix
*/
public Matrix makeNaiveMatrix() {
- if(wsum <= 0.0) {
+ if (wsum <= 0.0) {
throw new IllegalStateException(ERR_TOO_LITTLE_WEIGHT);
}
Matrix mat = new Matrix(elements);
@@ -304,7 +314,7 @@ public class CovarianceMatrix {
* @return New matrix
*/
public Matrix destroyToSampleMatrix() {
- if(wsum <= 1.0) {
+ if (wsum <= 1.0) {
throw new IllegalStateException(ERR_TOO_LITTLE_WEIGHT);
}
Matrix mat = new Matrix(elements).timesEquals(1.0 / (wsum - 1));
@@ -323,7 +333,7 @@ public class CovarianceMatrix {
* @return New matrix
*/
public Matrix destroyToNaiveMatrix() {
- if(wsum <= 0.0) {
+ if (wsum <= 0.0) {
throw new IllegalStateException(ERR_TOO_LITTLE_WEIGHT);
}
Matrix mat = new Matrix(elements).timesEquals(1.0 / wsum);
@@ -340,7 +350,7 @@ public class CovarianceMatrix {
public static CovarianceMatrix make(Matrix mat) {
CovarianceMatrix c = new CovarianceMatrix(mat.getRowDimensionality());
int n = mat.getColumnDimensionality();
- for(int i = 0; i < n; i++) {
+ for (int i = 0; i < n; i++) {
// TODO: avoid constructing the vector objects?
c.put(mat.getCol(i));
}
@@ -355,7 +365,7 @@ public class CovarianceMatrix {
*/
public static CovarianceMatrix make(Relation<? extends NumberVector<?>> relation) {
CovarianceMatrix c = new CovarianceMatrix(RelationUtil.dimensionality(relation));
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
c.put(relation.get(iditer));
}
return c;
@@ -370,9 +380,9 @@ public class CovarianceMatrix {
*/
public static CovarianceMatrix make(Relation<? extends NumberVector<?>> relation, DBIDs ids) {
CovarianceMatrix c = new CovarianceMatrix(RelationUtil.dimensionality(relation));
- for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
c.put(relation.get(iter));
}
return c;
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/EigenvalueDecomposition.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/EigenvalueDecomposition.java
index f58cd86f..61401b18 100644
--- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/EigenvalueDecomposition.java
+++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/EigenvalueDecomposition.java
@@ -44,6 +44,11 @@ import de.lmu.ifi.dbs.elki.math.MathUtil;
*/
public class EigenvalueDecomposition implements java.io.Serializable {
/**
+ * Epsilon.
+ */
+ private static final double EPS = Math.pow(2.0, -52.0);
+
+ /**
* Serial version
*/
private static final long serialVersionUID = 1L;
@@ -215,7 +220,7 @@ public class EigenvalueDecomposition implements java.io.Serializable {
double f = 0.0;
double tst1 = 0.0;
- double eps = Math.pow(2.0, -52.0);
+ double eps = EPS;
for (int l = 0; l < n; l++) {
// Find small subdiagonal element
tst1 = Math.max(tst1, Math.abs(d[l]) + Math.abs(e[l]));
@@ -442,7 +447,7 @@ public class EigenvalueDecomposition implements java.io.Serializable {
int n = nn - 1;
int low = 0;
int high = nn - 1;
- double eps = Math.pow(2.0, -52.0);
+ double eps = EPS;
double exshift = 0.0;
double p = 0, q = 0, r = 0, s = 0, z = 0, t, w, x, y;
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Matrix.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Matrix.java
index f9ac4c4c..3f513720 100644
--- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Matrix.java
+++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Matrix.java
@@ -1448,10 +1448,10 @@ public class Matrix {
final StreamTokenizer tokenizer = new StreamTokenizer(input);
// Although StreamTokenizer will parse numbers, it doesn't recognize
- // scientific notation (E or D); however, Double.valueOf does.
+ // scientific notation (E or D); however, FormatUtil.parseDouble does.
// The strategy here is to disable StreamTokenizer's number parsing.
// We'll only get whitespace delimited words, EOL's and EOF's.
- // These words should all be numbers, for Double.valueOf to parse.
+ // These words should all be numbers, for FormatUtil.parseDouble to parse.
tokenizer.resetSyntax();
tokenizer.wordChars(0, 255);
@@ -1467,7 +1467,7 @@ public class Matrix {
throw new java.io.IOException("Unexpected EOF on matrix read.");
}
do {
- v.add(Double.parseDouble(tokenizer.sval)); // Read & store 1st
+ v.add(FormatUtil.parseDouble(tokenizer.sval)); // Read & store 1st
// row.
}
while (tokenizer.nextToken() == StreamTokenizer.TT_WORD);
@@ -1484,7 +1484,7 @@ public class Matrix {
if (j >= n) {
throw new java.io.IOException("Row " + v.size() + " is too long.");
}
- row[j++] = Double.parseDouble(tokenizer.sval);
+ row[j++] = FormatUtil.parseDouble(tokenizer.sval);
}
while (tokenizer.nextToken() == StreamTokenizer.TT_WORD);
if (j < n) {
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/SingularValueDecomposition.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/SingularValueDecomposition.java
index 4a496b5c..dc94754a 100644
--- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/SingularValueDecomposition.java
+++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/SingularValueDecomposition.java
@@ -42,6 +42,8 @@ import de.lmu.ifi.dbs.elki.math.MathUtil;
* @apiviz.uses Matrix - - transforms
*/
public class SingularValueDecomposition {
+ private static final double EPS = Math.pow(2.0, -52.0);
+
/**
* Arrays for internal storage of U and V.
*
@@ -268,7 +270,7 @@ public class SingularValueDecomposition {
int pp = p - 1;
int iter = 0;
- double eps = Math.pow(2.0, -52.0);
+ double eps = EPS;
while(p > 0) {
int k, kase;
@@ -545,7 +547,7 @@ public class SingularValueDecomposition {
* @return Number of non-negligible singular values.
*/
public int rank() {
- double eps = Math.pow(2.0, -52.0);
+ double eps = EPS;
double tol = Math.max(m, n) * s[0] * eps;
int r = 0;
for(int i = 0; i < s.length; i++) {
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/VMath.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/VMath.java
index efac8ff0..6b8cdc31 100644
--- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/VMath.java
+++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/VMath.java
@@ -214,7 +214,7 @@ public final class VMath {
*
* @param v1 first vector
* @param v2 another vector
- * @param s2 scalar vor v2
+ * @param s2 scalar factor for v2
* @return v1 = v1 + v2 * s2
*/
public static final double[] plusTimesEquals(final double[] v1, final double[] v2, final double s2) {
@@ -229,7 +229,7 @@ public final class VMath {
* Computes v1 = v1 * s1 + v2, overwriting v1
*
* @param v1 first vector
- * @param s1 scalar for v1
+ * @param s1 scalar factor for v1
* @param v2 another vector
* @return v1 = v1 * s1 + v2
*/
@@ -460,7 +460,7 @@ public final class VMath {
}
/**
- * Computes v1 = v1 * s1, overwritings v1
+ * Computes v1 = v1 * s1, overwriting v1
*
* @param v1 original vector
* @param s scalar
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/DropEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/DropEigenPairFilter.java
index 22cefa87..7a2917c5 100644
--- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/DropEigenPairFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/DropEigenPairFilter.java
@@ -30,7 +30,7 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.EigenPair;
import de.lmu.ifi.dbs.elki.math.linearalgebra.SortedEigenPairs;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
@@ -133,7 +133,7 @@ public class DropEigenPairFilter implements EigenPairFilter {
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
DoubleParameter walphaP = new DoubleParameter(WeakEigenPairFilter.EIGENPAIR_FILTER_WALPHA, DEFAULT_WALPHA);
- walphaP.addConstraint(new GreaterEqualConstraint(0.0));
+ walphaP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
if (config.grab(walphaP)) {
walpha = walphaP.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/FirstNEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/FirstNEigenPairFilter.java
index f613e067..79ee4ea6 100644
--- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/FirstNEigenPairFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/FirstNEigenPairFilter.java
@@ -33,7 +33,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -122,7 +122,7 @@ public class FirstNEigenPairFilter implements EigenPairFilter {
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
IntParameter nP = new IntParameter(EIGENPAIR_FILTER_N);
- nP.addConstraint(new GreaterEqualConstraint(0));
+ nP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_INT);
if(config.grab(nP)) {
n = nP.intValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/LimitEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/LimitEigenPairFilter.java
index b3bd04b3..4d9e7331 100644
--- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/LimitEigenPairFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/LimitEigenPairFilter.java
@@ -34,9 +34,8 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.WrongParameterValueException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GlobalParameterConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessEqualConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ParameterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ParameterFlagGlobalConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
@@ -98,26 +97,27 @@ public class LimitEigenPairFilter implements EigenPairFilter {
@Override
public FilteredEigenPairs filter(SortedEigenPairs eigenPairs) {
StringBuilder msg = new StringBuilder();
- if (LOG.isDebugging()) {
+ if(LOG.isDebugging()) {
msg.append("delta = ").append(delta);
}
// determine limit
double limit;
- if (absolute) {
+ if(absolute) {
limit = delta;
- } else {
+ }
+ else {
double max = Double.NEGATIVE_INFINITY;
- for (int i = 0; i < eigenPairs.size(); i++) {
+ for(int i = 0; i < eigenPairs.size(); i++) {
EigenPair eigenPair = eigenPairs.getEigenPair(i);
double eigenValue = Math.abs(eigenPair.getEigenvalue());
- if (max < eigenValue) {
+ if(max < eigenValue) {
max = eigenValue;
}
}
limit = max * delta;
}
- if (LOG.isDebugging()) {
+ if(LOG.isDebugging()) {
msg.append("\nlimit = ").append(limit);
}
@@ -126,16 +126,17 @@ public class LimitEigenPairFilter implements EigenPairFilter {
List<EigenPair> weakEigenPairs = new ArrayList<>();
// determine strong and weak eigenpairs
- for (int i = 0; i < eigenPairs.size(); i++) {
+ for(int i = 0; i < eigenPairs.size(); i++) {
EigenPair eigenPair = eigenPairs.getEigenPair(i);
double eigenValue = Math.abs(eigenPair.getEigenvalue());
- if (eigenValue >= limit) {
+ if(eigenValue >= limit) {
strongEigenPairs.add(eigenPair);
- } else {
+ }
+ else {
weakEigenPairs.add(eigenPair);
}
}
- if (LOG.isDebugging()) {
+ if(LOG.isDebugging()) {
msg.append("\nstrong EigenPairs = ").append(strongEigenPairs);
msg.append("\nweak EigenPairs = ").append(weakEigenPairs);
LOG.debugFine(msg.toString());
@@ -166,16 +167,16 @@ public class LimitEigenPairFilter implements EigenPairFilter {
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
Flag absoluteF = new Flag(EIGENPAIR_FILTER_ABSOLUTE);
- if (config.grab(absoluteF)) {
+ if(config.grab(absoluteF)) {
absolute = absoluteF.isTrue();
}
DoubleParameter deltaP = new DoubleParameter(EIGENPAIR_FILTER_DELTA, DEFAULT_DELTA);
- deltaP.addConstraint(new GreaterEqualConstraint(0));
- if (config.grab(deltaP)) {
+ deltaP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
+ if(config.grab(deltaP)) {
delta = deltaP.doubleValue();
// TODO: make this a global constraint?
- if (absolute && deltaP.tookDefaultValue()) {
+ if(absolute && deltaP.tookDefaultValue()) {
config.reportError(new WrongParameterValueException("Illegal parameter setting: " + "Flag " + absoluteF.getName() + " is set, " + "but no value for " + deltaP.getName() + " is specified."));
}
}
@@ -187,10 +188,8 @@ public class LimitEigenPairFilter implements EigenPairFilter {
List<ParameterConstraint<? super Double>> cons = new ArrayList<>();
// TODO: Keep the constraint here - applies to non-conditional case as
// well, and is set above.
- ParameterConstraint<Number> aboveNull = new GreaterEqualConstraint(0.);
- cons.add(aboveNull);
- ParameterConstraint<Number> underOne = new LessEqualConstraint(1.);
- cons.add(underOne);
+ cons.add(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
+ cons.add(CommonConstraints.LESS_EQUAL_ONE_DOUBLE);
GlobalParameterConstraint gpc = new ParameterFlagGlobalConstraint<>(deltaP, cons, absoluteF, false);
config.checkConstraint(gpc);
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredRunner.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredRunner.java
index 2318b72d..670b4559 100644
--- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredRunner.java
+++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredRunner.java
@@ -32,8 +32,7 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.EigenvalueDecomposition;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
import de.lmu.ifi.dbs.elki.math.linearalgebra.SortedEigenPairs;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessGlobalConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
@@ -207,20 +206,20 @@ public class PCAFilteredRunner<V extends NumberVector<?>> extends PCARunner<V> {
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
ObjectParameter<EigenPairFilter> filterP = new ObjectParameter<>(PCA_EIGENPAIR_FILTER, EigenPairFilter.class, PercentageEigenPairFilter.class);
- if (config.grab(filterP)) {
+ if(config.grab(filterP)) {
eigenPairFilter = filterP.instantiateClass(config);
}
- DoubleParameter bigP = new DoubleParameter(BIG_ID, 1.0);
- bigP.addConstraint(new GreaterConstraint(0));
- if (config.grab(bigP)) {
+ DoubleParameter bigP = new DoubleParameter(BIG_ID, 1.);
+ bigP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+ if(config.grab(bigP)) {
big = bigP.doubleValue();
}
- DoubleParameter smallP = new DoubleParameter(SMALL_ID, 0.0);
- smallP.addConstraint(new GreaterEqualConstraint(0));
- if (config.grab(smallP)) {
+ DoubleParameter smallP = new DoubleParameter(SMALL_ID, 0.);
+ smallP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
+ if(config.grab(smallP)) {
small = smallP.doubleValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PercentageEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PercentageEigenPairFilter.java
index 85e29867..5602228e 100644
--- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PercentageEigenPairFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PercentageEigenPairFilter.java
@@ -33,8 +33,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
@@ -90,7 +89,7 @@ public class PercentageEigenPairFilter implements EigenPairFilter {
@Override
public FilteredEigenPairs filter(SortedEigenPairs eigenPairs) {
StringBuilder msg = new StringBuilder();
- if (LOG.isDebugging()) {
+ if(LOG.isDebugging()) {
msg.append("alpha = ").append(alpha);
msg.append("\nsortedEigenPairs = ").append(eigenPairs);
}
@@ -101,32 +100,34 @@ public class PercentageEigenPairFilter implements EigenPairFilter {
// determine sum of eigenvalues
double totalSum = 0;
- for (int i = 0; i < eigenPairs.size(); i++) {
+ for(int i = 0; i < eigenPairs.size(); i++) {
EigenPair eigenPair = eigenPairs.getEigenPair(i);
totalSum += eigenPair.getEigenvalue();
}
- if (LOG.isDebugging()) {
+ if(LOG.isDebugging()) {
msg.append("\ntotalSum = ").append(totalSum);
}
// determine strong and weak eigenpairs
double currSum = 0;
boolean found = false;
- for (int i = 0; i < eigenPairs.size(); i++) {
+ for(int i = 0; i < eigenPairs.size(); i++) {
EigenPair eigenPair = eigenPairs.getEigenPair(i);
currSum += eigenPair.getEigenvalue();
- if (currSum / totalSum >= alpha) {
- if (!found) {
+ if(currSum / totalSum >= alpha) {
+ if(!found) {
found = true;
strongEigenPairs.add(eigenPair);
- } else {
+ }
+ else {
weakEigenPairs.add(eigenPair);
}
- } else {
+ }
+ else {
strongEigenPairs.add(eigenPair);
}
}
- if (LOG.isDebugging()) {
+ if(LOG.isDebugging()) {
msg.append("\nstrong EigenPairs = ").append(strongEigenPairs);
msg.append("\nweak EigenPairs = ").append(weakEigenPairs);
LOG.debugFine(msg.toString());
@@ -153,9 +154,9 @@ public class PercentageEigenPairFilter implements EigenPairFilter {
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, DEFAULT_ALPHA);
- alphaP.addConstraint(new GreaterConstraint(0.0));
- alphaP.addConstraint(new LessConstraint(1.0));
- if (config.grab(alphaP)) {
+ alphaP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+ alphaP.addConstraint(CommonConstraints.LESS_THAN_ONE_DOUBLE);
+ if(config.grab(alphaP)) {
alpha = alphaP.doubleValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/ProgressiveEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/ProgressiveEigenPairFilter.java
index b99de2e1..4f412257 100644
--- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/ProgressiveEigenPairFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/ProgressiveEigenPairFilter.java
@@ -32,9 +32,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
@@ -133,7 +131,7 @@ public class ProgressiveEigenPairFilter implements EigenPairFilter {
// determine sum of eigenvalues
double totalSum = 0;
- for (int i = 0; i < eigenPairs.size(); i++) {
+ for(int i = 0; i < eigenPairs.size(); i++) {
EigenPair eigenPair = eigenPairs.getEigenPair(i);
totalSum += eigenPair.getEigenvalue();
}
@@ -143,35 +141,35 @@ public class ProgressiveEigenPairFilter implements EigenPairFilter {
double currSum = 0;
boolean found = false;
int i;
- for (i = 0; i < eigenPairs.size(); i++) {
+ for(i = 0; i < eigenPairs.size(); i++) {
EigenPair eigenPair = eigenPairs.getEigenPair(i);
// weak Eigenvector?
- if (eigenPair.getEigenvalue() < expectedVariance) {
+ if(eigenPair.getEigenvalue() < expectedVariance) {
break;
}
currSum += eigenPair.getEigenvalue();
// calculate progressive alpha level
double alpha = 1.0 - (1.0 - palpha) * (1.0 - (i + 1) / eigenPairs.size());
- if (currSum / totalSum >= alpha || i == eigenPairs.size() - 1) {
+ if(currSum / totalSum >= alpha || i == eigenPairs.size() - 1) {
found = true;
strongEigenPairs.add(eigenPair);
break;
}
}
// if we didn't hit our alpha level, we consider all vectors to be weak!
- if (!found) {
+ if(!found) {
assert (weakEigenPairs.size() == 0);
weakEigenPairs = strongEigenPairs;
strongEigenPairs = new ArrayList<>();
}
- for (; i < eigenPairs.size(); i++) {
+ for(; i < eigenPairs.size(); i++) {
EigenPair eigenPair = eigenPairs.getEigenPair(i);
weakEigenPairs.add(eigenPair);
}
// the code using this method doesn't expect an empty strong set,
// if we didn't find any strong ones, we make all vectors strong
- if (strongEigenPairs.size() == 0) {
+ if(strongEigenPairs.size() == 0) {
return new FilteredEigenPairs(new ArrayList<EigenPair>(), weakEigenPairs);
}
return new FilteredEigenPairs(weakEigenPairs, strongEigenPairs);
@@ -200,15 +198,15 @@ public class ProgressiveEigenPairFilter implements EigenPairFilter {
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
DoubleParameter palphaP = new DoubleParameter(EIGENPAIR_FILTER_PALPHA, DEFAULT_PALPHA);
- palphaP.addConstraint(new GreaterConstraint(0.0));
- palphaP.addConstraint(new LessConstraint(1.0));
- if (config.grab(palphaP)) {
+ palphaP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+ palphaP.addConstraint(CommonConstraints.LESS_THAN_ONE_DOUBLE);
+ if(config.grab(palphaP)) {
palpha = palphaP.getValue();
}
DoubleParameter walphaP = new DoubleParameter(WeakEigenPairFilter.EIGENPAIR_FILTER_WALPHA, DEFAULT_WALPHA);
- walphaP.addConstraint(new GreaterEqualConstraint(0.0));
- if (config.grab(walphaP)) {
+ walphaP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
+ if(config.grab(walphaP)) {
walpha = walphaP.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/RANSACCovarianceMatrixBuilder.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/RANSACCovarianceMatrixBuilder.java
index 3e9bccf7..2b369a32 100644
--- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/RANSACCovarianceMatrixBuilder.java
+++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/RANSACCovarianceMatrixBuilder.java
@@ -38,7 +38,7 @@ import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
@@ -107,31 +107,31 @@ public class RANSACCovarianceMatrixBuilder<V extends NumberVector<?>> extends Ab
DBIDs best = DBIDUtil.EMPTYDBIDS;
double tresh = ChiSquaredDistribution.quantile(0.85, dim);
- for (int i = 0; i < iterations; i++) {
+ for(int i = 0; i < iterations; i++) {
DBIDs sample = DBIDUtil.randomSample(ids, dim + 1, rnd);
CovarianceMatrix cv = CovarianceMatrix.make(relation, sample);
Vector centroid = cv.getMeanVector();
Matrix p = cv.destroyToSampleMatrix().inverse();
ModifiableDBIDs support = DBIDUtil.newHashSet();
- for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
+ for(DBIDIter id = ids.iter(); id.valid(); id.advance()) {
Vector vec = relation.get(id).getColumnVector().minusEquals(centroid);
double sqlen = vec.transposeTimesTimes(p, vec);
- if (sqlen < tresh) {
+ if(sqlen < tresh) {
support.add(id);
}
}
- if (support.size() > best.size()) {
+ if(support.size() > best.size()) {
best = support;
}
- if (support.size() >= ids.size()) {
+ if(support.size() >= ids.size()) {
break; // Can't get better than this!
}
}
// logger.warning("Consensus size: "+best.size()+" of "+ids.size());
// Fall back to regular PCA
- if (best.size() <= dim) {
+ if(best.size() <= dim) {
return CovarianceMatrix.make(relation, ids).destroyToSampleMatrix();
}
// Return estimation based on consensus set.
@@ -172,12 +172,12 @@ public class RANSACCovarianceMatrixBuilder<V extends NumberVector<?>> extends Ab
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
IntParameter iterP = new IntParameter(ITER_ID, 1000);
- iterP.addConstraint(new GreaterConstraint(0));
- if (config.grab(iterP)) {
+ iterP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(iterP)) {
iterations = iterP.intValue();
}
RandomParameter rndP = new RandomParameter(SEED_ID);
- if (config.grab(rndP)) {
+ if(config.grab(rndP)) {
rnd = rndP.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/RelativeEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/RelativeEigenPairFilter.java
index 12da3fdf..32092ce9 100644
--- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/RelativeEigenPairFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/RelativeEigenPairFilter.java
@@ -32,7 +32,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
@@ -93,21 +93,21 @@ public class RelativeEigenPairFilter implements EigenPairFilter {
// find the last eigenvector that is considered 'strong' by the weak rule
// applied to the remaining vectors only
double eigenValueSum = eigenPairs.getEigenPair(eigenPairs.size() - 1).getEigenvalue();
- for (int i = eigenPairs.size() - 2; i >= 0; i--) {
+ for(int i = eigenPairs.size() - 2; i >= 0; i--) {
EigenPair eigenPair = eigenPairs.getEigenPair(i);
eigenValueSum += eigenPair.getEigenvalue();
double needEigenvalue = eigenValueSum / (eigenPairs.size() - i) * ralpha;
- if (eigenPair.getEigenvalue() >= needEigenvalue) {
+ if(eigenPair.getEigenvalue() >= needEigenvalue) {
contrastAtMax = i;
break;
}
}
- for (int i = 0; i <= contrastAtMax /* && i < eigenPairs.size() */; i++) {
+ for(int i = 0; i <= contrastAtMax /* && i < eigenPairs.size() */; i++) {
EigenPair eigenPair = eigenPairs.getEigenPair(i);
strongEigenPairs.add(eigenPair);
}
- for (int i = contrastAtMax + 1; i < eigenPairs.size(); i++) {
+ for(int i = contrastAtMax + 1; i < eigenPairs.size(); i++) {
EigenPair eigenPair = eigenPairs.getEigenPair(i);
weakEigenPairs.add(eigenPair);
}
@@ -129,8 +129,8 @@ public class RelativeEigenPairFilter implements EigenPairFilter {
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
DoubleParameter ralphaP = new DoubleParameter(EIGENPAIR_FILTER_RALPHA, DEFAULT_RALPHA);
- ralphaP.addConstraint(new GreaterEqualConstraint(0.0));
- if (config.grab(ralphaP)) {
+ ralphaP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
+ if(config.grab(ralphaP)) {
ralpha = ralphaP.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/SignificantEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/SignificantEigenPairFilter.java
index 73044b59..6219f77f 100644
--- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/SignificantEigenPairFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/SignificantEigenPairFilter.java
@@ -31,7 +31,7 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.SortedEigenPairs;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
@@ -87,32 +87,32 @@ public class SignificantEigenPairFilter implements EigenPairFilter {
double maxContrast = 0.0;
// calc the eigenvalue sum.
double eigenValueSum = 0.0;
- for (int i = 0; i < eigenPairs.size(); i++) {
+ for(int i = 0; i < eigenPairs.size(); i++) {
EigenPair eigenPair = eigenPairs.getEigenPair(i);
eigenValueSum += eigenPair.getEigenvalue();
}
double weakEigenvalue = eigenValueSum / eigenPairs.size() * walpha;
// now find the maximum contrast.
double currSum = eigenPairs.getEigenPair(eigenPairs.size() - 1).getEigenvalue();
- for (int i = eigenPairs.size() - 2; i >= 0; i--) {
+ for(int i = eigenPairs.size() - 2; i >= 0; i--) {
EigenPair eigenPair = eigenPairs.getEigenPair(i);
currSum += eigenPair.getEigenvalue();
// weak?
- if (eigenPair.getEigenvalue() < weakEigenvalue) {
+ if(eigenPair.getEigenvalue() < weakEigenvalue) {
continue;
}
double contrast = eigenPair.getEigenvalue() / (currSum / (eigenPairs.size() - i));
- if (contrast > maxContrast) {
+ if(contrast > maxContrast) {
maxContrast = contrast;
contrastMaximum = i;
}
}
- for (int i = 0; i <= contrastMaximum /* && i < eigenPairs.size() */; i++) {
+ for(int i = 0; i <= contrastMaximum /* && i < eigenPairs.size() */; i++) {
EigenPair eigenPair = eigenPairs.getEigenPair(i);
strongEigenPairs.add(eigenPair);
}
- for (int i = contrastMaximum + 1; i < eigenPairs.size(); i++) {
+ for(int i = contrastMaximum + 1; i < eigenPairs.size(); i++) {
EigenPair eigenPair = eigenPairs.getEigenPair(i);
weakEigenPairs.add(eigenPair);
}
@@ -134,8 +134,8 @@ public class SignificantEigenPairFilter implements EigenPairFilter {
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
DoubleParameter walphaP = new DoubleParameter(WeakEigenPairFilter.EIGENPAIR_FILTER_WALPHA, DEFAULT_WALPHA);
- walphaP.addConstraint(new GreaterEqualConstraint(0.0));
- if (config.grab(walphaP)) {
+ walphaP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
+ if(config.grab(walphaP)) {
walpha = walphaP.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/WeakEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/WeakEigenPairFilter.java
index 66bcba30..92e1e32e 100644
--- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/WeakEigenPairFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/WeakEigenPairFilter.java
@@ -32,7 +32,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
@@ -85,25 +85,26 @@ public class WeakEigenPairFilter implements EigenPairFilter {
// determine sum of eigenvalues
double totalSum = 0;
- for (int i = 0; i < eigenPairs.size(); i++) {
+ for(int i = 0; i < eigenPairs.size(); i++) {
EigenPair eigenPair = eigenPairs.getEigenPair(i);
totalSum += eigenPair.getEigenvalue();
}
double expectEigenvalue = totalSum / eigenPairs.size() * walpha;
// determine strong and weak eigenpairs
- for (int i = 0; i < eigenPairs.size(); i++) {
+ for(int i = 0; i < eigenPairs.size(); i++) {
EigenPair eigenPair = eigenPairs.getEigenPair(i);
- if (eigenPair.getEigenvalue() > expectEigenvalue) {
+ if(eigenPair.getEigenvalue() > expectEigenvalue) {
strongEigenPairs.add(eigenPair);
- } else {
+ }
+ else {
weakEigenPairs.add(eigenPair);
}
}
// the code using this method doesn't expect an empty strong set,
// if we didn't find any strong ones, we make all vectors strong
- if (strongEigenPairs.size() == 0) {
+ if(strongEigenPairs.size() == 0) {
return new FilteredEigenPairs(new ArrayList<EigenPair>(), weakEigenPairs);
}
return new FilteredEigenPairs(weakEigenPairs, strongEigenPairs);
@@ -127,8 +128,8 @@ public class WeakEigenPairFilter implements EigenPairFilter {
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
DoubleParameter walphaP = new DoubleParameter(EIGENPAIR_FILTER_WALPHA, DEFAULT_WALPHA);
- walphaP.addConstraint(new GreaterEqualConstraint(0.0));
- if (config.grab(walphaP)) {
+ walphaP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
+ if(config.grab(walphaP)) {
walpha = walphaP.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/randomprojections/AbstractRandomProjectionFamily.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/randomprojections/AbstractRandomProjectionFamily.java
index 292fb002..e7f8198a 100644
--- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/randomprojections/AbstractRandomProjectionFamily.java
+++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/randomprojections/AbstractRandomProjectionFamily.java
@@ -51,7 +51,7 @@ public abstract class AbstractRandomProjectionFamily implements RandomProjection
*/
public AbstractRandomProjectionFamily(RandomFactory random) {
super();
- this.random = random.getRandom();
+ this.random = random.getSingleThreadedRandom();
}
/**
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/randomprojections/AchlioptasRandomProjectionFamily.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/randomprojections/AchlioptasRandomProjectionFamily.java
index 50610cf9..c07a9590 100644
--- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/randomprojections/AchlioptasRandomProjectionFamily.java
+++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/randomprojections/AchlioptasRandomProjectionFamily.java
@@ -26,7 +26,7 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
@@ -61,7 +61,7 @@ public class AchlioptasRandomProjectionFamily extends AbstractRandomProjectionFa
super(random);
this.sparsity = sparsity;
}
-
+
@Override
public Projection generateProjection(int idim, int odim) {
final double pPos = .5 / sparsity;
@@ -69,15 +69,17 @@ public class AchlioptasRandomProjectionFamily extends AbstractRandomProjectionFa
double baseValuePart = Math.sqrt(this.sparsity);
Matrix projectionMatrix = new Matrix(odim, idim);
- for (int i = 0; i < odim; ++i) {
- for (int j = 0; j < idim; ++j) {
+ for(int i = 0; i < odim; ++i) {
+ for(int j = 0; j < idim; ++j) {
final double r = random.nextDouble();
final double value;
- if (r < pPos) {
+ if(r < pPos) {
value = baseValuePart;
- } else if (r < pNeg) {
+ }
+ else if(r < pNeg) {
value = -baseValuePart;
- } else {
+ }
+ else {
value = 0.;
}
@@ -110,8 +112,8 @@ public class AchlioptasRandomProjectionFamily extends AbstractRandomProjectionFa
super.makeOptions(config);
DoubleParameter sparsP = new DoubleParameter(SPARSITY_ID);
sparsP.setDefaultValue(3.);
- sparsP.addConstraint(new GreaterEqualConstraint(1.));
- if (config.grab(sparsP)) {
+ sparsP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_DOUBLE);
+ if(config.grab(sparsP)) {
sparsity = sparsP.doubleValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/randomprojections/RandomSubsetProjectionFamily.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/randomprojections/RandomSubsetProjectionFamily.java
index d5192086..94a4a29c 100644
--- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/randomprojections/RandomSubsetProjectionFamily.java
+++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/randomprojections/RandomSubsetProjectionFamily.java
@@ -101,7 +101,7 @@ public class RandomSubsetProjectionFamily extends AbstractRandomProjectionFamily
*
* FIXME: move to shared code.
*
- * @param existing Existing array
+ * @param out Existing output array
* @param random Random generator.
* @return Same array.
*/
diff --git a/src/de/lmu/ifi/dbs/elki/math/scales/Scales.java b/src/de/lmu/ifi/dbs/elki/math/scales/Scales.java
index 67a4e748..bf258d99 100644
--- a/src/de/lmu/ifi/dbs/elki/math/scales/Scales.java
+++ b/src/de/lmu/ifi/dbs/elki/math/scales/Scales.java
@@ -67,7 +67,11 @@ public final class Scales {
for (DBIDIter iditer = db.iterDBIDs(); iditer.valid(); iditer.advance()) {
O v = db.get(iditer);
for (int d = 0; d < dim; d++) {
- minmax[d].put(v.doubleValue(d));
+ final double val = v.doubleValue(d);
+ if(val != val) {
+ continue; // NaN
+ }
+ minmax[d].put(val);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/KernelDensityEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/KernelDensityEstimator.java
index 72c50869..de756520 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/KernelDensityEstimator.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/KernelDensityEstimator.java
@@ -34,7 +34,7 @@ import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.KernelDensityFunction
*
* @author Erich Schubert
*
- * @apiviz.uses de.lmu.ifi.dbs.elki.math.statistics.KernelDensityFunction
+ * @apiviz.uses KernelDensityFunction
*/
public class KernelDensityEstimator {
/**
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/MultipleLinearRegression.java b/src/de/lmu/ifi/dbs/elki/math/statistics/MultipleLinearRegression.java
index 0d37ca67..37718358 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/MultipleLinearRegression.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/MultipleLinearRegression.java
@@ -123,7 +123,8 @@ public class MultipleLinearRegression {
// sum of square totals: sst
sum = 0;
for(int i = 0; i < y.getDimensionality(); i++) {
- sum += Math.pow((y.get(i) - y_mean), 2);
+ final double diff = y.get(i) - y_mean;
+ sum += diff * diff;
}
sst = sum;
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/PolynomialRegression.java b/src/de/lmu/ifi/dbs/elki/math/statistics/PolynomialRegression.java
index 12dfe28c..c478142a 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/PolynomialRegression.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/PolynomialRegression.java
@@ -23,6 +23,7 @@ package de.lmu.ifi.dbs.elki.math.statistics;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import de.lmu.ifi.dbs.elki.math.MathUtil;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
@@ -67,7 +68,7 @@ public class PolynomialRegression extends MultipleLinearRegression {
Matrix result = new Matrix(n, p + 1);
for(int i = 0; i < n; i++) {
for(int j = 0; j < p + 1; j++) {
- result.set(i, j, Math.pow(x.get(i), j));
+ result.set(i, j, MathUtil.powi(x.get(i), j));
}
}
return result;
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/ProbabilityWeightedMoments.java b/src/de/lmu/ifi/dbs/elki/math/statistics/ProbabilityWeightedMoments.java
index c783f5f1..b74ba06a 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/ProbabilityWeightedMoments.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/ProbabilityWeightedMoments.java
@@ -52,7 +52,8 @@ public class ProbabilityWeightedMoments {
* Compute the alpha_r factors using the method of probability-weighted
* moments.
*
- * @param sorted <b>Presorted</b> data array.
+ * @param data <b>Presorted</b> data array.
+ * @param adapter Array adapter.
* @param nmom Number of moments to compute
* @return Alpha moments (0-indexed)
*/
@@ -75,7 +76,8 @@ public class ProbabilityWeightedMoments {
* Compute the beta_r factors using the method of probability-weighted
* moments.
*
- * @param sorted <b>Presorted</b> data array.
+ * @param data <b>Presorted</b> data array.
+ * @param adapter Array adapter.
* @param nmom Number of moments to compute
* @return Beta moments (0-indexed)
*/
@@ -99,7 +101,8 @@ public class ProbabilityWeightedMoments {
* probability-weighted moments. Usually cheaper than computing them
* separately.
*
- * @param sorted <b>Presorted</b> data array.
+ * @param data <b>Presorted</b> data array.
+ * @param adapter Array adapter.
* @param nmom Number of moments to compute
* @return Alpha and Beta moments (0-indexed, interleaved)
*/
@@ -125,6 +128,7 @@ public class ProbabilityWeightedMoments {
* Compute the sample L-Moments using probability weighted moments.
*
* @param sorted <b>Presorted</b> data array.
+ * @param adapter Array adapter.
* @param nmom Number of moments to compute
* @return Array containing Lambda1, Lambda2, Tau3 ... TauN
*/
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/AbstractDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/AbstractDistribution.java
new file mode 100644
index 00000000..30481d00
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/AbstractDistribution.java
@@ -0,0 +1,115 @@
+package de.lmu.ifi.dbs.elki.math.statistics.distribution;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Random;
+
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
+
+/**
+ * Abstract base class for distributions.
+ *
+ * @author Erich Schubert
+ */
+public abstract class AbstractDistribution implements Distribution {
+ /**
+ * Random source.
+ */
+ final protected Random random;
+
+ /**
+ * Constructor.
+ *
+ * @param rnd Random source
+ */
+ public AbstractDistribution(RandomFactory rnd) {
+ super();
+ this.random = rnd.getRandom();
+ }
+
+ /**
+ * Constructor.
+ *
+ * @param rnd Random source
+ */
+ public AbstractDistribution(Random rnd) {
+ super();
+ this.random = rnd;
+ }
+
+ @Override
+ public double nextRandom() {
+ return quantile(random.nextDouble());
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public abstract static class Parameterizer extends AbstractParameterizer {
+ /**
+ * Parameter to specify the random seeding source.
+ */
+ public static final OptionID RANDOM_ID = new OptionID("distribution.random", "Random generation data source.");
+
+ /**
+ * Location parameter.
+ */
+ public static final OptionID LOCATION_ID = new OptionID("distribution.location", "Distribution location parameter");
+
+ /**
+ * Scale parameter.
+ */
+ public static final OptionID SCALE_ID = new OptionID("distribution.scale", "Distribution scale parameter");
+
+ /**
+ * Shape parameter.
+ */
+ public static final OptionID SHAPE_ID = new OptionID("distribution.shape", "Distribution shape parameter");
+
+ /**
+ * Random source.
+ */
+ RandomFactory rnd;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ RandomParameter randomP = new RandomParameter(RANDOM_ID);
+ if (config.grab(randomP)) {
+ rnd = randomP.getValue();
+ }
+ }
+
+ @Override
+ abstract protected Distribution makeInstance();
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/BetaDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/BetaDistribution.java
index c48583ea..04d55262 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/BetaDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/BetaDistribution.java
@@ -2,7 +2,11 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution;
import java.util.Random;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
/*
This file is part of ELKI:
@@ -34,7 +38,7 @@ import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
* @author Jan Brusis
* @author Erich Schubert
*/
-public class BetaDistribution implements Distribution {
+public class BetaDistribution extends AbstractDistribution {
/**
* Numerical precision to use
*/
@@ -66,11 +70,6 @@ public class BetaDistribution implements Distribution {
private final double beta;
/**
- * For random number generation
- */
- private Random random;
-
- /**
* Log beta(a, b) cache
*/
private double logbab;
@@ -82,7 +81,7 @@ public class BetaDistribution implements Distribution {
* @param b shape Parameter b
*/
public BetaDistribution(double a, double b) {
- this(a, b, new Random());
+ this(a, b, (Random) null);
}
/**
@@ -93,7 +92,25 @@ public class BetaDistribution implements Distribution {
* @param random Random generator
*/
public BetaDistribution(double a, double b, Random random) {
- super();
+ super(random);
+ if (a <= 0.0 || b <= 0.0) {
+ throw new IllegalArgumentException("Invalid parameters for Beta distribution.");
+ }
+
+ this.alpha = a;
+ this.beta = b;
+ this.logbab = logBeta(a, b);
+ }
+
+ /**
+ * Constructor.
+ *
+ * @param a shape Parameter a
+ * @param b shape Parameter b
+ * @param random Random generator
+ */
+ public BetaDistribution(double a, double b, RandomFactory random) {
+ super(random);
if (a <= 0.0 || b <= 0.0) {
throw new IllegalArgumentException("Invalid parameters for Beta distribution.");
}
@@ -101,7 +118,6 @@ public class BetaDistribution implements Distribution {
this.alpha = a;
this.beta = b;
this.logbab = logBeta(a, b);
- this.random = random;
}
@Override
@@ -457,7 +473,8 @@ public class BetaDistribution implements Distribution {
} else {
double r = beta + beta;
double t = 1. / (9. * beta);
- t = r * Math.pow(1. - t + y * Math.sqrt(t), 3.0);
+ final double a = 1. - t + y * Math.sqrt(t);
+ t = r * a * a * a;
if (t <= 0.) {
x = 1. - Math.exp((Math.log1p(-p) + Math.log(beta) + logbeta) / beta);
} else {
@@ -525,4 +542,46 @@ public class BetaDistribution implements Distribution {
// Not converged in Newton-Raphson
throw new AbortException("Beta quantile computation did not converge.");
}
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractDistribution.Parameterizer {
+ /**
+ * Alpha parameter.
+ */
+ public static final OptionID ALPHA_ID = new OptionID("distribution.beta.alpha", "Beta distribution alpha parameter");
+
+ /**
+ * Beta parameter.
+ */
+ public static final OptionID BETA_ID = new OptionID("distribution.beta.beta", "Beta distribution beta parameter");
+
+ /** Parameters. */
+ double alpha, beta;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ DoubleParameter alphaP = new DoubleParameter(ALPHA_ID);
+ if (config.grab(alphaP)) {
+ alpha = alphaP.doubleValue();
+ }
+
+ DoubleParameter betaP = new DoubleParameter(BETA_ID);
+ if (config.grab(betaP)) {
+ beta = betaP.doubleValue();
+ }
+ }
+
+ @Override
+ protected BetaDistribution makeInstance() {
+ return new BetaDistribution(alpha, beta, rnd);
+ }
+ }
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/CauchyDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/CauchyDistribution.java
index c218a37f..2b5d4949 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/CauchyDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/CauchyDistribution.java
@@ -25,12 +25,17 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution;
import java.util.Random;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+
/**
* Cauchy distribution.
*
* @author Erich Schubert
*/
-public class CauchyDistribution implements Distribution {
+public class CauchyDistribution extends AbstractDistribution {
/**
* The location (x0) parameter.
*/
@@ -42,18 +47,13 @@ public class CauchyDistribution implements Distribution {
final double shape;
/**
- * The random generator.
- */
- private Random random;
-
- /**
* Constructor with default random.
*
* @param location Location (x0)
* @param shape Shape (gamma)
*/
public CauchyDistribution(double location, double shape) {
- this(location, shape, new Random());
+ this(location, shape, (Random) null);
}
/**
@@ -64,10 +64,22 @@ public class CauchyDistribution implements Distribution {
* @param random Random generator
*/
public CauchyDistribution(double location, double shape, Random random) {
- super();
+ super(random);
+ this.location = location;
+ this.shape = shape;
+ }
+
+ /**
+ * Constructor.
+ *
+ * @param location Location (x0)
+ * @param shape Shape (gamma)
+ * @param random Random generator
+ */
+ public CauchyDistribution(double location, double shape, RandomFactory random) {
+ super(random);
this.location = location;
this.shape = shape;
- this.random = random;
}
@Override
@@ -132,4 +144,41 @@ public class CauchyDistribution implements Distribution {
public String toString() {
return "CauchyDistribution(location=" + location + ", shape=" + shape + ")";
}
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractDistribution.Parameterizer {
+ /**
+ * Shape parameter gamma.
+ */
+ public static final OptionID SHAPE_ID = new OptionID("distribution.cauchy.shape", "Cauchy distribution gamma/shape parameter.");
+
+ /** Parameters. */
+ double location, shape;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ DoubleParameter locP = new DoubleParameter(LOCATION_ID);
+ if (config.grab(locP)) {
+ location = locP.doubleValue();
+ }
+
+ DoubleParameter shapeP = new DoubleParameter(SHAPE_ID);
+ if (config.grab(shapeP)) {
+ shape = shapeP.doubleValue();
+ }
+ }
+
+ @Override
+ protected CauchyDistribution makeInstance() {
+ return new CauchyDistribution(location, shape, rnd);
+ }
+ }
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java
index a552e413..5f144946 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java
@@ -1,8 +1,5 @@
package de.lmu.ifi.dbs.elki.math.statistics.distribution;
-import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages;
-import de.lmu.ifi.dbs.elki.utilities.exceptions.NotImplementedException;
-
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
@@ -26,6 +23,14 @@ import de.lmu.ifi.dbs.elki.utilities.exceptions.NotImplementedException;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import java.util.Random;
+
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.NotImplementedException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+
/**
* Chi distribution.
*
@@ -33,7 +38,7 @@ import de.lmu.ifi.dbs.elki.utilities.exceptions.NotImplementedException;
*
* @apiviz.composedOf ChiSquaredDistribution
*/
-public class ChiDistribution implements Distribution {
+public class ChiDistribution extends AbstractDistribution {
/**
* Degrees of freedom. Usually integer.
*/
@@ -50,9 +55,31 @@ public class ChiDistribution implements Distribution {
* @param dof Degrees of freedom. Usually integer.
*/
public ChiDistribution(double dof) {
- super();
+ this(dof, (Random) null);
+ }
+
+ /**
+ * Constructor.
+ *
+ * @param dof Degrees of freedom. Usually integer.
+ * @param random Random number generator.
+ */
+ public ChiDistribution(double dof, Random random) {
+ super(random);
this.dof = dof;
- this.chisq = new ChiSquaredDistribution(dof);
+ this.chisq = new ChiSquaredDistribution(dof, random);
+ }
+
+ /**
+ * Constructor.
+ *
+ * @param dof Degrees of freedom. Usually integer.
+ * @param random Random number generator.
+ */
+ public ChiDistribution(double dof, RandomFactory random) {
+ super(random);
+ this.dof = dof;
+ this.chisq = new ChiSquaredDistribution(dof, random);
}
@Override
@@ -73,7 +100,7 @@ public class ChiDistribution implements Distribution {
* @return Pdf value
*/
public static double pdf(double val, double dof) {
- if(val < 0) {
+ if (val < 0) {
return 0.0;
}
return Math.sqrt(ChiSquaredDistribution.pdf(val, dof));
@@ -105,4 +132,31 @@ public class ChiDistribution implements Distribution {
public String toString() {
return "ChiDistribution(dof=" + dof + ")";
}
-} \ No newline at end of file
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractDistribution.Parameterizer {
+ /** Parameters. */
+ double dof;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ DoubleParameter dofP = new DoubleParameter(ChiSquaredDistribution.Parameterizer.DOF_ID);
+ if (config.grab(dofP)) {
+ dof = dofP.doubleValue();
+ }
+ }
+
+ @Override
+ protected ChiDistribution makeInstance() {
+ return new ChiDistribution(dof, rnd);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java
index 235367cd..6fd432b2 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java
@@ -1,7 +1,5 @@
package de.lmu.ifi.dbs.elki.math.statistics.distribution;
-import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
-
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
@@ -24,6 +22,13 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import java.util.Random;
+
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
/**
* Chi-Squared distribution (a specialization of the Gamma distribution).
@@ -37,7 +42,27 @@ public class ChiSquaredDistribution extends GammaDistribution {
* @param dof Degrees of freedom.
*/
public ChiSquaredDistribution(double dof) {
- super(.5 * dof, .5);
+ this(dof, (Random) null);
+ }
+
+ /**
+ * Constructor.
+ *
+ * @param dof Degrees of freedom.
+ * @param random Random generator.
+ */
+ public ChiSquaredDistribution(double dof, Random random) {
+ super(.5 * dof, .5, random);
+ }
+
+ /**
+ * Constructor.
+ *
+ * @param dof Degrees of freedom.
+ * @param random Random generator.
+ */
+ public ChiSquaredDistribution(double dof, RandomFactory random) {
+ super(.5 * dof, .5, random);
}
/**
@@ -95,4 +120,36 @@ public class ChiSquaredDistribution extends GammaDistribution {
public String toString() {
return "ChiSquaredDistribution(dof=" + (2 * getK()) + ")";
}
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractDistribution.Parameterizer {
+ /**
+ * Degrees of freedom parameter.
+ */
+ public static final OptionID DOF_ID = new OptionID("distribution.chi.dof", "Chi distribution degrees of freedom parameter.");
+
+ /** Parameters. */
+ double dof;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ DoubleParameter dofP = new DoubleParameter(DOF_ID);
+ if (config.grab(dofP)) {
+ dof = dofP.doubleValue();
+ }
+ }
+
+ @Override
+ protected ChiSquaredDistribution makeInstance() {
+ return new ChiSquaredDistribution(dof, rnd);
+ }
+ }
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ConstantDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ConstantDistribution.java
index 35d5294f..b31eaa3d 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ConstantDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ConstantDistribution.java
@@ -1,5 +1,10 @@
package de.lmu.ifi.dbs.elki.math.statistics.distribution;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
@@ -56,11 +61,43 @@ public class ConstantDistribution implements Distribution {
@Override
public double cdf(double val) {
- return (val >= c) ? 1.0 : 0.0;
+ return (c < val) ? 0. : (c > val) ? 1. : .5;
}
@Override
public double quantile(double val) {
return c;
}
-} \ No newline at end of file
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ /**
+ * Constant value parameter
+ */
+ public static final OptionID CONSTANT_ID = new OptionID("distribution.constant", "Constant value.");
+
+ /** Parameters. */
+ double constant;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ DoubleParameter constP = new DoubleParameter(CONSTANT_ID);
+ if (config.grab(constP)) {
+ constant = constP.doubleValue();
+ }
+ }
+
+ @Override
+ protected ConstantDistribution makeInstance() {
+ return new ConstantDistribution(constant);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java
index 519ba0b3..0c60e02f 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java
@@ -23,12 +23,14 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable;
+
/**
* Statistical distributions, with their common functions.
*
* @author Erich Schubert
*/
-public interface Distribution {
+public interface Distribution extends Parameterizable {
/**
* Return the density of an existing value
*
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ExponentialDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ExponentialDistribution.java
index e5af3e5b..33d8e853 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ExponentialDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ExponentialDistribution.java
@@ -25,17 +25,17 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution;
import java.util.Random;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+
/**
* Exponential distribution.
*
* @author Erich Schubert
*/
-public class ExponentialDistribution implements Distribution {
- /**
- * Random generator.
- */
- Random rnd;
-
+public class ExponentialDistribution extends AbstractDistribution {
/**
* Rate, inverse of mean
*/
@@ -52,7 +52,7 @@ public class ExponentialDistribution implements Distribution {
* @param rate Rate parameter (1/scale)
*/
public ExponentialDistribution(double rate) {
- this(rate, 0.0, null);
+ this(rate, 0.0, (Random) null);
}
/**
@@ -62,7 +62,7 @@ public class ExponentialDistribution implements Distribution {
* @param location Location parameter
*/
public ExponentialDistribution(double rate, double location) {
- this(rate, location, null);
+ this(rate, location, (Random) null);
}
/**
@@ -83,10 +83,22 @@ public class ExponentialDistribution implements Distribution {
* @param random Random generator
*/
public ExponentialDistribution(double rate, double location, Random random) {
- super();
+ super(random);
+ this.rate = rate;
+ this.location = location;
+ }
+
+ /**
+ * Constructor.
+ *
+ * @param rate Rate parameter (1/scale)
+ * @param location Location parameter
+ * @param random Random generator
+ */
+ public ExponentialDistribution(double rate, double location, RandomFactory random) {
+ super(random);
this.rate = rate;
this.location = location;
- this.rnd = random;
}
@Override
@@ -160,11 +172,48 @@ public class ExponentialDistribution implements Distribution {
*/
@Override
public double nextRandom() {
- return -Math.log(rnd.nextDouble()) / rate + location;
+ return -Math.log(random.nextDouble()) / rate + location;
}
@Override
public String toString() {
return "ExponentialDistribution(rate=" + rate + ", location=" + location + ")";
}
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractDistribution.Parameterizer {
+ /**
+ * Shape parameter gamma.
+ */
+ public static final OptionID RATE_ID = new OptionID("distribution.exponential.rate", "Exponential distribution rate (lambda) parameter (inverse of scale).");
+
+ /** Parameters. */
+ double location, rate;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ DoubleParameter locP = new DoubleParameter(LOCATION_ID);
+ if (config.grab(locP)) {
+ location = locP.doubleValue();
+ }
+
+ DoubleParameter rateP = new DoubleParameter(RATE_ID);
+ if (config.grab(rateP)) {
+ rate = rateP.doubleValue();
+ }
+ }
+
+ @Override
+ protected ExponentialDistribution makeInstance() {
+ return new ExponentialDistribution(rate, location, rnd);
+ }
+ }
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ExponentiallyModifiedGaussianDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ExponentiallyModifiedGaussianDistribution.java
index 01e91777..22e75e3d 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ExponentiallyModifiedGaussianDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ExponentiallyModifiedGaussianDistribution.java
@@ -25,9 +25,13 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution;
import java.util.Random;
+import de.lmu.ifi.dbs.elki.math.MathUtil;
import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages;
import de.lmu.ifi.dbs.elki.utilities.exceptions.NotImplementedException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
/**
* Exponentially modified Gaussian (EMG) distribution (ExGaussian distribution)
@@ -36,7 +40,7 @@ import de.lmu.ifi.dbs.elki.utilities.exceptions.NotImplementedException;
* @author Erich Schubert
*/
@Alias({ "exgaussian" })
-public class ExponentiallyModifiedGaussianDistribution implements Distribution {
+public class ExponentiallyModifiedGaussianDistribution extends AbstractDistribution {
/**
* Mean value for the generator
*/
@@ -53,9 +57,19 @@ public class ExponentiallyModifiedGaussianDistribution implements Distribution {
private double lambda;
/**
- * Random generator.
+ * Constructor for ExGaussian distribution
+ *
+ * @param mean Mean
+ * @param stddev Standard Deviation
+ * @param lambda Rate
+ * @param random Random
*/
- private Random rnd;
+ public ExponentiallyModifiedGaussianDistribution(double mean, double stddev, double lambda, Random random) {
+ super(random);
+ this.mean = mean;
+ this.stddev = stddev;
+ this.lambda = lambda;
+ }
/**
* Constructor for ExGaussian distribution
@@ -63,14 +77,13 @@ public class ExponentiallyModifiedGaussianDistribution implements Distribution {
* @param mean Mean
* @param stddev Standard Deviation
* @param lambda Rate
- * @param rnd Random
+ * @param random Random
*/
- public ExponentiallyModifiedGaussianDistribution(double mean, double stddev, double lambda, Random rnd) {
- super();
+ public ExponentiallyModifiedGaussianDistribution(double mean, double stddev, double lambda, RandomFactory random) {
+ super(random);
this.mean = mean;
this.stddev = stddev;
this.lambda = lambda;
- this.rnd = rnd;
}
/**
@@ -81,7 +94,7 @@ public class ExponentiallyModifiedGaussianDistribution implements Distribution {
* @param lambda Rate
*/
public ExponentiallyModifiedGaussianDistribution(double mean, double stddev, double lambda) {
- this(mean, stddev, lambda, null);
+ this(mean, stddev, lambda, (Random) null);
}
@Override
@@ -105,8 +118,8 @@ public class ExponentiallyModifiedGaussianDistribution implements Distribution {
@Override
public double nextRandom() {
- double no = mean + rnd.nextGaussian() * stddev;
- double ex = -Math.log(rnd.nextDouble()) / lambda;
+ double no = mean + random.nextGaussian() * stddev;
+ double ex = -Math.log(random.nextDouble()) / lambda;
return no + ex;
}
@@ -147,8 +160,9 @@ public class ExponentiallyModifiedGaussianDistribution implements Distribution {
*/
public static double pdf(double x, double mu, double sigma, double lambda) {
final double dx = x - mu;
- final double erfc = NormalDistribution.erfc(lambda * sigma * sigma - dx);
- return .5 * lambda * Math.exp(lambda * (lambda * sigma * sigma * .5 - dx)) * erfc;
+ final double lss = lambda * sigma * sigma;
+ final double erfc = NormalDistribution.erfc((lss - dx) / (sigma * MathUtil.SQRT2));
+ return .5 * lambda * Math.exp(lambda * (lss * .5 - dx)) * erfc;
}
/**
@@ -185,4 +199,41 @@ public class ExponentiallyModifiedGaussianDistribution implements Distribution {
// FIXME: implement!
throw new NotImplementedException(ExceptionMessages.UNSUPPORTED_NOT_YET);
}
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractDistribution.Parameterizer {
+ /** Parameters. */
+ double mean, stddev, lambda;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ DoubleParameter locP = new DoubleParameter(LOCATION_ID);
+ if(config.grab(locP)) {
+ mean = locP.doubleValue();
+ }
+
+ DoubleParameter scaleP = new DoubleParameter(SCALE_ID);
+ if(config.grab(scaleP)) {
+ stddev = scaleP.doubleValue();
+ }
+
+ DoubleParameter rateP = new DoubleParameter(ExponentialDistribution.Parameterizer.RATE_ID);
+ if(config.grab(rateP)) {
+ lambda = rateP.doubleValue();
+ }
+ }
+
+ @Override
+ protected ExponentiallyModifiedGaussianDistribution makeInstance() {
+ return new ExponentiallyModifiedGaussianDistribution(mean, stddev, lambda, rnd);
+ }
+ }
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GammaDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GammaDistribution.java
index 1b9e2b42..850b0e3a 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GammaDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GammaDistribution.java
@@ -27,14 +27,18 @@ import java.util.Random;
import de.lmu.ifi.dbs.elki.logging.LoggingUtil;
import de.lmu.ifi.dbs.elki.math.MathUtil;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
/**
* Gamma Distribution, with random generation and density functions.
*
* @author Erich Schubert
*/
-public class GammaDistribution implements Distribution {
+public class GammaDistribution extends AbstractDistribution {
/**
* Euler–Mascheroni constant
*/
@@ -77,9 +81,21 @@ public class GammaDistribution implements Distribution {
private final double theta;
/**
- * The random generator.
+ * Constructor for Gamma distribution.
+ *
+ * @param k k, alpha aka. "shape" parameter
+ * @param theta Theta = 1.0/Beta aka. "scaling" parameter
+ * @param random Random generator
*/
- private Random random;
+ public GammaDistribution(double k, double theta, Random random) {
+ super(random);
+ if (!(k > 0.0) || !(theta > 0.0)) { // Note: also tests for NaNs!
+ throw new IllegalArgumentException("Invalid parameters for Gamma distribution: " + k + " " + theta);
+ }
+
+ this.k = k;
+ this.theta = theta;
+ }
/**
* Constructor for Gamma distribution.
@@ -88,15 +104,14 @@ public class GammaDistribution implements Distribution {
* @param theta Theta = 1.0/Beta aka. "scaling" parameter
* @param random Random generator
*/
- public GammaDistribution(double k, double theta, Random random) {
- super();
+ public GammaDistribution(double k, double theta, RandomFactory random) {
+ super(random);
if (!(k > 0.0) || !(theta > 0.0)) { // Note: also tests for NaNs!
throw new IllegalArgumentException("Invalid parameters for Gamma distribution: " + k + " " + theta);
}
this.k = k;
this.theta = theta;
- this.random = random;
}
/**
@@ -106,7 +121,7 @@ public class GammaDistribution implements Distribution {
* @param theta Theta = 1.0/Beta aka. "scaling" parameter
*/
public GammaDistribution(double k, double theta) {
- this(k, theta, new Random());
+ this(k, theta, (Random) null);
}
@Override
@@ -631,12 +646,13 @@ public class GammaDistribution implements Distribution {
// (Math.log(alpha) + g);
// return Math.exp((lgam1pa + logp) / alpha + MathUtil.LOG2);
// This is literal AS 91, above is the GNU R variant.
- return Math.pow(p * k * Math.exp(g + k * MathUtil.LOG2), 1 / k);
+ return Math.pow(p * k * Math.exp(g + k * MathUtil.LOG2), 1. / k);
} else if (nu > 0.32) {
// Wilson and Hilferty estimate: - AS 91 at 3
final double x = NormalDistribution.quantile(p, 0, 1);
final double p1 = 2. / (9. * nu);
- double ch = nu * Math.pow(x * Math.sqrt(p1) + 1 - p1, 3);
+ final double a = x * Math.sqrt(p1) + 1 - p1;
+ double ch = nu * a * a * a;
// Better approximation for p tending to 1:
if (ch > 2.2 * nu + 6) {
@@ -890,4 +906,48 @@ public class GammaDistribution implements Distribution {
return trigamma(x + 1.) - 1. / (x * x);
}
}
+
+ /**
+ * Parameterization class
+ *
+ * TODO: allow alternate parameterization, with alpha+beta?
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractDistribution.Parameterizer {
+ /**
+ * K parameter.
+ */
+ public static final OptionID K_ID = new OptionID("distribution.gamma.k", "Gamma distribution k = alpha parameter.");
+
+ /**
+ * Theta parameter.
+ */
+ public static final OptionID THETA_ID = new OptionID("distribution.gamma.theta", "Gamma distribution theta = 1/beta parameter.");
+
+ /** Parameters. */
+ double k, theta;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ DoubleParameter kP = new DoubleParameter(K_ID);
+ if (config.grab(kP)) {
+ k = kP.doubleValue();
+ }
+
+ DoubleParameter thetaP = new DoubleParameter(THETA_ID);
+ if (config.grab(thetaP)) {
+ theta = thetaP.doubleValue();
+ }
+ }
+
+ @Override
+ protected GammaDistribution makeInstance() {
+ return new GammaDistribution(k, theta, rnd);
+ }
+ }
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GeneralizedExtremeValueDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GeneralizedExtremeValueDistribution.java
index 9cd6cb4e..0d3fd4f8 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GeneralizedExtremeValueDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GeneralizedExtremeValueDistribution.java
@@ -1,7 +1,5 @@
package de.lmu.ifi.dbs.elki.math.statistics.distribution;
-import java.util.Random;
-
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
@@ -25,6 +23,12 @@ import java.util.Random;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import java.util.Random;
+
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+
/**
* Generalized Extreme Value (GEV) distribution, also known as Fisher–Tippett
* distribution.
@@ -37,16 +41,22 @@ import java.util.Random;
*
* @author Erich Schubert
*/
-public class GeneralizedExtremeValueDistribution implements Distribution {
+public class GeneralizedExtremeValueDistribution extends AbstractDistribution {
/**
* Parameters (location, scale, shape)
*/
final double mu, sigma, k;
/**
- * Random number generator.
+ * Constructor.
+ *
+ * @param mu Location parameter mu
+ * @param sigma Scale parameter sigma
+ * @param k Shape parameter k
*/
- Random random;
+ public GeneralizedExtremeValueDistribution(double mu, double sigma, double k) {
+ this(mu, sigma, k, (Random) null);
+ }
/**
* Constructor.
@@ -54,9 +64,13 @@ public class GeneralizedExtremeValueDistribution implements Distribution {
* @param mu Location parameter mu
* @param sigma Scale parameter sigma
* @param k Shape parameter k
+ * @param random Random number generator
*/
- public GeneralizedExtremeValueDistribution(double mu, double sigma, double k) {
- this(mu, sigma, k, null);
+ public GeneralizedExtremeValueDistribution(double mu, double sigma, double k, RandomFactory random) {
+ super(random);
+ this.mu = mu;
+ this.sigma = sigma;
+ this.k = k;
}
/**
@@ -68,11 +82,10 @@ public class GeneralizedExtremeValueDistribution implements Distribution {
* @param random Random number generator
*/
public GeneralizedExtremeValueDistribution(double mu, double sigma, double k, Random random) {
- super();
+ super(random);
this.mu = mu;
this.sigma = sigma;
this.k = k;
- this.random = random;
}
/**
@@ -156,12 +169,44 @@ public class GeneralizedExtremeValueDistribution implements Distribution {
}
@Override
- public double nextRandom() {
- return quantile(random.nextDouble());
- }
-
- @Override
public String toString() {
return "GeneralizedExtremeValueDistribution(sigma=" + sigma + ", mu=" + mu + ", k=" + k + ")";
}
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractDistribution.Parameterizer {
+ /** Parameters. */
+ double mu, sigma, k;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ DoubleParameter muP = new DoubleParameter(LOCATION_ID);
+ if (config.grab(muP)) {
+ mu = muP.doubleValue();
+ }
+
+ DoubleParameter sigmaP = new DoubleParameter(SCALE_ID);
+ if (config.grab(sigmaP)) {
+ sigma = sigmaP.doubleValue();
+ }
+
+ DoubleParameter kP = new DoubleParameter(SHAPE_ID);
+ if (config.grab(kP)) {
+ k = kP.doubleValue();
+ }
+ }
+
+ @Override
+ protected GeneralizedExtremeValueDistribution makeInstance() {
+ return new GeneralizedExtremeValueDistribution(mu, sigma, k, rnd);
+ }
+ }
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GeneralizedLogisticAlternateDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GeneralizedLogisticAlternateDistribution.java
index 467d6aae..eb5e1b1a 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GeneralizedLogisticAlternateDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GeneralizedLogisticAlternateDistribution.java
@@ -24,6 +24,10 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution;
*/
import java.util.Random;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+
/**
* Generalized logistic distribution.
*
@@ -33,7 +37,7 @@ import java.util.Random;
*
* @author Erich Schubert
*/
-public class GeneralizedLogisticAlternateDistribution implements Distribution {
+public class GeneralizedLogisticAlternateDistribution extends AbstractDistribution {
/**
* Parameters: location and scale
*/
@@ -45,9 +49,15 @@ public class GeneralizedLogisticAlternateDistribution implements Distribution {
double shape;
/**
- * Random number generator
+ * Constructor.
+ *
+ * @param location Location
+ * @param scale Scale
+ * @param shape Shape parameter
*/
- Random random;
+ public GeneralizedLogisticAlternateDistribution(double location, double scale, double shape) {
+ this(location, scale, shape, (Random) null);
+ }
/**
* Constructor.
@@ -55,9 +65,16 @@ public class GeneralizedLogisticAlternateDistribution implements Distribution {
* @param location Location
* @param scale Scale
* @param shape Shape parameter
+ * @param random Random number generator
*/
- public GeneralizedLogisticAlternateDistribution(double location, double scale, double shape) {
- this(location, scale, shape, null);
+ public GeneralizedLogisticAlternateDistribution(double location, double scale, double shape, Random random) {
+ super(random);
+ this.location = location;
+ this.scale = scale;
+ this.shape = shape;
+ if (!(shape > -1.) || !(shape < 1.)) {
+ throw new ArithmeticException("Invalid shape parameter - must be -1 to +1, is: " + shape);
+ }
}
/**
@@ -68,12 +85,11 @@ public class GeneralizedLogisticAlternateDistribution implements Distribution {
* @param shape Shape parameter
* @param random Random number generator
*/
- public GeneralizedLogisticAlternateDistribution(double location, double scale, double shape, Random random) {
- super();
+ public GeneralizedLogisticAlternateDistribution(double location, double scale, double shape, RandomFactory random) {
+ super(random);
this.location = location;
this.scale = scale;
this.shape = shape;
- this.random = random;
if (!(shape > -1.) || !(shape < 1.)) {
throw new ArithmeticException("Invalid shape parameter - must be -1 to +1, is: " + shape);
}
@@ -159,4 +175,41 @@ public class GeneralizedLogisticAlternateDistribution implements Distribution {
public String toString() {
return "GeneralizedLogisticAlternateDistribution(location=" + location + ", scale=" + scale + ", shape=" + shape + ")";
}
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractDistribution.Parameterizer {
+ /** Parameters. */
+ double location, scale, shape;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ DoubleParameter locationP = new DoubleParameter(LOCATION_ID);
+ if (config.grab(locationP)) {
+ location = locationP.doubleValue();
+ }
+
+ DoubleParameter scaleP = new DoubleParameter(SCALE_ID);
+ if (config.grab(scaleP)) {
+ scale = scaleP.doubleValue();
+ }
+
+ DoubleParameter shapeP = new DoubleParameter(SHAPE_ID);
+ if (config.grab(shapeP)) {
+ shape = shapeP.doubleValue();
+ }
+ }
+
+ @Override
+ protected GeneralizedLogisticAlternateDistribution makeInstance() {
+ return new GeneralizedLogisticAlternateDistribution(location, scale, shape, rnd);
+ }
+ }
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GeneralizedLogisticDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GeneralizedLogisticDistribution.java
index 76f71107..467ad4f9 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GeneralizedLogisticDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GeneralizedLogisticDistribution.java
@@ -24,6 +24,10 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution;
*/
import java.util.Random;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+
/**
* Generalized logistic distribution. (Type I, Skew-logistic distribution)
*
@@ -37,7 +41,7 @@ import java.util.Random;
*
* @author Erich Schubert
*/
-public class GeneralizedLogisticDistribution implements Distribution {
+public class GeneralizedLogisticDistribution extends AbstractDistribution {
/**
* Parameters: location and scale
*/
@@ -49,9 +53,15 @@ public class GeneralizedLogisticDistribution implements Distribution {
double shape;
/**
- * Random number generator
+ * Constructor.
+ *
+ * @param location Location
+ * @param scale Scale
+ * @param shape Shape parameter
*/
- Random random;
+ public GeneralizedLogisticDistribution(double location, double scale, double shape) {
+ this(location, scale, shape, (Random) null);
+ }
/**
* Constructor.
@@ -59,9 +69,13 @@ public class GeneralizedLogisticDistribution implements Distribution {
* @param location Location
* @param scale Scale
* @param shape Shape parameter
+ * @param random Random number generator
*/
- public GeneralizedLogisticDistribution(double location, double scale, double shape) {
- this(location, scale, shape, null);
+ public GeneralizedLogisticDistribution(double location, double scale, double shape, Random random) {
+ super(random);
+ this.location = location;
+ this.scale = scale;
+ this.shape = shape;
}
/**
@@ -72,12 +86,11 @@ public class GeneralizedLogisticDistribution implements Distribution {
* @param shape Shape parameter
* @param random Random number generator
*/
- public GeneralizedLogisticDistribution(double location, double scale, double shape, Random random) {
- super();
+ public GeneralizedLogisticDistribution(double location, double scale, double shape, RandomFactory random) {
+ super(random);
this.location = location;
this.scale = scale;
this.shape = shape;
- this.random = random;
}
/**
@@ -181,4 +194,41 @@ public class GeneralizedLogisticDistribution implements Distribution {
public String toString() {
return "GeneralizedLogisticDistribution(location=" + location + ", scale=" + scale + ", shape=" + shape + ")";
}
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractDistribution.Parameterizer {
+ /** Parameters. */
+ double location, scale, shape;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ DoubleParameter locationP = new DoubleParameter(LOCATION_ID);
+ if (config.grab(locationP)) {
+ location = locationP.doubleValue();
+ }
+
+ DoubleParameter scaleP = new DoubleParameter(SCALE_ID);
+ if (config.grab(scaleP)) {
+ scale = scaleP.doubleValue();
+ }
+
+ DoubleParameter shapeP = new DoubleParameter(SHAPE_ID);
+ if (config.grab(shapeP)) {
+ shape = shapeP.doubleValue();
+ }
+ }
+
+ @Override
+ protected GeneralizedLogisticDistribution makeInstance() {
+ return new GeneralizedLogisticDistribution(location, scale, shape, rnd);
+ }
+ }
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GumbelDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GumbelDistribution.java
index 15b4ca24..9f42b7e2 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GumbelDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GumbelDistribution.java
@@ -25,12 +25,16 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution;
import java.util.Random;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+
/**
* Gumbel distribution, also known as Log-Weibull distribution.
*
* @author Erich Schubert
*/
-public class GumbelDistribution implements Distribution {
+public class GumbelDistribution extends AbstractDistribution {
/**
* Mode parameter mu.
*/
@@ -42,18 +46,26 @@ public class GumbelDistribution implements Distribution {
double beta;
/**
- * Random number generator.
+ * Constructor.
+ *
+ * @param mu Mode
+ * @param beta Shape
*/
- Random random;
+ public GumbelDistribution(double mu, double beta) {
+ this(mu, beta, (Random) null);
+ }
/**
* Constructor.
*
* @param mu Mode
* @param beta Shape
+ * @param random Random number generator
*/
- public GumbelDistribution(double mu, double beta) {
- this(mu, beta, null);
+ public GumbelDistribution(double mu, double beta, Random random) {
+ super(random);
+ this.mu = mu;
+ this.beta = beta;
}
/**
@@ -63,11 +75,10 @@ public class GumbelDistribution implements Distribution {
* @param beta Shape
* @param random Random number generator
*/
- public GumbelDistribution(double mu, double beta, Random random) {
- super();
+ public GumbelDistribution(double mu, double beta, RandomFactory random) {
+ super(random);
this.mu = mu;
this.beta = beta;
- this.random = random;
}
/**
@@ -131,4 +142,36 @@ public class GumbelDistribution implements Distribution {
public String toString() {
return "GumbelDistribution(mu=" + mu + ", beta=" + beta + ")";
}
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractDistribution.Parameterizer {
+ /** Parameters. */
+ double mean, shape;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ DoubleParameter meanP = new DoubleParameter(LOCATION_ID);
+ if (config.grab(meanP)) {
+ mean = meanP.doubleValue();
+ }
+
+ DoubleParameter shapeP = new DoubleParameter(SHAPE_ID);
+ if (config.grab(shapeP)) {
+ shape = shapeP.doubleValue();
+ }
+ }
+
+ @Override
+ protected GumbelDistribution makeInstance() {
+ return new GumbelDistribution(mean, shape, rnd);
+ }
+ }
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/HaltonUniformDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/HaltonUniformDistribution.java
index 145744db..c16bb498 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/HaltonUniformDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/HaltonUniformDistribution.java
@@ -27,7 +27,10 @@ import java.util.Random;
import de.lmu.ifi.dbs.elki.math.MathUtil;
import de.lmu.ifi.dbs.elki.math.Primes;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
/**
* Halton sequences are a pseudo-uniform distribution. The data is actually too
@@ -162,7 +165,6 @@ public class HaltonUniformDistribution implements Distribution {
* @param max Maximum value
*/
public HaltonUniformDistribution(double min, double max) {
- // TODO: use different starting primes?
this(min, max, new Random());
}
@@ -175,11 +177,22 @@ public class HaltonUniformDistribution implements Distribution {
* @param rnd Random generator
*/
public HaltonUniformDistribution(double min, double max, Random rnd) {
- // TODO: use different starting primes?
this(min, max, choosePrime(rnd), rnd.nextDouble());
}
/**
+ * Constructor for a halton pseudo uniform distribution on the interval [min,
+ * max[
+ *
+ * @param min Minimum value
+ * @param max Maximum value
+ * @param rnd Random generator
+ */
+ public HaltonUniformDistribution(double min, double max, RandomFactory rnd) {
+ this(min, max, rnd.getRandom());
+ }
+
+ /**
* Choose a random prime. We try to avoid the later primes, as they are known
* to cause too correlated data.
*
@@ -310,4 +323,38 @@ public class HaltonUniformDistribution implements Distribution {
public double getMax() {
return max;
}
+
+ /**
+ * Parameterization class
+ *
+ * TODO: allow manual parameterization of sequence parameters!
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractDistribution.Parameterizer {
+ /** Parameters. */
+ double min, max;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ DoubleParameter minP = new DoubleParameter(UniformDistribution.Parameterizer.MIN_ID);
+ if (config.grab(minP)) {
+ min = minP.doubleValue();
+ }
+
+ DoubleParameter maxP = new DoubleParameter(UniformDistribution.Parameterizer.MAX_ID);
+ if (config.grab(maxP)) {
+ max = maxP.doubleValue();
+ }
+ }
+
+ @Override
+ protected HaltonUniformDistribution makeInstance() {
+ return new HaltonUniformDistribution(min, max, rnd);
+ }
+ }
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/KappaDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/KappaDistribution.java
index 9414767c..156bf325 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/KappaDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/KappaDistribution.java
@@ -24,6 +24,11 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution;
*/
import java.util.Random;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+
/**
* Kappa distribution, by Hosking.
*
@@ -31,7 +36,7 @@ import java.util.Random;
*
* @author Erich Schubert
*/
-public class KappaDistribution implements Distribution {
+public class KappaDistribution extends AbstractDistribution {
/**
* Parameters: location and scale
*/
@@ -43,11 +48,6 @@ public class KappaDistribution implements Distribution {
double shape1, shape2;
/**
- * Random number generator
- */
- Random random;
-
- /**
* Constructor.
*
* @param location Location
@@ -56,7 +56,7 @@ public class KappaDistribution implements Distribution {
* @param shape2 Shape parameter
*/
public KappaDistribution(double location, double scale, double shape1, double shape2) {
- this(location, scale, shape1, shape2, null);
+ this(location, scale, shape1, shape2, (Random) null);
}
/**
@@ -69,19 +69,43 @@ public class KappaDistribution implements Distribution {
* @param random Random number generator
*/
public KappaDistribution(double location, double scale, double shape1, double shape2, Random random) {
- super();
+ super(random);
this.location = location;
this.scale = scale;
this.shape1 = shape1;
this.shape2 = shape2;
- this.random = random;
- if(shape2 >= 0.) {
- if(shape1 < -1.) {
+ if (shape2 >= 0.) {
+ if (shape1 < -1.) {
throw new ArithmeticException("Invalid shape1 parameter - must be greater than -1 if shape2 >= 0.!");
}
+ } else {
+ if (shape1 < 1. || shape1 > 1. / shape2) {
+ throw new ArithmeticException("Invalid shape1 parameter - must be -1 to +1/shape2 if shape2 < 0.!");
+ }
}
- else {
- if(shape1 < 1. || shape1 > 1. / shape2) {
+ }
+
+ /**
+ * Constructor.
+ *
+ * @param location Location
+ * @param scale Scale
+ * @param shape1 Shape parameter
+ * @param shape2 Shape parameter
+ * @param random Random number generator
+ */
+ public KappaDistribution(double location, double scale, double shape1, double shape2, RandomFactory random) {
+ super(random);
+ this.location = location;
+ this.scale = scale;
+ this.shape1 = shape1;
+ this.shape2 = shape2;
+ if (shape2 >= 0.) {
+ if (shape1 < -1.) {
+ throw new ArithmeticException("Invalid shape1 parameter - must be greater than -1 if shape2 >= 0.!");
+ }
+ } else {
+ if (shape1 < 1. || shape1 > 1. / shape2) {
throw new ArithmeticException("Invalid shape1 parameter - must be -1 to +1/shape2 if shape2 < 0.!");
}
}
@@ -100,9 +124,9 @@ public class KappaDistribution implements Distribution {
public static double pdf(double val, double loc, double scale, double shape1, double shape2) {
final double c = cdf(val, loc, scale, shape1, shape2);
val = (val - loc) / scale;
- if(shape1 != 0.) {
+ if (shape1 != 0.) {
val = 1 - shape1 * val;
- if(val < 1e-15) {
+ if (val < 1e-15) {
return 0.;
}
val = (1. - 1. / shape1) * Math.log(val);
@@ -128,24 +152,22 @@ public class KappaDistribution implements Distribution {
*/
public static double cdf(double val, double loc, double scale, double shape1, double shape2) {
val = (val - loc) / scale;
- if(shape1 != 0.) {
+ if (shape1 != 0.) {
double tmp = 1. - shape1 * val;
- if(tmp < 1e-15) {
+ if (tmp < 1e-15) {
return (shape1 < 0.) ? 0. : 1.;
}
val = Math.exp(Math.log(tmp) / shape1);
- }
- else {
+ } else {
val = Math.exp(-val);
}
- if(shape2 != 0.) {
+ if (shape2 != 0.) {
double tmp = 1. - shape2 * val;
- if(tmp < 1e-15) {
+ if (tmp < 1e-15) {
return 0.;
}
val = Math.exp(Math.log(tmp) / shape2);
- }
- else {
+ } else {
val = Math.exp(-val);
}
return val;
@@ -167,39 +189,36 @@ public class KappaDistribution implements Distribution {
* @return Quantile
*/
public static double quantile(double val, double loc, double scale, double shape1, double shape2) {
- if(!(val >= 0.) || !(val <= 1.)) {
+ if (!(val >= 0.) || !(val <= 1.)) {
return Double.NaN;
}
- if(val == 0.) {
- if(shape2 <= 0.) {
- if(shape1 < 0.) {
+ if (val == 0.) {
+ if (shape2 <= 0.) {
+ if (shape1 < 0.) {
return loc + scale / shape1;
- }
- else {
+ } else {
return Double.NEGATIVE_INFINITY;
}
- }
- else {
- if(shape1 != 0.) {
+ } else {
+ if (shape1 != 0.) {
return loc + scale / shape1 * (1. - Math.pow(shape2, -shape1));
- }
- else {
+ } else {
return loc + scale * Math.log(shape2);
}
}
}
- if(val == 1.) {
- if(shape1 <= 0.) {
+ if (val == 1.) {
+ if (shape1 <= 0.) {
return Double.NEGATIVE_INFINITY;
}
return loc + scale / shape1;
}
val = -Math.log(val);
- if(shape2 != 0.) {
+ if (shape2 != 0.) {
val = (1 - Math.exp(-shape2 * val)) / shape2;
}
val = -Math.log(val);
- if(shape1 != 0.) {
+ if (shape1 != 0.) {
val = (1 - Math.exp(-shape1 * val)) / shape1;
}
return loc + scale * val;
@@ -220,4 +239,56 @@ public class KappaDistribution implements Distribution {
public String toString() {
return "KappaDistribution(location=" + location + ", scale=" + scale + ", shape1=" + shape1 + ", shape2=" + shape2 + ")";
}
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractDistribution.Parameterizer {
+ /**
+ * First shape parameter.
+ */
+ public static final OptionID SHAPE1_ID = new OptionID("distribution.kappa.shape1", "First shape parameter of kappa distribution.");
+
+ /**
+ * Second shape parameter.
+ */
+ public static final OptionID SHAPE2_ID = new OptionID("distribution.kappa.shape2", "Second shape parameter of kappa distribution.");
+
+ /** Parameters. */
+ double location, scale, shape1, shape2;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ DoubleParameter locationP = new DoubleParameter(LOCATION_ID);
+ if (config.grab(locationP)) {
+ location = locationP.doubleValue();
+ }
+
+ DoubleParameter scaleP = new DoubleParameter(SCALE_ID);
+ if (config.grab(scaleP)) {
+ scale = scaleP.doubleValue();
+ }
+
+ DoubleParameter shape1P = new DoubleParameter(SHAPE1_ID);
+ if (config.grab(shape1P)) {
+ shape1 = shape1P.doubleValue();
+ }
+
+ DoubleParameter shape2P = new DoubleParameter(SHAPE2_ID);
+ if (config.grab(shape2P)) {
+ shape2 = shape2P.doubleValue();
+ }
+ }
+
+ @Override
+ protected KappaDistribution makeInstance() {
+ return new KappaDistribution(location, scale, shape1, shape2, rnd);
+ }
+ }
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LaplaceDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LaplaceDistribution.java
index eb238a20..18a6ffbe 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LaplaceDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LaplaceDistribution.java
@@ -26,6 +26,10 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution;
import java.util.Random;
import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
/**
* Laplace distribution also known as double exponential distribution
@@ -33,12 +37,7 @@ import de.lmu.ifi.dbs.elki.utilities.Alias;
* @author Erich Schubert
*/
@Alias("DoubleExponentialDistribution")
-public class LaplaceDistribution implements Distribution {
- /**
- * Random generator.
- */
- Random rnd;
-
+public class LaplaceDistribution extends AbstractDistribution {
/**
* Rate, inverse of mean
*/
@@ -55,7 +54,7 @@ public class LaplaceDistribution implements Distribution {
* @param rate Rate parameter (1/scale)
*/
public LaplaceDistribution(double rate) {
- this(rate, 0.0, null);
+ this(rate, 0., (Random) null);
}
/**
@@ -65,7 +64,7 @@ public class LaplaceDistribution implements Distribution {
* @param location Location parameter
*/
public LaplaceDistribution(double rate, double location) {
- this(rate, location, null);
+ this(rate, location, (Random) null);
}
/**
@@ -75,7 +74,7 @@ public class LaplaceDistribution implements Distribution {
* @param random Random generator
*/
public LaplaceDistribution(double rate, Random random) {
- this(rate, 0.0, random);
+ this(rate, 0., random);
}
/**
@@ -86,10 +85,22 @@ public class LaplaceDistribution implements Distribution {
* @param random Random generator
*/
public LaplaceDistribution(double rate, double location, Random random) {
- super();
+ super(random);
+ this.rate = rate;
+ this.location = location;
+ }
+
+ /**
+ * Constructor.
+ *
+ * @param rate Rate parameter (1/scale)
+ * @param location Location parameter
+ * @param random Random generator
+ */
+ public LaplaceDistribution(double rate, double location, RandomFactory random) {
+ super(random);
this.rate = rate;
this.location = location;
- this.rnd = random;
}
@Override
@@ -157,7 +168,7 @@ public class LaplaceDistribution implements Distribution {
*/
@Override
public double nextRandom() {
- double val = rnd.nextDouble();
+ double val = random.nextDouble();
if (val < .5) {
return Math.log(2 * val) / rate + location;
} else {
@@ -169,4 +180,41 @@ public class LaplaceDistribution implements Distribution {
public String toString() {
return "LaplaceDistribution(rate=" + rate + ", location=" + location + ")";
}
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractDistribution.Parameterizer {
+ /**
+ * Shape parameter gamma.
+ */
+ public static final OptionID RATE_ID = new OptionID("distribution.laplace.rate", "Laplace distribution rate (lambda) parameter (inverse of scale).");
+
+ /** Parameters. */
+ double location, rate;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ DoubleParameter locP = new DoubleParameter(LOCATION_ID);
+ if (config.grab(locP)) {
+ location = locP.doubleValue();
+ }
+
+ DoubleParameter rateP = new DoubleParameter(RATE_ID);
+ if (config.grab(rateP)) {
+ rate = rateP.doubleValue();
+ }
+ }
+
+ @Override
+ protected LaplaceDistribution makeInstance() {
+ return new LaplaceDistribution(rate, location, rnd);
+ }
+ }
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LogGammaAlternateDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LogGammaAlternateDistribution.java
index 496e6867..90902ae0 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LogGammaAlternateDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LogGammaAlternateDistribution.java
@@ -25,6 +25,11 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution;
import java.util.Random;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+
/**
* Alternate Log-Gamma Distribution, with random generation and density
* functions.
@@ -35,7 +40,7 @@ import java.util.Random;
*
* @author Erich Schubert
*/
-public class LogGammaAlternateDistribution implements Distribution {
+public class LogGammaAlternateDistribution extends AbstractDistribution {
/**
* Alpha == k.
*/
@@ -52,9 +57,23 @@ public class LogGammaAlternateDistribution implements Distribution {
private final double shift;
/**
- * The random generator.
+ * Constructor for Gamma distribution.
+ *
+ * @param k k, alpha aka. "shape" parameter
+ * @param shift Location offset
+ * @param theta Theta = 1.0/Beta aka. "scaling" parameter
+ * @param random Random generator
*/
- private Random random;
+ public LogGammaAlternateDistribution(double k, double theta, double shift, Random random) {
+ super(random);
+ if (!(k > 0.0) || !(theta > 0.0)) { // Note: also tests for NaNs!
+ throw new IllegalArgumentException("Invalid parameters for Gamma distribution: " + k + " " + theta);
+ }
+
+ this.k = k;
+ this.theta = theta;
+ this.shift = shift;
+ }
/**
* Constructor for Gamma distribution.
@@ -64,8 +83,8 @@ public class LogGammaAlternateDistribution implements Distribution {
* @param theta Theta = 1.0/Beta aka. "scaling" parameter
* @param random Random generator
*/
- public LogGammaAlternateDistribution(double k, double theta, double shift, Random random) {
- super();
+ public LogGammaAlternateDistribution(double k, double theta, double shift, RandomFactory random) {
+ super(random);
if (!(k > 0.0) || !(theta > 0.0)) { // Note: also tests for NaNs!
throw new IllegalArgumentException("Invalid parameters for Gamma distribution: " + k + " " + theta);
}
@@ -73,7 +92,6 @@ public class LogGammaAlternateDistribution implements Distribution {
this.k = k;
this.theta = theta;
this.shift = shift;
- this.random = random;
}
/**
@@ -84,7 +102,7 @@ public class LogGammaAlternateDistribution implements Distribution {
* @param shift Location offset
*/
public LogGammaAlternateDistribution(double k, double theta, double shift) {
- this(k, theta, shift, null);
+ this(k, theta, shift, (Random) null);
}
@Override
@@ -206,4 +224,46 @@ public class LogGammaAlternateDistribution implements Distribution {
public static double quantile(double p, double k, double theta, double shift) {
return Math.log(GammaDistribution.quantile(p, k, 1.)) / theta + shift;
}
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractDistribution.Parameterizer {
+ /**
+ * Shifting offset parameter.
+ */
+ public static final OptionID SHIFT_ID = new OptionID("distribution.loggamma.shift", "Shift offset parameter.");
+
+ /** Parameters. */
+ double k, theta, shift;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ DoubleParameter kP = new DoubleParameter(GammaDistribution.Parameterizer.K_ID);
+ if (config.grab(kP)) {
+ k = kP.doubleValue();
+ }
+
+ DoubleParameter thetaP = new DoubleParameter(GammaDistribution.Parameterizer.THETA_ID);
+ if (config.grab(thetaP)) {
+ theta = thetaP.doubleValue();
+ }
+
+ DoubleParameter shiftP = new DoubleParameter(SHIFT_ID);
+ if (config.grab(shiftP)) {
+ shift = shiftP.doubleValue();
+ }
+ }
+
+ @Override
+ protected LogGammaAlternateDistribution makeInstance() {
+ return new LogGammaAlternateDistribution(k, theta, shift, rnd);
+ }
+ }
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LogGammaDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LogGammaDistribution.java
index db3a2b3f..76b10dde 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LogGammaDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LogGammaDistribution.java
@@ -25,6 +25,11 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution;
import java.util.Random;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+
/**
* Log-Gamma Distribution, with random generation and density functions.
*
@@ -34,7 +39,7 @@ import java.util.Random;
*
* @author Erich Schubert
*/
-public class LogGammaDistribution implements Distribution {
+public class LogGammaDistribution extends AbstractDistribution {
/**
* Alpha == k.
*/
@@ -51,9 +56,23 @@ public class LogGammaDistribution implements Distribution {
private final double shift;
/**
- * The random generator.
+ * Constructor for Gamma distribution.
+ *
+ * @param k k, alpha aka. "shape" parameter
+ * @param shift Location offset
+ * @param theta Theta = 1.0/Beta aka. "scaling" parameter
+ * @param random Random generator
*/
- private Random random;
+ public LogGammaDistribution(double k, double theta, double shift, Random random) {
+ super(random);
+ if (!(k > 0.0) || !(theta > 0.0)) { // Note: also tests for NaNs!
+ throw new IllegalArgumentException("Invalid parameters for Gamma distribution: " + k + " " + theta);
+ }
+
+ this.k = k;
+ this.theta = theta;
+ this.shift = shift;
+ }
/**
* Constructor for Gamma distribution.
@@ -63,8 +82,8 @@ public class LogGammaDistribution implements Distribution {
* @param theta Theta = 1.0/Beta aka. "scaling" parameter
* @param random Random generator
*/
- public LogGammaDistribution(double k, double theta, double shift, Random random) {
- super();
+ public LogGammaDistribution(double k, double theta, double shift, RandomFactory random) {
+ super(random);
if (!(k > 0.0) || !(theta > 0.0)) { // Note: also tests for NaNs!
throw new IllegalArgumentException("Invalid parameters for Gamma distribution: " + k + " " + theta);
}
@@ -72,7 +91,6 @@ public class LogGammaDistribution implements Distribution {
this.k = k;
this.theta = theta;
this.shift = shift;
- this.random = random;
}
/**
@@ -83,7 +101,7 @@ public class LogGammaDistribution implements Distribution {
* @param shift Location offset
*/
public LogGammaDistribution(double k, double theta, double shift) {
- this(k, theta, shift, null);
+ this(k, theta, shift, (Random) null);
}
@Override
@@ -191,4 +209,46 @@ public class LogGammaDistribution implements Distribution {
public static double quantile(double p, double k, double theta, double shift) {
return Math.exp(GammaDistribution.quantile(p, k, theta)) + shift;
}
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractDistribution.Parameterizer {
+ /**
+ * Shifting offset parameter.
+ */
+ public static final OptionID SHIFT_ID = new OptionID("distribution.loggamma.shift", "Shift offset parameter.");
+
+ /** Parameters. */
+ double k, theta, shift;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ DoubleParameter kP = new DoubleParameter(GammaDistribution.Parameterizer.K_ID);
+ if (config.grab(kP)) {
+ k = kP.doubleValue();
+ }
+
+ DoubleParameter thetaP = new DoubleParameter(GammaDistribution.Parameterizer.THETA_ID);
+ if (config.grab(thetaP)) {
+ theta = thetaP.doubleValue();
+ }
+
+ DoubleParameter shiftP = new DoubleParameter(SHIFT_ID);
+ if (config.grab(shiftP)) {
+ shift = shiftP.doubleValue();
+ }
+ }
+
+ @Override
+ protected LogGammaDistribution makeInstance() {
+ return new LogGammaDistribution(k, theta, shift, rnd);
+ }
+ }
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LogLogisticDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LogLogisticDistribution.java
index cb75561d..fe8557b3 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LogLogisticDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LogLogisticDistribution.java
@@ -24,30 +24,44 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution;
*/
import java.util.Random;
+import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+
/**
* Log-Logistic distribution also known as Fisk distribution.
*
* @author Erich Schubert
*/
-public class LogLogisticDistribution implements Distribution {
+@Alias({ "fisk", "loglog" })
+public class LogLogisticDistribution extends AbstractDistribution {
/**
* Parameters: scale and shape
*/
double scale, shape;
/**
- * Random number generator
+ * Constructor.
+ *
+ * @param scale Scale
+ * @param shape Shape
*/
- Random random;
+ public LogLogisticDistribution(double scale, double shape) {
+ this(scale, shape, (Random) null);
+ }
/**
* Constructor.
*
* @param scale Scale
* @param shape Shape
+ * @param random Random number generator
*/
- public LogLogisticDistribution(double scale, double shape) {
- this(scale, shape, null);
+ public LogLogisticDistribution(double scale, double shape, Random random) {
+ super(random);
+ this.scale = scale;
+ this.shape = shape;
}
/**
@@ -57,11 +71,10 @@ public class LogLogisticDistribution implements Distribution {
* @param shape Shape
* @param random Random number generator
*/
- public LogLogisticDistribution(double scale, double shape, Random random) {
- super();
+ public LogLogisticDistribution(double scale, double shape, RandomFactory random) {
+ super(random);
this.scale = scale;
this.shape = shape;
- this.random = random;
}
/**
@@ -73,7 +86,7 @@ public class LogLogisticDistribution implements Distribution {
* @return PDF
*/
public static double pdf(double val, double scale, double shape) {
- if(val < 0) {
+ if (val < 0) {
return 0;
}
val = Math.abs(val / scale);
@@ -96,7 +109,7 @@ public class LogLogisticDistribution implements Distribution {
* @return CDF
*/
public static double cdf(double val, double scale, double shape) {
- if(val < 0) {
+ if (val < 0) {
return 0;
}
return 1. / (1. + Math.pow(val / scale, -shape));
@@ -134,4 +147,36 @@ public class LogLogisticDistribution implements Distribution {
public String toString() {
return "LogLogisticDistribution(scale=" + scale + ", shape=" + shape + ")";
}
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractDistribution.Parameterizer {
+ /** Parameters. */
+ double scale, shape;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ DoubleParameter scaleP = new DoubleParameter(SCALE_ID);
+ if (config.grab(scaleP)) {
+ scale = scaleP.doubleValue();
+ }
+
+ DoubleParameter shapeP = new DoubleParameter(SHAPE_ID);
+ if (config.grab(shapeP)) {
+ shape = shapeP.doubleValue();
+ }
+ }
+
+ @Override
+ protected LogLogisticDistribution makeInstance() {
+ return new LogLogisticDistribution(scale, shape, rnd);
+ }
+ }
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LogNormalDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LogNormalDistribution.java
index 4c3d9aa0..ca2fbbab 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LogNormalDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LogNormalDistribution.java
@@ -26,6 +26,11 @@ import java.util.Random;
import de.lmu.ifi.dbs.elki.math.MathUtil;
import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
/**
* Log-Normal distribution.
@@ -40,7 +45,7 @@ import de.lmu.ifi.dbs.elki.utilities.Alias;
* @author Erich Schubert
*/
@Alias({ "lognormal" })
-public class LogNormalDistribution implements Distribution {
+public class LogNormalDistribution extends AbstractDistribution {
/**
* Mean value for the generator
*/
@@ -57,9 +62,19 @@ public class LogNormalDistribution implements Distribution {
private double shift = 0.;
/**
- * The random generator.
+ * Constructor for Log-Normal distribution
+ *
+ * @param logmean Mean
+ * @param logstddev Standard Deviation
+ * @param shift Shifting offset
+ * @param random Random generator
*/
- private Random random;
+ public LogNormalDistribution(double logmean, double logstddev, double shift, Random random) {
+ super(random);
+ this.logmean = logmean;
+ this.logstddev = logstddev;
+ this.shift = shift;
+ }
/**
* Constructor for Log-Normal distribution
@@ -69,12 +84,11 @@ public class LogNormalDistribution implements Distribution {
* @param shift Shifting offset
* @param random Random generator
*/
- public LogNormalDistribution(double logmean, double logstddev, double shift, Random random) {
- super();
+ public LogNormalDistribution(double logmean, double logstddev, double shift, RandomFactory random) {
+ super(random);
this.logmean = logmean;
this.logstddev = logstddev;
this.shift = shift;
- this.random = random;
}
/**
@@ -85,7 +99,7 @@ public class LogNormalDistribution implements Distribution {
* @param shift Shifting offset
*/
public LogNormalDistribution(double logmean, double logstddev, double shift) {
- this(logmean, logstddev, shift, null);
+ this(logmean, logstddev, shift, (Random) null);
}
@Override
@@ -117,7 +131,7 @@ public class LogNormalDistribution implements Distribution {
* @return PDF of the given normal distribution at x.
*/
public static double pdf(double x, double mu, double sigma) {
- if (x <= 0.) {
+ if(x <= 0.) {
return 0.;
}
final double x_mu = Math.log(x) - mu;
@@ -134,7 +148,7 @@ public class LogNormalDistribution implements Distribution {
* @return The CDF of the given normal distribution at x.
*/
public static double cdf(double x, double mu, double sigma) {
- if (x <= 0.) {
+ if(x <= 0.) {
return 0.;
}
return .5 * (1 + NormalDistribution.erf((Math.log(x) - mu) / (MathUtil.SQRT2 * sigma)));
@@ -162,4 +176,57 @@ public class LogNormalDistribution implements Distribution {
public String toString() {
return "LogNormalDistribution(logmean=" + logmean + ", logstddev=" + logstddev + ", shift=" + shift + ")";
}
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractDistribution.Parameterizer {
+ /**
+ * LogMean parameter
+ */
+ public static final OptionID LOGMEAN_ID = new OptionID("distribution.lognormal.logmean", "Mean of the distribution before logscaling.");
+
+ /**
+ * LogScale parameter
+ */
+ public static final OptionID LOGSTDDEV_ID = new OptionID("distribution.lognormal.logstddev", "Standard deviation of the distribution before logscaling.");
+
+ /**
+ * Shift parameter
+ */
+ public static final OptionID SHIFT_ID = new OptionID("distribution.lognormal.shift", "Shifting offset, so the distribution does not begin at 0.");
+
+ /** Parameters. */
+ double shift, logmean, logsigma;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ DoubleParameter logmeanP = new DoubleParameter(LOGMEAN_ID);
+ if(config.grab(logmeanP)) {
+ logmean = logmeanP.doubleValue();
+ }
+
+ DoubleParameter logsigmaP = new DoubleParameter(LOGSTDDEV_ID);
+ logsigmaP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+ if(config.grab(logsigmaP)) {
+ logsigma = logsigmaP.doubleValue();
+ }
+
+ DoubleParameter shiftP = new DoubleParameter(SHIFT_ID, 0.);
+ if(config.grab(shiftP)) {
+ shift = shiftP.doubleValue();
+ }
+ }
+
+ @Override
+ protected LogNormalDistribution makeInstance() {
+ return new LogNormalDistribution(logmean, logsigma, shift, rnd);
+ }
+ }
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LogisticDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LogisticDistribution.java
index 052847d6..12307a36 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LogisticDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LogisticDistribution.java
@@ -25,31 +25,44 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution;
import java.util.Random;
import de.lmu.ifi.dbs.elki.math.MathUtil;
+import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
/**
* Logistic distribution.
*
* @author Erich Schubert
*/
-public class LogisticDistribution implements Distribution {
+@Alias({ "log" })
+public class LogisticDistribution extends AbstractDistribution {
/**
* Parameters: location and scale
*/
double location, scale;
/**
- * Random number generator
+ * Constructor.
+ *
+ * @param location Location
+ * @param scale Scale
*/
- Random random;
+ public LogisticDistribution(double location, double scale) {
+ this(location, scale, (Random) null);
+ }
/**
* Constructor.
*
* @param location Location
* @param scale Scale
+ * @param random Random number generator
*/
- public LogisticDistribution(double location, double scale) {
- this(location, scale, null);
+ public LogisticDistribution(double location, double scale, Random random) {
+ super(random);
+ this.location = location;
+ this.scale = scale;
}
/**
@@ -59,11 +72,10 @@ public class LogisticDistribution implements Distribution {
* @param scale Scale
* @param random Random number generator
*/
- public LogisticDistribution(double location, double scale, Random random) {
- super();
+ public LogisticDistribution(double location, double scale, RandomFactory random) {
+ super(random);
this.location = location;
this.scale = scale;
- this.random = random;
}
/**
@@ -183,4 +195,36 @@ public class LogisticDistribution implements Distribution {
public String toString() {
return "LogisticDistribution(location=" + location + ", scale=" + scale + ")";
}
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractDistribution.Parameterizer {
+ /** Parameters. */
+ double location, scale;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ DoubleParameter scaleP = new DoubleParameter(SCALE_ID);
+ if (config.grab(scaleP)) {
+ scale = scaleP.doubleValue();
+ }
+
+ DoubleParameter locationP = new DoubleParameter(LOCATION_ID);
+ if (config.grab(locationP)) {
+ location = locationP.doubleValue();
+ }
+ }
+
+ @Override
+ protected LogisticDistribution makeInstance() {
+ return new LogisticDistribution(location, scale, rnd);
+ }
+ }
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java
index c4ae7b6c..0a0d3d4e 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java
@@ -27,6 +27,10 @@ import java.util.Random;
import de.lmu.ifi.dbs.elki.math.MathUtil;
import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
/**
* Gaussian distribution aka normal distribution
@@ -34,7 +38,7 @@ import de.lmu.ifi.dbs.elki.utilities.Alias;
* @author Erich Schubert
*/
@Alias({ "GaussianDistribution", "normal", "gauss" })
-public class NormalDistribution implements Distribution {
+public class NormalDistribution extends AbstractDistribution {
/**
* Coefficients for erf approximation.
*
@@ -123,9 +127,17 @@ public class NormalDistribution implements Distribution {
private double stddev;
/**
- * The random generator.
+ * Constructor for Gaussian distribution
+ *
+ * @param mean Mean
+ * @param stddev Standard Deviation
+ * @param random Random generator
*/
- private Random random;
+ public NormalDistribution(double mean, double stddev, RandomFactory random) {
+ super(random);
+ this.mean = mean;
+ this.stddev = stddev;
+ }
/**
* Constructor for Gaussian distribution
@@ -135,10 +147,9 @@ public class NormalDistribution implements Distribution {
* @param random Random generator
*/
public NormalDistribution(double mean, double stddev, Random random) {
- super();
+ super(random);
this.mean = mean;
this.stddev = stddev;
- this.random = random;
}
/**
@@ -148,7 +159,7 @@ public class NormalDistribution implements Distribution {
* @param stddev Standard Deviation
*/
public NormalDistribution(double mean, double stddev) {
- this(mean, stddev, new Random());
+ this(mean, stddev, (Random) null);
}
@Override
@@ -368,4 +379,37 @@ public class NormalDistribution implements Distribution {
return (((((ERFINV_A[0] * r + ERFINV_A[1]) * r + ERFINV_A[2]) * r + ERFINV_A[3]) * r + ERFINV_A[4]) * r + ERFINV_A[5]) * q / (((((ERFINV_B[0] * r + ERFINV_B[1]) * r + ERFINV_B[2]) * r + ERFINV_B[3]) * r + ERFINV_B[4]) * r + 1);
}
}
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractDistribution.Parameterizer {
+ /** Parameters. */
+ double mu, sigma;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ DoubleParameter muP = new DoubleParameter(LOCATION_ID);
+ if (config.grab(muP)) {
+ mu = muP.doubleValue();
+ }
+
+ DoubleParameter sigmaP = new DoubleParameter(SCALE_ID);
+ sigmaP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+ if (config.grab(sigmaP)) {
+ sigma = sigmaP.doubleValue();
+ }
+ }
+
+ @Override
+ protected NormalDistribution makeInstance() {
+ return new NormalDistribution(mu, sigma, rnd);
+ }
+ }
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/PoissonDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/PoissonDistribution.java
index f6b2e0ca..b6b70b34 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/PoissonDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/PoissonDistribution.java
@@ -22,10 +22,18 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution;
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import java.util.Random;
+
import de.lmu.ifi.dbs.elki.math.MathUtil;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages;
import de.lmu.ifi.dbs.elki.utilities.exceptions.NotImplementedException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
/**
* INCOMPLETE implementation of the poisson distribution.
@@ -40,7 +48,7 @@ import de.lmu.ifi.dbs.elki.utilities.exceptions.NotImplementedException;
*
* @author Erich Schubert
*/
-public class PoissonDistribution implements Distribution {
+public class PoissonDistribution extends AbstractDistribution {
/**
* Number of tries
*/
@@ -108,86 +116,91 @@ public class PoissonDistribution implements Distribution {
/**
* Constructor.
*
- * Private: API not yet completely implemented!
- *
* @param n Number of tries
* @param p Success probability
*/
public PoissonDistribution(int n, double p) {
- super();
+ this(n, p, (Random) null);
+ }
+
+ /**
+ * Constructor.
+ *
+ * @param n Number of tries
+ * @param p Success probability
+ * @param random Random generator
+ */
+ public PoissonDistribution(int n, double p, Random random) {
+ super(random);
+ this.n = n;
+ this.p = p;
+ }
+
+ /**
+ * Constructor.
+ *
+ * @param n Number of tries
+ * @param p Success probability
+ * @param random Random generator
+ */
+ public PoissonDistribution(int n, double p, RandomFactory random) {
+ super(random);
this.n = n;
this.p = p;
}
/**
- * Poisson PMF for integer values.
+ * Poisson probability mass function (PMF) for integer values.
*
* @param x integer values
* @return Probability
*/
- @Reference(title = "Fast and accurate computation of binomial probabilities", authors = "C. Loader", booktitle = "", url = "http://projects.scipy.org/scipy/raw-attachment/ticket/620/loader2000Fast.pdf")
public double pmf(int x) {
- // Invalid values
- if (x < 0 || x > n) {
- return 0.0;
- }
- // Extreme probabilities
- if (p <= 0d) {
- return x == 0 ? 1.0 : 0.0;
- }
- if (p >= 1d) {
- return x == n ? 1.0 : 0.0;
- }
- // Extreme values of x
- if (x == 0) {
- if (p < 0.1) {
- return Math.exp(-devianceTerm(n, n * (1.0 - p)) - n * p);
- } else {
- return Math.exp(n * Math.log(1.0 - p));
- }
- }
- if (x == n) {
- if (p > 0.9) {
- return Math.exp(-devianceTerm(n, n * p) - n * (1 - p));
- } else {
- return Math.exp(n * Math.log(p));
- }
- }
-
- final double lc = stirlingError(n) - stirlingError(x) - stirlingError(n - x) - devianceTerm(x, n * p) - devianceTerm(n - x, n * (1.0 - p));
- final double f = (MathUtil.TWOPI * x * (n - x)) / n;
- return Math.exp(lc) / Math.sqrt(f);
+ return pmf(x, n, p);
}
@Override
- @Reference(title = "Fast and accurate computation of binomial probabilities", authors = "C. Loader", booktitle = "", url = "http://projects.scipy.org/scipy/raw-attachment/ticket/620/loader2000Fast.pdf")
public double pdf(double x) {
+ // FIXME: return 0 for non-integer x?
+ return pmf(x, n, p);
+ }
+
+ /**
+ * Poisson probability mass function (PMF) for integer values.
+ *
+ * @param x integer values
+ * @return Probability
+ */
+ @Reference(title = "Fast and accurate computation of binomial probabilities", authors = "C. Loader", booktitle = "", url = "http://projects.scipy.org/scipy/raw-attachment/ticket/620/loader2000Fast.pdf")
+ public static double pmf(double x, int n, double p) {
// Invalid values
- if (x < 0 || x > n) {
- return 0.0;
+ if(x < 0 || x > n) {
+ return 0.;
}
// Extreme probabilities
- if (p <= 0d) {
- return x == 0 ? 1.0 : 0.0;
+ if(p <= 0.) {
+ return x == 0 ? 1. : 0.;
}
- if (p >= 1d) {
- return x == n ? 1.0 : 0.0;
+ if(p >= 1.) {
+ return x == n ? 1. : 0.;
}
final double q = 1 - p;
// FIXME: check for x to be integer, return 0 otherwise?
// Extreme values of x
- if (x == 0) {
- if (p < 0.1) {
+ if(x == 0) {
+ if(p < .1) {
return Math.exp(-devianceTerm(n, n * q) - n * p);
- } else {
+ }
+ else {
return Math.exp(n * Math.log(q));
}
}
- if (x == n) {
- if (p > 0.9) {
+ if(x == n) {
+ if(p > .9) {
return Math.exp(-devianceTerm(n, n * p) - n * q);
- } else {
+ }
+ else {
return Math.exp(n * Math.log(p));
}
}
@@ -224,15 +237,16 @@ public class PoissonDistribution implements Distribution {
* @return pdf
*/
public static double poissonPDFm1(double x_plus_1, double lambda) {
- if (Double.isInfinite(lambda)) {
+ if(Double.isInfinite(lambda)) {
return 0.;
}
- if (x_plus_1 > 1) {
+ if(x_plus_1 > 1) {
return rawProbability(x_plus_1 - 1, lambda);
}
- if (lambda > Math.abs(x_plus_1 - 1) * MathUtil.LOG2 * Double.MAX_EXPONENT / 1e-14) {
+ if(lambda > Math.abs(x_plus_1 - 1) * MathUtil.LOG2 * Double.MAX_EXPONENT / 1e-14) {
return Math.exp(-lambda - GammaDistribution.logGamma(x_plus_1));
- } else {
+ }
+ else {
return rawProbability(x_plus_1, lambda) * (x_plus_1 / lambda);
}
}
@@ -247,15 +261,16 @@ public class PoissonDistribution implements Distribution {
* @return pdf
*/
public static double logpoissonPDFm1(double x_plus_1, double lambda) {
- if (Double.isInfinite(lambda)) {
+ if(Double.isInfinite(lambda)) {
return Double.NEGATIVE_INFINITY;
}
- if (x_plus_1 > 1) {
+ if(x_plus_1 > 1) {
return rawLogProbability(x_plus_1 - 1, lambda);
}
- if (lambda > Math.abs(x_plus_1 - 1) * MathUtil.LOG2 * Double.MAX_EXPONENT / 1e-14) {
+ if(lambda > Math.abs(x_plus_1 - 1) * MathUtil.LOG2 * Double.MAX_EXPONENT / 1e-14) {
return -lambda - GammaDistribution.logGamma(x_plus_1);
- } else {
+ }
+ else {
return rawLogProbability(x_plus_1, lambda) + Math.log(x_plus_1 / lambda);
}
}
@@ -271,18 +286,18 @@ public class PoissonDistribution implements Distribution {
@Reference(title = "Fast and accurate computation of binomial probabilities", authors = "C. Loader", booktitle = "", url = "http://projects.scipy.org/scipy/raw-attachment/ticket/620/loader2000Fast.pdf")
private static double stirlingError(int n) {
// Try to use a table value:
- if (n < 16) {
+ if(n < 16) {
return STIRLING_EXACT_ERROR[n << 1];
}
final double nn = n * n;
// Use the appropriate number of terms
- if (n > 500) {
+ if(n > 500) {
return (S0 - S1 / nn) / n;
}
- if (n > 80) {
+ if(n > 80) {
return ((S0 - (S1 - S2 / nn)) / nn) / n;
}
- if (n > 35) {
+ if(n > 35) {
return ((S0 - (S1 - (S2 - S3 / nn) / nn) / nn) / n);
}
return ((S0 - (S1 - (S2 - (S3 - S4 / nn) / nn) / nn) / nn) / n);
@@ -298,23 +313,24 @@ public class PoissonDistribution implements Distribution {
*/
@Reference(title = "Fast and accurate computation of binomial probabilities", authors = "C. Loader", booktitle = "", url = "http://projects.scipy.org/scipy/raw-attachment/ticket/620/loader2000Fast.pdf")
private static double stirlingError(double n) {
- if (n < 16.0) {
+ if(n < 16.0) {
// Our table has a step size of 0.5
final double n2 = 2.0 * n;
- if (Math.floor(n2) == n2) { // Exact match
+ if(Math.floor(n2) == n2) { // Exact match
return STIRLING_EXACT_ERROR[(int) n2];
- } else {
+ }
+ else {
return GammaDistribution.logGamma(n + 1.0) - (n + 0.5) * Math.log(n) + n - MathUtil.LOGSQRTTWOPI;
}
}
final double nn = n * n;
- if (n > 500.0) {
+ if(n > 500.0) {
return (S0 - S1 / nn) / n;
}
- if (n > 80.0) {
+ if(n > 80.0) {
return ((S0 - (S1 - S2 / nn)) / nn) / n;
}
- if (n > 35.0) {
+ if(n > 35.0) {
return ((S0 - (S1 - (S2 - S3 / nn) / nn) / nn) / n);
}
return ((S0 - (S1 - (S2 - (S3 - S4 / nn) / nn) / nn) / nn) / n);
@@ -331,15 +347,15 @@ public class PoissonDistribution implements Distribution {
*/
@Reference(title = "Fast and accurate computation of binomial probabilities", authors = "C. Loader", booktitle = "", url = "http://projects.scipy.org/scipy/raw-attachment/ticket/620/loader2000Fast.pdf")
private static double devianceTerm(double x, double np) {
- if (Math.abs(x - np) < 0.1 * (x + np)) {
+ if(Math.abs(x - np) < 0.1 * (x + np)) {
final double v = (x - np) / (x + np);
double s = (x - np) * v;
double ej = 2.0d * x * v;
- for (int j = 1;; j++) {
+ for(int j = 1;; j++) {
ej *= v * v;
final double s1 = s + ej / (2 * j + 1);
- if (s1 == s) {
+ if(s1 == s) {
return s1;
}
s = s1;
@@ -359,17 +375,17 @@ public class PoissonDistribution implements Distribution {
*/
public static double rawProbability(double x, double lambda) {
// Extreme lambda
- if (lambda == 0) {
+ if(lambda == 0) {
return ((x == 0) ? 1. : 0.);
}
// Extreme values
- if (Double.isInfinite(lambda) || x < 0) {
+ if(Double.isInfinite(lambda) || x < 0) {
return 0.;
}
- if (x <= lambda * Double.MIN_NORMAL) {
+ if(x <= lambda * Double.MIN_NORMAL) {
return Math.exp(-lambda);
}
- if (lambda < x * Double.MIN_NORMAL) {
+ if(lambda < x * Double.MIN_NORMAL) {
double r = -lambda + x * Math.log(lambda) - GammaDistribution.logGamma(x + 1);
return Math.exp(r);
}
@@ -389,17 +405,17 @@ public class PoissonDistribution implements Distribution {
*/
public static double rawLogProbability(double x, double lambda) {
// Extreme lambda
- if (lambda == 0) {
+ if(lambda == 0) {
return ((x == 0) ? 1. : Double.NEGATIVE_INFINITY);
}
// Extreme values
- if (Double.isInfinite(lambda) || x < 0) {
+ if(Double.isInfinite(lambda) || x < 0) {
return Double.NEGATIVE_INFINITY;
}
- if (x <= lambda * Double.MIN_NORMAL) {
+ if(x <= lambda * Double.MIN_NORMAL) {
return -lambda;
}
- if (lambda < x * Double.MIN_NORMAL) {
+ if(lambda < x * Double.MIN_NORMAL) {
return -lambda + x * Math.log(lambda) - GammaDistribution.logGamma(x + 1);
}
final double f = MathUtil.TWOPI * x;
@@ -411,4 +427,56 @@ public class PoissonDistribution implements Distribution {
public String toString() {
return "PoissonDistribution(n=" + n + ", p=" + p + ")";
}
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractDistribution.Parameterizer {
+ /**
+ * Number of trials.
+ */
+ public static final OptionID N_ID = new OptionID("distribution.poisson.n", "Number of trials.");
+
+ /**
+ * Success probability.
+ */
+ public static final OptionID PROB_ID = new OptionID("distribution.poisson.probability", "Success probability.");
+
+ /**
+ * Number of trials.
+ */
+ int n;
+
+ /**
+ * Success probability.
+ */
+ double p;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ IntParameter nP = new IntParameter(N_ID);
+ nP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(nP)) {
+ n = nP.intValue();
+ }
+
+ DoubleParameter probP = new DoubleParameter(PROB_ID);
+ probP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
+ probP.addConstraint(CommonConstraints.LESS_EQUAL_ONE_DOUBLE);
+ if(config.grab(probP)) {
+ p = probP.doubleValue();
+ }
+ }
+
+ @Override
+ protected PoissonDistribution makeInstance() {
+ return new PoissonDistribution(n, p, rnd);
+ }
+ }
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/RayleighDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/RayleighDistribution.java
index 31faf8ed..68870f1a 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/RayleighDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/RayleighDistribution.java
@@ -25,16 +25,20 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution;
import java.util.Random;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+
/**
* Rayleigh distribution.
*
* @author Erich Schubert
*/
-public class RayleighDistribution implements Distribution {
+public class RayleighDistribution extends AbstractDistribution {
/**
- * Position parameter.
+ * Location parameter.
*/
- double mu = 0.0;
+ double mu = 0.;
/**
* Scale parameter.
@@ -42,17 +46,12 @@ public class RayleighDistribution implements Distribution {
double sigma;
/**
- * Random number generator.
- */
- Random random;
-
- /**
* Constructor.
*
* @param sigma Scale parameter
*/
public RayleighDistribution(double sigma) {
- this(0., sigma, null);
+ this(0., sigma, (Random) null);
}
/**
@@ -62,7 +61,7 @@ public class RayleighDistribution implements Distribution {
* @param sigma Scale parameter
*/
public RayleighDistribution(double mu, double sigma) {
- this(mu, sigma, null);
+ this(mu, sigma, (Random) null);
}
/**
@@ -83,10 +82,22 @@ public class RayleighDistribution implements Distribution {
* @param random Random number generator
*/
public RayleighDistribution(double mu, double sigma, Random random) {
- super();
+ super(random);
+ this.mu = mu;
+ this.sigma = sigma;
+ }
+
+ /**
+ * Constructor.
+ *
+ * @param mu Position parameter
+ * @param sigma Scale parameter
+ * @param random Random number generator
+ */
+ public RayleighDistribution(double mu, double sigma, RandomFactory random) {
+ super(random);
this.mu = mu;
this.sigma = sigma;
- this.random = random;
}
@Override
@@ -162,4 +173,36 @@ public class RayleighDistribution implements Distribution {
public String toString() {
return "RayleighDistribution(mu=" + mu + ", sigma=" + sigma + ")";
}
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractDistribution.Parameterizer {
+ /** Parameters. */
+ double mean, scale;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ DoubleParameter meanP = new DoubleParameter(LOCATION_ID, 0.);
+ if (config.grab(meanP)) {
+ mean = meanP.doubleValue();
+ }
+
+ DoubleParameter scaleP = new DoubleParameter(SCALE_ID);
+ if (config.grab(scaleP)) {
+ scale = scaleP.doubleValue();
+ }
+ }
+
+ @Override
+ protected RayleighDistribution makeInstance() {
+ return new RayleighDistribution(mean, scale, rnd);
+ }
+ }
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/SkewGeneralizedNormalDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/SkewGeneralizedNormalDistribution.java
index f04e776b..76931029 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/SkewGeneralizedNormalDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/SkewGeneralizedNormalDistribution.java
@@ -26,6 +26,11 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution;
import java.util.Random;
import de.lmu.ifi.dbs.elki.math.MathUtil;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
/**
* Generalized Gaussian distribution by adding a skew term, similar to lognormal
@@ -36,7 +41,7 @@ import de.lmu.ifi.dbs.elki.math.MathUtil;
*
* @author Erich Schubert
*/
-public class SkewGeneralizedNormalDistribution implements Distribution {
+public class SkewGeneralizedNormalDistribution extends AbstractDistribution {
/**
* Mean value for the generator
*/
@@ -53,9 +58,19 @@ public class SkewGeneralizedNormalDistribution implements Distribution {
private double skew;
/**
- * The random generator.
+ * Constructor for Gaussian distribution
+ *
+ * @param mean Mean
+ * @param stddev Standard Deviation
+ * @param skew Skew
+ * @param random Random generator
*/
- private Random random;
+ public SkewGeneralizedNormalDistribution(double mean, double stddev, double skew, Random random) {
+ super(random);
+ this.mean = mean;
+ this.stddev = stddev;
+ this.skew = skew;
+ }
/**
* Constructor for Gaussian distribution
@@ -65,12 +80,11 @@ public class SkewGeneralizedNormalDistribution implements Distribution {
* @param skew Skew
* @param random Random generator
*/
- public SkewGeneralizedNormalDistribution(double mean, double stddev, double skew, Random random) {
- super();
+ public SkewGeneralizedNormalDistribution(double mean, double stddev, double skew, RandomFactory random) {
+ super(random);
this.mean = mean;
this.stddev = stddev;
this.skew = skew;
- this.random = random;
}
/**
@@ -81,7 +95,7 @@ public class SkewGeneralizedNormalDistribution implements Distribution {
* @param skew Skew
*/
public SkewGeneralizedNormalDistribution(double mean, double stddev, double skew) {
- this(mean, stddev, skew, null);
+ this(mean, stddev, skew, (Random) null);
}
@Override
@@ -102,7 +116,7 @@ public class SkewGeneralizedNormalDistribution implements Distribution {
@Override
public double nextRandom() {
double y = random.nextGaussian();
- if (Math.abs(skew) > 0.) {
+ if(Math.abs(skew) > 0.) {
y = (1. - Math.exp(-skew * y)) / skew;
}
return mean + stddev * y;
@@ -124,7 +138,7 @@ public class SkewGeneralizedNormalDistribution implements Distribution {
*/
public static double pdf(double x, double mu, double sigma, double skew) {
x = (x - mu) / sigma;
- if (Math.abs(skew) > 0.) {
+ if(Math.abs(skew) > 0.) {
x = -Math.log(1. - skew * x) / skew;
}
return MathUtil.SQRTHALF * Math.exp(-.5 * x * x) / sigma / (1 - skew * x);
@@ -140,9 +154,9 @@ public class SkewGeneralizedNormalDistribution implements Distribution {
*/
public static double cdf(double x, double mu, double sigma, double skew) {
x = (x - mu) / sigma;
- if (Math.abs(skew) > 0.) {
+ if(Math.abs(skew) > 0.) {
double tmp = 1 - skew * x;
- if (tmp < 1e-15) {
+ if(tmp < 1e-15) {
return (skew < 0.) ? 0. : 1.;
}
x = -Math.log(tmp) / skew;
@@ -161,9 +175,52 @@ public class SkewGeneralizedNormalDistribution implements Distribution {
*/
public static double quantile(double x, double mu, double sigma, double skew) {
x = NormalDistribution.standardNormalQuantile(x);
- if (Math.abs(skew) > 0.) {
+ if(Math.abs(skew) > 0.) {
x = (1. - Math.exp(-skew * x)) / skew;
}
return mu + sigma * x;
}
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractDistribution.Parameterizer {
+ /**
+ * Skew parameter
+ */
+ public static final OptionID SKEW_ID = new OptionID("distribution.skewgnormal.skew", "Skew of the distribution.");
+
+ /** Parameters. */
+ double mean, sigma, skew;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ DoubleParameter meanP = new DoubleParameter(LOCATION_ID);
+ if(config.grab(meanP)) {
+ mean = meanP.doubleValue();
+ }
+
+ DoubleParameter sigmaP = new DoubleParameter(SCALE_ID);
+ sigmaP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+ if(config.grab(sigmaP)) {
+ sigma = sigmaP.doubleValue();
+ }
+
+ DoubleParameter skewP = new DoubleParameter(SKEW_ID);
+ if(config.grab(skewP)) {
+ skew = skewP.doubleValue();
+ }
+ }
+
+ @Override
+ protected SkewGeneralizedNormalDistribution makeInstance() {
+ return new SkewGeneralizedNormalDistribution(mean, sigma, skew, rnd);
+ }
+ }
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/StudentsTDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/StudentsTDistribution.java
index 442df2e2..ef9f06d4 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/StudentsTDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/StudentsTDistribution.java
@@ -23,17 +23,23 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import java.util.Random;
+
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages;
import de.lmu.ifi.dbs.elki.utilities.exceptions.NotImplementedException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
/**
* Student's t distribution.
*
- * FIXME: add quantile function!
+ * FIXME: add quantile and random function!
*
* @author Jan Brusis
*/
-public class StudentsTDistribution implements Distribution {
+public class StudentsTDistribution extends AbstractDistribution {
/**
* Degrees of freedom
*/
@@ -45,6 +51,28 @@ public class StudentsTDistribution implements Distribution {
* @param v Degrees of freedom
*/
public StudentsTDistribution(int v) {
+ this(v, (Random) null);
+ }
+
+ /**
+ * Constructor.
+ *
+ * @param v Degrees of freedom
+ * @param random Random generator
+ */
+ public StudentsTDistribution(int v, Random random) {
+ super(random);
+ this.v = v;
+ }
+
+ /**
+ * Constructor.
+ *
+ * @param v Degrees of freedom
+ * @param random Random generator
+ */
+ public StudentsTDistribution(int v, RandomFactory random) {
+ super(random);
this.v = v;
}
@@ -98,4 +126,36 @@ public class StudentsTDistribution implements Distribution {
public String toString() {
return "StudentsTDistribution(v=" + v + ")";
}
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractDistribution.Parameterizer {
+ /**
+ * Degrees of freedom.
+ */
+ public static final OptionID NU_ID = new OptionID("distribution.studentst.nu", "Degrees of freedom.");
+
+ /** Parameters. */
+ int nu;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ IntParameter nuP = new IntParameter(NU_ID);
+ if (config.grab(nuP)) {
+ nu = nuP.intValue();
+ }
+ }
+
+ @Override
+ protected StudentsTDistribution makeInstance() {
+ return new StudentsTDistribution(nu, rnd);
+ }
+ }
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/UniformDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/UniformDistribution.java
index efae5080..db2e2fb2 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/UniformDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/UniformDistribution.java
@@ -25,12 +25,17 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution;
import java.util.Random;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+
/**
* Uniform distribution.
*
* @author Erich Schubert
*/
-public class UniformDistribution implements Distribution {
+public class UniformDistribution extends AbstractDistribution {
/**
* Minimum
*/
@@ -47,9 +52,30 @@ public class UniformDistribution implements Distribution {
private double len;
/**
- * The random generator.
+ * Constructor for a uniform distribution on the interval [min, max[
+ *
+ * @param min Minimum value
+ * @param max Maximum value
+ * @param random Random generator
*/
- private Random random;
+ public UniformDistribution(double min, double max, RandomFactory random) {
+ super(random);
+ if (Double.isInfinite(min) || Double.isInfinite(max)) {
+ throw new ArithmeticException("Infinite values given for uniform distribution.");
+ }
+ if (Double.isNaN(min) || Double.isNaN(max)) {
+ throw new ArithmeticException("NaN values given for uniform distribution.");
+ }
+ // Swap parameters if they were given incorrectly.
+ if (min > max) {
+ double tmp = min;
+ min = max;
+ max = tmp;
+ }
+ this.min = min;
+ this.max = max;
+ this.len = max - min;
+ }
/**
* Constructor for a uniform distribution on the interval [min, max[
@@ -59,7 +85,7 @@ public class UniformDistribution implements Distribution {
* @param random Random generator
*/
public UniformDistribution(double min, double max, Random random) {
- super();
+ super(random);
if (Double.isInfinite(min) || Double.isInfinite(max)) {
throw new ArithmeticException("Infinite values given for uniform distribution.");
}
@@ -75,7 +101,6 @@ public class UniformDistribution implements Distribution {
this.min = min;
this.max = max;
this.len = max - min;
- this.random = random;
}
/**
@@ -85,7 +110,7 @@ public class UniformDistribution implements Distribution {
* @param max Maximum value
*/
public UniformDistribution(double min, double max) {
- this(min, max, null);
+ this(min, max, (Random) null);
}
@Override
@@ -135,4 +160,46 @@ public class UniformDistribution implements Distribution {
public double getMax() {
return max;
}
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractDistribution.Parameterizer {
+ /**
+ * Minimum value
+ */
+ public static final OptionID MIN_ID = new OptionID("distribution.min", "Minimum value of distribution.");
+
+ /**
+ * Maximum value
+ */
+ public static final OptionID MAX_ID = new OptionID("distribution.max", "Maximum value of distribution.");
+
+ /** Parameters. */
+ double min, max;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ DoubleParameter minP = new DoubleParameter(MIN_ID);
+ if (config.grab(minP)) {
+ min = minP.doubleValue();
+ }
+
+ DoubleParameter maxP = new DoubleParameter(MAX_ID);
+ if (config.grab(maxP)) {
+ max = maxP.doubleValue();
+ }
+ }
+
+ @Override
+ protected UniformDistribution makeInstance() {
+ return new UniformDistribution(min, max, rnd);
+ }
+ }
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/WaldDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/WaldDistribution.java
index ec0ea712..123ece95 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/WaldDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/WaldDistribution.java
@@ -27,8 +27,11 @@ import java.util.Random;
import de.lmu.ifi.dbs.elki.math.MathUtil;
import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages;
import de.lmu.ifi.dbs.elki.utilities.exceptions.NotImplementedException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
/**
* Inverse Gaussian distribution aka Wald distribution
@@ -36,9 +39,9 @@ import de.lmu.ifi.dbs.elki.utilities.exceptions.NotImplementedException;
* @author Erich Schubert
*/
@Alias({ "InverseGaussianDistribution", "invgauss" })
-public class WaldDistribution implements Distribution {
+public class WaldDistribution extends AbstractDistribution {
/**
- * Mean value
+ * Location value
*/
private double mean;
@@ -48,9 +51,17 @@ public class WaldDistribution implements Distribution {
private double shape;
/**
- * The random generator.
+ * Constructor for wald distribution
+ *
+ * @param mean Mean
+ * @param shape Shape parameter
+ * @param random Random generator
*/
- private Random random;
+ public WaldDistribution(double mean, double shape, Random random) {
+ super(random);
+ this.mean = mean;
+ this.shape = shape;
+ }
/**
* Constructor for wald distribution
@@ -59,11 +70,10 @@ public class WaldDistribution implements Distribution {
* @param shape Shape parameter
* @param random Random generator
*/
- public WaldDistribution(double mean, double shape, Random random) {
- super();
+ public WaldDistribution(double mean, double shape, RandomFactory random) {
+ super(random);
this.mean = mean;
this.shape = shape;
- this.random = random;
}
/**
@@ -73,7 +83,7 @@ public class WaldDistribution implements Distribution {
* @param shape Shape parameter
*/
public WaldDistribution(double mean, double shape) {
- this(mean, shape, null);
+ this(mean, shape, (Random) null);
}
@Override
@@ -170,4 +180,36 @@ public class WaldDistribution implements Distribution {
// FIXME: implement!
throw new NotImplementedException(ExceptionMessages.UNSUPPORTED_NOT_YET);
}
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractDistribution.Parameterizer {
+ /** Parameters. */
+ double mean, shape;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ DoubleParameter meanP = new DoubleParameter(LOCATION_ID);
+ if (config.grab(meanP)) {
+ mean = meanP.doubleValue();
+ }
+
+ DoubleParameter shapeP = new DoubleParameter(SHAPE_ID);
+ if (config.grab(shapeP)) {
+ shape = shapeP.doubleValue();
+ }
+ }
+
+ @Override
+ protected WaldDistribution makeInstance() {
+ return new WaldDistribution(mean, shape, rnd);
+ }
+ }
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/WeibullDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/WeibullDistribution.java
index 165f536a..9b7af6d8 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/WeibullDistribution.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/WeibullDistribution.java
@@ -25,16 +25,20 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution;
import java.util.Random;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+
/**
* Weibull distribution.
*
* @author Erich Schubert
*/
-public class WeibullDistribution implements Distribution {
+public class WeibullDistribution extends AbstractDistribution {
/**
* Shift offset.
*/
- double theta = 0.0;
+ double theta = 0.;
/**
* Shape parameter k.
@@ -47,18 +51,13 @@ public class WeibullDistribution implements Distribution {
double lambda;
/**
- * Random number generator.
- */
- Random random;
-
- /**
* Constructor.
*
* @param k Shape parameter
* @param lambda Scale parameter
*/
public WeibullDistribution(double k, double lambda) {
- this(k, lambda, 0.0, null);
+ this(k, lambda, 0.0, (Random) null);
}
/**
@@ -69,7 +68,7 @@ public class WeibullDistribution implements Distribution {
* @param theta Shift offset parameter
*/
public WeibullDistribution(double k, double lambda, double theta) {
- this(k, lambda, theta, null);
+ this(k, lambda, theta, (Random) null);
}
/**
@@ -80,7 +79,7 @@ public class WeibullDistribution implements Distribution {
* @param random Random number generator
*/
public WeibullDistribution(double k, double lambda, Random random) {
- this(k, lambda, 0.0, random);
+ this(k, lambda, 0., random);
}
/**
@@ -92,11 +91,25 @@ public class WeibullDistribution implements Distribution {
* @param random Random number generator
*/
public WeibullDistribution(double k, double lambda, double theta, Random random) {
- super();
+ super(random);
+ this.k = k;
+ this.lambda = lambda;
+ this.theta = theta;
+ }
+
+ /**
+ * Constructor.
+ *
+ * @param k Shape parameter
+ * @param lambda Scale parameter
+ * @param theta Shift offset parameter
+ * @param random Random number generator
+ */
+ public WeibullDistribution(double k, double lambda, double theta, RandomFactory random) {
+ super(random);
this.k = k;
this.lambda = lambda;
this.theta = theta;
- this.random = random;
}
@Override
@@ -179,4 +192,41 @@ public class WeibullDistribution implements Distribution {
public String toString() {
return "WeibullDistribution(k=" + k + ", lambda=" + lambda + ", theta=" + theta + ")";
}
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractDistribution.Parameterizer {
+ /** Parameters. */
+ double theta, k, lambda;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ DoubleParameter thetaP = new DoubleParameter(LOCATION_ID, 0.);
+ if (config.grab(thetaP)) {
+ theta = thetaP.doubleValue();
+ }
+
+ DoubleParameter lambdaP = new DoubleParameter(SCALE_ID);
+ if (config.grab(lambdaP)) {
+ lambda = lambdaP.doubleValue();
+ }
+
+ DoubleParameter kP = new DoubleParameter(SHAPE_ID);
+ if (config.grab(kP)) {
+ k = kP.doubleValue();
+ }
+ }
+
+ @Override
+ protected WeibullDistribution makeInstance() {
+ return new WeibullDistribution(theta, k, lambda, rnd);
+ }
+ }
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GammaChoiWetteEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GammaChoiWetteEstimator.java
index d41881f0..9e47e81d 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GammaChoiWetteEstimator.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GammaChoiWetteEstimator.java
@@ -72,7 +72,7 @@ public class GammaChoiWetteEstimator implements DistributionEstimator<GammaDistr
meanlogx += deltalogx / (i + 1.);
}
// Initial approximation
- final double logmeanx = Math.log(meanx);
+ final double logmeanx = (meanx > 0) ? Math.log(meanx) : meanlogx;
final double diff = logmeanx - meanlogx;
double k = (3 - diff + Math.sqrt((diff - 3) * (diff - 3) + 24 * diff)) / (12 * diff);
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LaplaceLMMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LaplaceLMMEstimator.java
index 1e31af28..4026fdc5 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LaplaceLMMEstimator.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LaplaceLMMEstimator.java
@@ -30,7 +30,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
*
* @author Erich Schubert
*
- * @apiviz.has ExponentialDistribution
+ * @apiviz.has LaplaceDistribution
*/
public class LaplaceLMMEstimator extends AbstractLMMEstimator<LaplaceDistribution> {
/**
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LaplaceMADEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LaplaceMADEstimator.java
index d4671362..6fe6da0f 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LaplaceMADEstimator.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LaplaceMADEstimator.java
@@ -38,7 +38,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
*
* @author Erich Schubert
*
- * @apiviz.has ExponentialDistribution
+ * @apiviz.has LaplaceDistribution
*/
@Reference(title = "Applied Robust Statistics", authors = "D. J. Olive", booktitle = "Applied Robust Statistics", url="http://lagrange.math.siu.edu/Olive/preprints.htm")
public class LaplaceMADEstimator extends AbstractMADEstimator<LaplaceDistribution> {
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LaplaceMLEEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LaplaceMLEEstimator.java
index f44e2b3a..8d2c5707 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LaplaceMLEEstimator.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LaplaceMLEEstimator.java
@@ -42,7 +42,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
*
* @author Erich Schubert
*
- * @apiviz.has ExponentialDistribution
+ * @apiviz.has LaplaceDistribution
*/
@Reference(title = "The Double Exponential Distribution: Using Calculus to Find a Maximum Likelihood Estimator", authors = "R. M. Norton", booktitle = "The American Statistician 38 (2)", url = "http://dx.doi.org/10.2307%2F2683252")
public class LaplaceMLEEstimator implements DistributionEstimator<LaplaceDistribution> {
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/UniformMinMaxEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/UniformMinMaxEstimator.java
index e9870884..11bb8231 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/UniformMinMaxEstimator.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/UniformMinMaxEstimator.java
@@ -74,7 +74,8 @@ public class UniformMinMaxEstimator implements DistributionEstimator<UniformDist
/**
* Estimate parameters from minimum and maximum observed.
*
- * @param mm Minimum and Maximum
+ * @param min Minimum
+ * @param max Maximum
* @return Estimation
*/
public Distribution estimate(double min, double max) {
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/BestFitEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/BestFitEstimator.java
index dee3cbb3..8d57e0b7 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/BestFitEstimator.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/BestFitEstimator.java
@@ -87,11 +87,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
*
* @author Erich Schubert
*
- * @apiviz.composedOf MOMDistributionEstimator
- * @apiviz.composedOf MADDistributionEstimator
- * @apiviz.composedOf LMMDistributionEstimator
- * @apiviz.composedOf LogMOMDistributionEstimator
- * @apiviz.composedOf LogMADDistributionEstimator
+ * @apiviz.uses MOMDistributionEstimator
+ * @apiviz.uses MADDistributionEstimator
+ * @apiviz.uses LMMDistributionEstimator
+ * @apiviz.uses LogMOMDistributionEstimator
+ * @apiviz.uses LogMADDistributionEstimator
*/
public class BestFitEstimator implements DistributionEstimator<Distribution> {
/**
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/TrimmedEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/TrimmedEstimator.java
index 5c1cf448..a78d9760 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/TrimmedEstimator.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/TrimmedEstimator.java
@@ -31,8 +31,7 @@ import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayLikeUtil;
import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
@@ -43,6 +42,8 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
*
* @author Erich Schubert
*
+ * @apiviz.uses DistributionEstimator
+ *
* @param <D> Distribution type
*/
public class TrimmedEstimator<D extends Distribution> implements DistributionEstimator<D> {
@@ -75,7 +76,7 @@ public class TrimmedEstimator<D extends Distribution> implements DistributionEst
final int cut = ((int) (len * trim)) >> 1;
// X positions of samples
double[] x = new double[len];
- for (int i = 0; i < len; i++) {
+ for(int i = 0; i < len; i++) {
final double val = adapter.getDouble(data, i);
x[i] = val;
}
@@ -136,14 +137,14 @@ public class TrimmedEstimator<D extends Distribution> implements DistributionEst
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
ObjectParameter<DistributionEstimator<D>> innerP = new ObjectParameter<>(INNER_ID, DistributionEstimator.class);
- if (config.grab(innerP)) {
+ if(config.grab(innerP)) {
inner = innerP.instantiateClass(config);
}
DoubleParameter trimP = new DoubleParameter(TRIM_ID);
- trimP.addConstraint(new GreaterConstraint(0.));
- trimP.addConstraint(new LessConstraint(0.5));
- if (config.grab(trimP)) {
+ trimP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+ trimP.addConstraint(CommonConstraints.LESS_THAN_HALF_DOUBLE);
+ if(config.grab(trimP)) {
trim = trimP.doubleValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/WinsorisingEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/WinsorisingEstimator.java
index 0ef6318d..47fe427e 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/WinsorisingEstimator.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/WinsorisingEstimator.java
@@ -31,8 +31,7 @@ import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
@@ -53,6 +52,8 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
*
* @author Erich Schubert
*
+ * @apiviz.uses DistributionEstimator
+ *
* @param <D> Distribution type
*/
@Reference(authors = "C. Hastings, F. Mosteller, J. W. Tukey, C. P. Winsor", title = "Low moments for small samples: a comparative study of order statistics", booktitle = "The Annals of Mathematical Statistics, 18(3)", url = "http://dx.doi.org/10.1214/aoms/1177730388")
@@ -86,7 +87,7 @@ public class WinsorisingEstimator<D extends Distribution> implements Distributio
final int cut = ((int) (len * winsorize)) >> 1;
// X positions of samples
double[] x = new double[len];
- for (int i = 0; i < len; i++) {
+ for(int i = 0; i < len; i++) {
final double val = adapter.getDouble(data, i);
x[i] = val;
}
@@ -95,7 +96,7 @@ public class WinsorisingEstimator<D extends Distribution> implements Distributio
double max = QuickSelect.quickSelect(x, cut, len, len - 1 - cut);
// Winsorize by replacing the smallest and largest values.
// QuickSelect ensured that these are correctly in place.
- for (int i = 0, j = len - 1; i < cut; i++, j--) {
+ for(int i = 0, j = len - 1; i < cut; i++, j--) {
x[i] = min;
x[j] = max;
}
@@ -146,14 +147,14 @@ public class WinsorisingEstimator<D extends Distribution> implements Distributio
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
ObjectParameter<DistributionEstimator<D>> innerP = new ObjectParameter<>(INNER_ID, DistributionEstimator.class);
- if (config.grab(innerP)) {
+ if(config.grab(innerP)) {
inner = innerP.instantiateClass(config);
}
DoubleParameter trimP = new DoubleParameter(WINSORIZE_ID);
- trimP.addConstraint(new GreaterConstraint(0.));
- trimP.addConstraint(new LessConstraint(0.5));
- if (config.grab(trimP)) {
+ trimP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+ trimP.addConstraint(CommonConstraints.LESS_THAN_HALF_DOUBLE);
+ if(config.grab(trimP)) {
winsorize = trimP.doubleValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/package-info.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/package-info.java
index c4b75f2d..c06be5d7 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/package-info.java
@@ -3,4 +3,27 @@
*
* @author Erich Schubert
*/
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.meta; \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/package-info.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/package-info.java
index 62c98262..9a9f0993 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/package-info.java
@@ -2,6 +2,8 @@
* Estimators for statistical distributions.
*
* @author Erich Schubert
+ *
+ * @apiviz.exclude de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.meta.*
*/
package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator;
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/kernelfunctions/package-info.java b/src/de/lmu/ifi/dbs/elki/math/statistics/kernelfunctions/package-info.java
index adcadcaf..219509a5 100644
--- a/src/de/lmu/ifi/dbs/elki/math/statistics/kernelfunctions/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/math/statistics/kernelfunctions/package-info.java
@@ -1,4 +1,27 @@
/**
* Kernel functions from statistics.
*/
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
package de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions; \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/persistent/AbstractPageFileFactory.java b/src/de/lmu/ifi/dbs/elki/persistent/AbstractPageFileFactory.java
index 16094f90..3dce60eb 100644
--- a/src/de/lmu/ifi/dbs/elki/persistent/AbstractPageFileFactory.java
+++ b/src/de/lmu/ifi/dbs/elki/persistent/AbstractPageFileFactory.java
@@ -25,7 +25,7 @@ package de.lmu.ifi.dbs.elki.persistent;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -88,8 +88,8 @@ public abstract class AbstractPageFileFactory<P extends Page> implements PageFil
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
final IntParameter pageSizeP = new IntParameter(PAGE_SIZE_ID, 4000);
- pageSizeP.addConstraint(new GreaterConstraint(0));
- if (config.grab(pageSizeP)) {
+ pageSizeP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(pageSizeP)) {
pageSize = pageSizeP.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/persistent/LRUCachePageFileFactory.java b/src/de/lmu/ifi/dbs/elki/persistent/LRUCachePageFileFactory.java
index a87f8e1f..54c3e58f 100644
--- a/src/de/lmu/ifi/dbs/elki/persistent/LRUCachePageFileFactory.java
+++ b/src/de/lmu/ifi/dbs/elki/persistent/LRUCachePageFileFactory.java
@@ -2,7 +2,7 @@ package de.lmu.ifi.dbs.elki.persistent;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
@@ -116,13 +116,13 @@ public class LRUCachePageFileFactory<P extends Page> implements PageFileFactory<
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
ObjectParameter<PageFileFactory<Page>> pffP = new ObjectParameter<>(PAGEFILE_ID, PageFileFactory.class, PersistentPageFileFactory.class);
- if (config.grab(pffP)) {
+ if(config.grab(pffP)) {
pageFileFactory = pffP.instantiateClass(config);
}
IntParameter cacheSizeP = new IntParameter(CACHE_SIZE_ID);
- cacheSizeP.addConstraint(new GreaterEqualConstraint(0));
- if (config.grab(cacheSizeP)) {
+ cacheSizeP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_INT);
+ if(config.grab(cacheSizeP)) {
cacheSize = cacheSizeP.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/BitsUtil.java b/src/de/lmu/ifi/dbs/elki/utilities/BitsUtil.java
index d1e0587c..64d5be25 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/BitsUtil.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/BitsUtil.java
@@ -42,16 +42,24 @@ public final class BitsUtil {
*/
private static final int LONG_LOG2_SIZE = 6;
- /**
- * Masking for long shifts.
- */
+ /** Masking for long shifts. */
private static final int LONG_LOG2_MASK = 0x3f; // 6 bits
- /**
- * Long with all bits set
- */
+ /** Long with all bits set */
private static final long LONG_ALL_BITS = -1L;
+ /** Long, with 63 bits set */
+ private static final long LONG_63_BITS = 0x7FFFFFFFFFFFFFFFL;
+
+ /** Mask for the low 32 bits. */
+ private static final long LONG_32_BITS = 0xFFFFFFFFL;
+
+ /** Precomputed powers of 5 for pow5, pow10 on the bit representation. */
+ private static final int[] POW5_INT = { //
+ 1, 5, 25, 125, 625,//
+ 3125, 15625, 78125, 390625, 1953125,//
+ 9765625, 48828125, 244140625, 1220703125 };
+
/**
* Allocate a new long[].
*
@@ -1177,4 +1185,161 @@ public final class BitsUtil {
}
return 0;
}
+
+ public static double lpow2(long m, int n) {
+ if (m == 0) {
+ return 0.0;
+ }
+ if (m == Long.MIN_VALUE) {
+ return lpow2(Long.MIN_VALUE >> 1, n + 1);
+ }
+ if (m < 0) {
+ return -lpow2(-m, n);
+ }
+ assert(m >= 0);
+ int bitLength = magnitude(m);
+ int shift = bitLength - 53;
+ long exp = 1023L + 52 + n + shift; // Use long to avoid overflow.
+ if (exp >= 0x7FF) {
+ return Double.POSITIVE_INFINITY;
+ }
+ if (exp <= 0) { // Degenerated number (subnormal, assume 0 for bit 52)
+ if (exp <= -54) {
+ return 0.0;
+ }
+ return lpow2(m, n + 54) / 18014398509481984L; // 2^54 Exact.
+ }
+ // Normal number.
+ long bits = (shift > 0) ? (m >> shift) + ((m >> (shift - 1)) & 1) : // Rounding.
+ m << -shift;
+ if (((bits >> 52) != 1) && (++exp >= 0x7FF)) {
+ return Double.POSITIVE_INFINITY;
+ }
+ bits &= 0x000fffffffffffffL; // Clears MSB (bit 52)
+ bits |= exp << 52;
+ return Double.longBitsToDouble(bits);
+ }
+
+ /**
+ * Compute {@code m * Math.pow(10,e)} on the bit representation, for
+ * assembling a floating point decimal value.
+ *
+ * @param m Mantissa
+ * @param n Exponent to base 10.
+ * @return Double value.
+ */
+ public static double lpow10(long m, int n) {
+ if (m == 0) {
+ return 0.0;
+ }
+ if (m == Long.MIN_VALUE) {
+ return lpow10(Long.MIN_VALUE / 10, n + 1);
+ }
+ if (m < 0) {
+ return -lpow10(-m, n);
+ }
+ if (n >= 0) { // Positive power.
+ if (n > 308) {
+ return Double.POSITIVE_INFINITY;
+ }
+ // Works with 4 x 32 bits registers (x3:x2:x1:x0)
+ long x0 = 0; // 32 bits.
+ long x1 = 0; // 32 bits.
+ long x2 = m & LONG_32_BITS; // 32 bits.
+ long x3 = m >>> 32; // 32 bits.
+ int pow2 = 0;
+ while (n != 0) {
+ int i = (n >= POW5_INT.length) ? POW5_INT.length - 1 : n;
+ int coef = POW5_INT[i]; // 31 bits max.
+
+ if (((int) x0) != 0) {
+ x0 *= coef; // 63 bits max.
+ }
+ if (((int) x1) != 0) {
+ x1 *= coef; // 63 bits max.
+ }
+ x2 *= coef; // 63 bits max.
+ x3 *= coef; // 63 bits max.
+
+ x1 += x0 >>> 32;
+ x0 &= LONG_32_BITS;
+
+ x2 += x1 >>> 32;
+ x1 &= LONG_32_BITS;
+
+ x3 += x2 >>> 32;
+ x2 &= LONG_32_BITS;
+
+ // Adjusts powers.
+ pow2 += i;
+ n -= i;
+
+ // Normalizes (x3 should be 32 bits max).
+ long carry = x3 >>> 32;
+ if (carry != 0) { // Shift.
+ x0 = x1;
+ x1 = x2;
+ x2 = x3 & LONG_32_BITS;
+ x3 = carry;
+ pow2 += 32;
+ }
+ }
+
+ // Merges registers to a 63 bits mantissa.
+ assert(x3 >= 0);
+ int shift = 31 - magnitude(x3); // -1..30
+ pow2 -= shift;
+ long mantissa = (shift < 0) ? (x3 << 31) | (x2 >>> 1) : // x3 is 32 bits.
+ (((x3 << 32) | x2) << shift) | (x1 >>> (32 - shift));
+ return lpow2(mantissa, pow2);
+ } else { // n < 0
+ if (n < -324 - 20) {
+ return 0.;
+ }
+
+ // Works with x1:x0 126 bits register.
+ long x1 = m; // 63 bits.
+ long x0 = 0; // 63 bits.
+ int pow2 = 0;
+ while (true) {
+ // Normalizes x1:x0
+ assert(x1 >= 0);
+ int shift = 63 - magnitude(x1);
+ x1 <<= shift;
+ x1 |= x0 >>> (63 - shift);
+ x0 = (x0 << shift) & LONG_63_BITS;
+ pow2 -= shift;
+
+ // Checks if division has to be performed.
+ if (n == 0) {
+ break; // Done.
+ }
+
+ // Retrieves power of 5 divisor.
+ int i = (-n >= POW5_INT.length) ? POW5_INT.length - 1 : -n;
+ int divisor = POW5_INT[i];
+
+ // Performs the division (126 bits by 31 bits).
+ long wh = (x1 >>> 32);
+ long qh = wh / divisor;
+ long r = wh - qh * divisor;
+ long wl = (r << 32) | (x1 & LONG_32_BITS);
+ long ql = wl / divisor;
+ r = wl - ql * divisor;
+ x1 = (qh << 32) | ql;
+
+ wh = (r << 31) | (x0 >>> 32);
+ qh = wh / divisor;
+ r = wh - qh * divisor;
+ wl = (r << 32) | (x0 & LONG_32_BITS);
+ ql = wl / divisor;
+ x0 = (qh << 32) | ql;
+
+ // Adjusts powers.
+ n += i;
+ pow2 -= i;
+ }
+ return lpow2(x1, pow2);
+ }
+ }
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/FormatUtil.java b/src/de/lmu/ifi/dbs/elki/utilities/FormatUtil.java
index 08b7bd0d..4601cce9 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/FormatUtil.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/FormatUtil.java
@@ -167,10 +167,11 @@ public final class FormatUtil {
*/
public static String format(double[] d, String sep) {
StringBuilder buffer = new StringBuilder();
- for (int i = 0; i < d.length; i++) {
- if (i > 0) {
+ for(int i = 0; i < d.length; i++) {
+ if(i > 0) {
buffer.append(sep).append(d[i]);
- } else {
+ }
+ else {
buffer.append(d[i]);
}
}
@@ -189,10 +190,11 @@ public final class FormatUtil {
*/
public static String format(double[] d, String sep, int digits) {
StringBuilder buffer = new StringBuilder();
- for (int i = 0; i < d.length; i++) {
- if (i < d.length - 1) {
+ for(int i = 0; i < d.length; i++) {
+ if(i < d.length - 1) {
buffer.append(format(d[i], digits)).append(sep);
- } else {
+ }
+ else {
buffer.append(format(d[i], digits));
}
}
@@ -221,10 +223,11 @@ public final class FormatUtil {
*/
public static String format(double[] d, String sep, NumberFormat nf) {
StringBuilder buffer = new StringBuilder();
- for (int i = 0; i < d.length; i++) {
- if (i < d.length - 1) {
+ for(int i = 0; i < d.length; i++) {
+ if(i < d.length - 1) {
buffer.append(format(d[i], nf)).append(sep);
- } else {
+ }
+ else {
buffer.append(format(d[i], nf));
}
}
@@ -261,7 +264,7 @@ public final class FormatUtil {
*/
public static String format(double[][] d) {
StringBuilder buffer = new StringBuilder();
- for (double[] array : d) {
+ for(double[] array : d) {
buffer.append(format(array, ", ", 2)).append('\n');
}
return buffer.toString();
@@ -280,10 +283,11 @@ public final class FormatUtil {
public static String format(double[][] d, String sep1, String sep2, int digits) {
StringBuilder buffer = new StringBuilder();
- for (int i = 0; i < d.length; i++) {
- if (i < d.length - 1) {
+ for(int i = 0; i < d.length; i++) {
+ if(i < d.length - 1) {
buffer.append(format(d[i], sep2, digits)).append(sep1);
- } else {
+ }
+ else {
buffer.append(format(d[i], sep2, digits));
}
}
@@ -303,10 +307,11 @@ public final class FormatUtil {
*/
public static String format(Double[] f, String sep, int digits) {
StringBuilder buffer = new StringBuilder();
- for (int i = 0; i < f.length; i++) {
- if (i < f.length - 1) {
+ for(int i = 0; i < f.length; i++) {
+ if(i < f.length - 1) {
buffer.append(format(f[i].doubleValue(), digits)).append(sep);
- } else {
+ }
+ else {
buffer.append(format(f[i].doubleValue(), digits));
}
}
@@ -335,10 +340,11 @@ public final class FormatUtil {
*/
public static String format(Double[] f, String sep, NumberFormat nf) {
StringBuilder buffer = new StringBuilder();
- for (int i = 0; i < f.length; i++) {
- if (i < f.length - 1) {
+ for(int i = 0; i < f.length; i++) {
+ if(i < f.length - 1) {
buffer.append(format(f[i].doubleValue(), nf)).append(sep);
- } else {
+ }
+ else {
buffer.append(format(f[i].doubleValue(), nf));
}
}
@@ -368,10 +374,11 @@ public final class FormatUtil {
*/
public static String format(float[] f, String sep, int digits) {
StringBuilder buffer = new StringBuilder();
- for (int i = 0; i < f.length; i++) {
- if (i < f.length - 1) {
+ for(int i = 0; i < f.length; i++) {
+ if(i < f.length - 1) {
buffer.append(format(f[i], digits)).append(sep);
- } else {
+ }
+ else {
buffer.append(format(f[i], digits));
}
}
@@ -398,10 +405,11 @@ public final class FormatUtil {
*/
public static String format(int[] a, String sep) {
StringBuilder buffer = new StringBuilder();
- for (int i = 0; i < a.length; i++) {
- if (i < a.length - 1) {
+ for(int i = 0; i < a.length; i++) {
+ if(i < a.length - 1) {
buffer.append(a[i]).append(sep);
- } else {
+ }
+ else {
buffer.append(a[i]);
}
}
@@ -428,10 +436,11 @@ public final class FormatUtil {
*/
public static String format(Integer[] a, String sep) {
StringBuilder buffer = new StringBuilder();
- for (int i = 0; i < a.length; i++) {
- if (i < a.length - 1) {
+ for(int i = 0; i < a.length; i++) {
+ if(i < a.length - 1) {
buffer.append(a[i]).append(sep);
- } else {
+ }
+ else {
buffer.append(a[i]);
}
}
@@ -456,10 +465,11 @@ public final class FormatUtil {
*/
public static String format(long[] a) {
StringBuilder buffer = new StringBuilder();
- for (int i = 0; i < a.length; i++) {
- if (i < a.length - 1) {
+ for(int i = 0; i < a.length; i++) {
+ if(i < a.length - 1) {
buffer.append(a[i]).append(", ");
- } else {
+ }
+ else {
buffer.append(a[i]);
}
}
@@ -474,10 +484,11 @@ public final class FormatUtil {
*/
public static String format(byte[] a) {
StringBuilder buffer = new StringBuilder();
- for (int i = 0; i < a.length; i++) {
- if (i < a.length - 1) {
+ for(int i = 0; i < a.length; i++) {
+ if(i < a.length - 1) {
buffer.append(a[i]).append(", ");
- } else {
+ }
+ else {
buffer.append(a[i]);
}
}
@@ -494,10 +505,11 @@ public final class FormatUtil {
*/
public static String format(boolean[] b, final String sep) {
StringBuilder buffer = new StringBuilder();
- for (int i = 0; i < b.length; i++) {
- if (i < b.length - 1) {
+ for(int i = 0; i < b.length; i++) {
+ if(i < b.length - 1) {
buffer.append(format(b[i])).append(sep);
- } else {
+ }
+ else {
buffer.append(format(b[i]));
}
}
@@ -511,7 +523,7 @@ public final class FormatUtil {
* @return a String representing of the boolean b
*/
public static String format(final boolean b) {
- if (b) {
+ if(b) {
return "1";
}
return "0";
@@ -528,13 +540,14 @@ public final class FormatUtil {
public static String format(BitSet bitSet, int dim, String sep) {
StringBuilder msg = new StringBuilder();
- for (int d = 0; d < dim; d++) {
- if (d > 0) {
+ for(int d = 0; d < dim; d++) {
+ if(d > 0) {
msg.append(sep);
}
- if (bitSet.get(d)) {
+ if(bitSet.get(d)) {
msg.append('1');
- } else {
+ }
+ else {
msg.append('0');
}
}
@@ -563,16 +576,16 @@ public final class FormatUtil {
* @return a String representing the String Collection d
*/
public static String format(Collection<String> d, String sep) {
- if (d.size() == 0) {
+ if(d.size() == 0) {
return "";
}
- if (d.size() == 1) {
+ if(d.size() == 1) {
return d.iterator().next();
}
StringBuilder buffer = new StringBuilder();
boolean first = true;
- for (String str : d) {
- if (!first) {
+ for(String str : d) {
+ if(!first) {
buffer.append(sep);
}
buffer.append(str);
@@ -600,12 +613,12 @@ public final class FormatUtil {
int width = w + 1;
StringBuilder msg = new StringBuilder();
msg.append('\n'); // start on new line.
- for (int i = 0; i < m.getRowDimensionality(); i++) {
- for (int j = 0; j < m.getColumnDimensionality(); j++) {
+ for(int i = 0; i < m.getRowDimensionality(); i++) {
+ for(int j = 0; j < m.getColumnDimensionality(); j++) {
String s = format.format(m.get(i, j)); // format the number
int padding = Math.max(1, width - s.length()); // At _least_ 1
// space
- for (int k = 0; k < padding; k++) {
+ for(int k = 0; k < padding; k++) {
msg.append(' ');
}
msg.append(s);
@@ -636,11 +649,11 @@ public final class FormatUtil {
int width = w + 1;
StringBuilder msg = new StringBuilder();
msg.append('\n'); // start on new line.
- for (int i = 0; i < v.getDimensionality(); i++) {
+ for(int i = 0; i < v.getDimensionality(); i++) {
String s = format.format(v.get(i)); // format the number
int padding = Math.max(1, width - s.length()); // At _least_ 1
// space
- for (int k = 0; k < padding; k++) {
+ for(int k = 0; k < padding; k++) {
msg.append(' ');
}
msg.append(s);
@@ -660,11 +673,11 @@ public final class FormatUtil {
public static String format(Matrix m, String pre) {
StringBuilder output = new StringBuilder();
output.append(pre).append("[\n").append(pre);
- for (int i = 0; i < m.getRowDimensionality(); i++) {
+ for(int i = 0; i < m.getRowDimensionality(); i++) {
output.append(" [");
- for (int j = 0; j < m.getColumnDimensionality(); j++) {
+ for(int j = 0; j < m.getColumnDimensionality(); j++) {
output.append(' ').append(m.get(i, j));
- if (j < m.getColumnDimensionality() - 1) {
+ if(j < m.getColumnDimensionality() - 1) {
output.append(',');
}
}
@@ -685,26 +698,26 @@ public final class FormatUtil {
public static String format(Matrix m, NumberFormat nf) {
int[] colMax = new int[m.getColumnDimensionality()];
String[][] entries = new String[m.getRowDimensionality()][m.getColumnDimensionality()];
- for (int i = 0; i < m.getRowDimensionality(); i++) {
- for (int j = 0; j < m.getColumnDimensionality(); j++) {
+ for(int i = 0; i < m.getRowDimensionality(); i++) {
+ for(int j = 0; j < m.getColumnDimensionality(); j++) {
entries[i][j] = nf.format(m.get(i, j));
- if (entries[i][j].length() > colMax[j]) {
+ if(entries[i][j].length() > colMax[j]) {
colMax[j] = entries[i][j].length();
}
}
}
StringBuilder output = new StringBuilder();
output.append("[\n");
- for (int i = 0; i < m.getRowDimensionality(); i++) {
+ for(int i = 0; i < m.getRowDimensionality(); i++) {
output.append(" [");
- for (int j = 0; j < m.getColumnDimensionality(); j++) {
+ for(int j = 0; j < m.getColumnDimensionality(); j++) {
output.append(' ');
int space = colMax[j] - entries[i][j].length();
- for (int s = 0; s < space; s++) {
+ for(int s = 0; s < space; s++) {
output.append(' ');
}
output.append(entries[i][j]);
- if (j < m.getColumnDimensionality() - 1) {
+ if(j < m.getColumnDimensionality() - 1) {
output.append(',');
}
}
@@ -754,9 +767,9 @@ public final class FormatUtil {
public static String format(Vector v, String pre) {
StringBuilder output = new StringBuilder();
output.append(pre).append("[\n").append(pre);
- for (int j = 0; j < v.getDimensionality(); j++) {
+ for(int j = 0; j < v.getDimensionality(); j++) {
output.append(' ').append(v.get(j));
- if (j < v.getDimensionality() - 1) {
+ if(j < v.getDimensionality() - 1) {
output.append(',');
}
}
@@ -774,32 +787,32 @@ public final class FormatUtil {
* @return a string representation of this matrix
*/
public static String format(Matrix m, String pre, NumberFormat nf) {
- if (nf == null) {
+ if(nf == null) {
return FormatUtil.format(m, pre);
}
int[] colMax = new int[m.getColumnDimensionality()];
String[][] entries = new String[m.getRowDimensionality()][m.getColumnDimensionality()];
- for (int i = 0; i < m.getRowDimensionality(); i++) {
- for (int j = 0; j < m.getColumnDimensionality(); j++) {
+ for(int i = 0; i < m.getRowDimensionality(); i++) {
+ for(int j = 0; j < m.getColumnDimensionality(); j++) {
entries[i][j] = nf.format(m.get(i, j));
- if (entries[i][j].length() > colMax[j]) {
+ if(entries[i][j].length() > colMax[j]) {
colMax[j] = entries[i][j].length();
}
}
}
StringBuilder output = new StringBuilder();
output.append(pre).append("[\n").append(pre);
- for (int i = 0; i < m.getRowDimensionality(); i++) {
+ for(int i = 0; i < m.getRowDimensionality(); i++) {
output.append(" [");
- for (int j = 0; j < m.getColumnDimensionality(); j++) {
+ for(int j = 0; j < m.getColumnDimensionality(); j++) {
output.append(' ');
int space = colMax[j] - entries[i][j].length();
- for (int s = 0; s < space; s++) {
+ for(int s = 0; s < space; s++) {
output.append(' ');
}
output.append(entries[i][j]);
- if (j < m.getColumnDimensionality() - 1) {
+ if(j < m.getColumnDimensionality() - 1) {
output.append(',');
}
}
@@ -821,22 +834,22 @@ public final class FormatUtil {
public static int findSplitpoint(String s, int width) {
// the newline (or EOS) is the fallback split position.
int in = s.indexOf(NEWLINE);
- if (in < 0) {
+ if(in < 0) {
in = s.length();
}
// Good enough?
- if (in < width) {
+ if(in < width) {
return in;
}
// otherwise, search for whitespace
int iw = s.lastIndexOf(' ', width);
// good whitespace found?
- if (iw >= 0 && iw < width) {
+ if(iw >= 0 && iw < width) {
return iw;
}
// sub-optimal splitpoint - retry AFTER the given position
int bp = nextPosition(s.indexOf(' ', width), s.indexOf(NEWLINE, width));
- if (bp >= 0) {
+ if(bp >= 0) {
return bp;
}
// even worse - can't split!
@@ -853,10 +866,10 @@ public final class FormatUtil {
* otherwise whichever is positive.
*/
private static int nextPosition(int a, int b) {
- if (a < 0) {
+ if(a < 0) {
return b;
}
- if (b < 0) {
+ if(b < 0) {
return a;
}
return Math.min(a, b);
@@ -874,19 +887,19 @@ public final class FormatUtil {
List<String> chunks = new ArrayList<>();
String tmp = s;
- while (tmp.length() > 0) {
+ while(tmp.length() > 0) {
int index = findSplitpoint(tmp, width);
// store first part
chunks.add(tmp.substring(0, index));
// skip whitespace at beginning of line
- while (index < tmp.length() && tmp.charAt(index) == ' ') {
+ while(index < tmp.length() && tmp.charAt(index) == ' ') {
index += 1;
}
// remove a newline
- if (index < tmp.length() && tmp.regionMatches(index, NEWLINE, 0, NEWLINE.length())) {
+ if(index < tmp.length() && tmp.regionMatches(index, NEWLINE, 0, NEWLINE.length())) {
index += NEWLINE.length();
}
- if (index >= tmp.length()) {
+ if(index >= tmp.length()) {
break;
}
tmp = tmp.substring(index);
@@ -902,11 +915,11 @@ public final class FormatUtil {
* @return a string with the specified number of blanks
*/
public static String whitespace(int n) {
- if (n < WHITESPACE_BUFFER.length()) {
+ if(n < WHITESPACE_BUFFER.length()) {
return WHITESPACE_BUFFER.substring(0, n);
}
char[] buf = new char[n];
- for (int i = 0; i < n; i++) {
+ for(int i = 0; i < n; i++) {
buf[i] = WHITESPACE_BUFFER.charAt(0);
}
return new String(buf);
@@ -920,7 +933,7 @@ public final class FormatUtil {
* @return padded string of at least length len (and o otherwise)
*/
public static String pad(String o, int len) {
- if (o.length() >= len) {
+ if(o.length() >= len) {
return o;
}
return o + whitespace(len - o.length());
@@ -934,7 +947,7 @@ public final class FormatUtil {
* @return padded string of at least length len (and o otherwise)
*/
public static String padRightAligned(String o, int len) {
- if (o.length() >= len) {
+ if(o.length() >= len) {
return o;
}
return whitespace(len - o.length()) + o;
@@ -950,9 +963,11 @@ public final class FormatUtil {
final int default_termwidth = 78;
try {
return Integer.parseInt(System.getenv("COLUMNS")) - 1;
- } catch (SecurityException e) {
+ }
+ catch(SecurityException e) {
return default_termwidth;
- } catch (NumberFormatException e) {
+ }
+ catch(NumberFormatException e) {
return default_termwidth;
}
}
@@ -967,18 +982,18 @@ public final class FormatUtil {
final StringBuilder sb = new StringBuilder();
final Formatter fmt = new Formatter(sb);
- for (int i = TIME_UNIT_SIZES.length - 1; i >= 0; --i) {
+ for(int i = TIME_UNIT_SIZES.length - 1; i >= 0; --i) {
// We do not include ms if we are in the order of minutes.
- if (i == 0 && sb.length() > 4) {
+ if(i == 0 && sb.length() > 4) {
continue;
}
// Separator
- if (sb.length() > 0) {
+ if(sb.length() > 0) {
sb.append(sep);
}
final long acValue = time / TIME_UNIT_SIZES[i];
time = time % TIME_UNIT_SIZES[i];
- if (!(acValue == 0 && sb.length() == 0)) {
+ if(!(acValue == 0 && sb.length() == 0)) {
fmt.format("%0" + TIME_UNIT_DIGITS[i] + "d%s", Long.valueOf(acValue), TIME_UNIT_NAMES[i]);
}
}
@@ -996,13 +1011,341 @@ public final class FormatUtil {
*/
public static String format(String[] d, String sep) {
StringBuilder buffer = new StringBuilder();
- for (int i = 0; i < d.length; i++) {
- if (i > 0) {
+ for(int i = 0; i < d.length; i++) {
+ if(i > 0) {
buffer.append(sep).append(d[i]);
- } else {
+ }
+ else {
buffer.append(d[i]);
}
}
return buffer.toString();
}
+
+ /**
+ * Preallocated exceptions.
+ */
+ private static final NumberFormatException EXPONENT_OVERFLOW = new NumberFormatException("Precision overflow for double exponent.") {
+ private static final long serialVersionUID = 1L;
+
+ @Override
+ public synchronized Throwable fillInStackTrace() {
+ return this;
+ }
+ };
+
+ /**
+ * Preallocated exceptions.
+ */
+ private static final NumberFormatException INVALID_EXPONENT = new NumberFormatException("Invalid exponent") {
+ private static final long serialVersionUID = 1L;
+
+ @Override
+ public synchronized Throwable fillInStackTrace() {
+ return this;
+ }
+ };
+
+ /**
+ * Preallocated exceptions.
+ */
+ private static final NumberFormatException TRAILING_CHARACTERS = new NumberFormatException("String sequence was not completely consumed.") {
+ private static final long serialVersionUID = 1L;
+
+ @Override
+ public synchronized Throwable fillInStackTrace() {
+ return this;
+ }
+ };
+
+ /**
+ * Preallocated exceptions.
+ */
+ private static final NumberFormatException PRECISION_OVERFLOW = new NumberFormatException("Precision overflow for long values.") {
+ private static final long serialVersionUID = 1L;
+
+ @Override
+ public synchronized Throwable fillInStackTrace() {
+ return this;
+ }
+ };
+
+ /**
+ * Preallocated exceptions.
+ */
+ private static final NumberFormatException NOT_A_NUMBER = new NumberFormatException("Number must start with a digit or dot.") {
+ private static final long serialVersionUID = 1L;
+
+ @Override
+ public synchronized Throwable fillInStackTrace() {
+ return this;
+ }
+ };
+
+ /**
+ * Parse a double from a character sequence.
+ *
+ * In contrast to Java's {@link Double#parseDouble}, this will <em>not</em>
+ * create an object and thus is expected to put less load on the garbage
+ * collector. It will accept some more spellings of NaN and infinity, thus
+ * removing the need for checking for these independently.
+ *
+ * @param str String
+ * @return Double value
+ */
+ public static double parseDouble(final CharSequence str) {
+ return parseDouble(str, 0, str.length());
+ }
+
+ /**
+ * Parse a double from a character sequence.
+ *
+ * In contrast to Java's {@link Double#parseDouble}, this will <em>not</em>
+ * create an object and thus is expected to put less load on the garbage
+ * collector. It will accept some more spellings of NaN and infinity, thus
+ * removing the need for checking for these independently.
+ *
+ * @param str String
+ * @param start Begin
+ * @param end End
+ * @return Double value
+ */
+ public static double parseDouble(final CharSequence str, final int start, final int end) {
+ // Current position and character.
+ int pos = start;
+ char cur = str.charAt(pos);
+
+ // Match for NaN spellings
+ if(matchNaN(str, cur, pos, end)) {
+ return Double.NaN;
+ }
+ // Match sign
+ boolean isNegative = (cur == '-');
+ // Carefully consume the - character, update c and i:
+ if((isNegative || (cur == '+')) && (++pos < end)) {
+ cur = str.charAt(pos);
+ }
+ if(matchInf(str, cur, pos, end)) {
+ return isNegative ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
+ }
+
+ // Begin parsing real numbers!
+ if(((cur < '0') || (cur > '9')) && (cur != '.')) {
+ throw NOT_A_NUMBER;
+ }
+
+ // Parse digits into a long, remember offset of decimal point.
+ long decimal = 0;
+ int decimalPoint = -1;
+ while(true) {
+ final int digit = cur - '0';
+ if((digit >= 0) && (digit <= 9)) {
+ final long tmp = (decimal << 3) + (decimal << 1) + digit;
+ if((decimal > MAX_LONG_OVERFLOW) || (tmp < decimal)) {
+ throw PRECISION_OVERFLOW;
+ }
+ decimal = tmp;
+ }
+ else if((cur == '.') && (decimalPoint < 0)) {
+ decimalPoint = pos;
+ }
+ else { // No more digits, or a second dot.
+ break;
+ }
+ if(++pos < end) {
+ cur = str.charAt(pos);
+ }
+ else {
+ break;
+ }
+ }
+ // We need the offset from the back for adjusting the exponent:
+ // Note that we need the current value of i!
+ decimalPoint = (decimalPoint >= 0) ? pos - decimalPoint - 1 : 0;
+
+ // Reads exponent.
+ int exp = 0;
+ if((pos < end) && ((cur == 'E') || (cur == 'e'))) {
+ cur = str.charAt(++pos);
+ final boolean isNegativeExp = (cur == '-');
+ if((isNegativeExp || (cur == '+')) && (++pos < end)) {
+ cur = str.charAt(pos);
+ }
+ if((cur < '0') || (cur > '9')) { // At least one digit required.
+ throw INVALID_EXPONENT;
+ }
+ while(true) {
+ final int digit = cur - '0';
+ if((digit >= 0) && (digit < 10)) {
+ final int tmp = (exp << 3) + (exp << 1) + digit;
+ // Actually, double can only handle Double.MAX_EXPONENT? How about
+ // subnormal?
+ if((exp > MAX_INT_OVERFLOW) || (tmp < exp)) {
+ throw EXPONENT_OVERFLOW;
+ }
+ exp = tmp;
+ }
+ else {
+ break;
+ }
+ if(++pos < end) {
+ cur = str.charAt(pos);
+ }
+ else {
+ break;
+ }
+ }
+ if(isNegativeExp) {
+ exp = -exp;
+ }
+ }
+ // Adjust exponent by the offset of the dot in our long.
+ if(decimalPoint >= 0) {
+ exp = exp - decimalPoint;
+ }
+ if(pos != end) {
+ throw TRAILING_CHARACTERS;
+ }
+
+ return BitsUtil.lpow10(isNegative ? -decimal : decimal, exp);
+ }
+
+ /**
+ * Match "NaN" in a number of different capitalizations.
+ *
+ * @param str String to match
+ * @param firstchar First character
+ * @param start Interval begin
+ * @param end Interval end
+ * @return {@code true} when NaN was recognized.
+ */
+ private static boolean matchNaN(CharSequence str, char firstchar, int start, int end) {
+ final int len = end - start;
+ if(len < 2 || len > 3) {
+ return false;
+ }
+ if(firstchar != 'N' && firstchar != 'n') {
+ return false;
+ }
+ final char c1 = str.charAt(start + 1);
+ if(c1 != 'a' && c1 != 'A') {
+ return false;
+ }
+ // Accept just "NA", too:
+ if(len == 2) {
+ return true;
+ }
+ final char c2 = str.charAt(start + 2);
+ if(c2 != 'N' && c2 != 'n') {
+ return false;
+ }
+ return true;
+ }
+
+ /**
+ * Maximum long that we can process without overflowing.
+ */
+ private static final long MAX_LONG_OVERFLOW = Long.MAX_VALUE / 10;
+
+ /**
+ * Maximum integer that we can process without overflowing.
+ */
+ private static final int MAX_INT_OVERFLOW = Integer.MAX_VALUE / 10;
+
+ /**
+ * Infinity pattern, with any capitalization
+ */
+ private static final char[] INFINITY_PATTERN = { //
+ 'I', 'n', 'f', 'i', 'n', 'i', 't', 'y', //
+ 'i', 'N', 'F', 'I', 'N', 'I', 'T', 'Y' };
+
+ /** Length of pattern */
+ private static final int INFINITY_LENGTH = INFINITY_PATTERN.length >> 1;
+
+ /**
+ * Match "inf", "infinity" in a number of different capitalizations.
+ *
+ * @param str String to match
+ * @param firstchar First character
+ * @param start Interval begin
+ * @param end Interval end
+ * @return {@code true} when infinity was recognized.
+ */
+ private static boolean matchInf(CharSequence str, char firstchar, int start, int end) {
+ final int len = end - start;
+ // The wonders of unicode. This is more than one byte on UTF-8
+ if(len == 1 && firstchar == '∞') {
+ return true;
+ }
+ if(len != 3 && len != INFINITY_LENGTH) {
+ return false;
+ }
+ // Test beginning: "inf"
+ if(firstchar != 'I' && firstchar != 'i') {
+ return false;
+ }
+ for(int i = 1, j = INFINITY_LENGTH + 1; i < INFINITY_LENGTH; i++, j++) {
+ final char c = str.charAt(start + i);
+ if(c != INFINITY_PATTERN[i] && c != INFINITY_PATTERN[j]) {
+ return false;
+ }
+ if(i == 2 && len == 3) {
+ return true;
+ }
+ }
+ return true;
+ }
+
+ /**
+ * Parse a long integer from a character sequence.
+ *
+ * @param str String
+ * @param start Begin
+ * @param end End
+ * @return Long value
+ */
+ public static long parseLongBase10(final CharSequence str, final int start, final int end) {
+ // Current position and character.
+ int pos = start;
+ char cur = str.charAt(pos);
+
+ // Match sign
+ boolean isNegative = (cur == '-');
+ // Carefully consume the - character, update c and i:
+ if((isNegative || (cur == '+')) && (++pos < end)) {
+ cur = str.charAt(pos);
+ }
+
+ // Begin parsing real numbers!
+ if((cur < '0') || (cur > '9')) {
+ throw NOT_A_NUMBER;
+ }
+
+ // Parse digits into a long, remember offset of decimal point.
+ long decimal = 0;
+ while(true) {
+ final int digit = cur - '0';
+ if((digit >= 0) && (digit <= 9)) {
+ final long tmp = (decimal << 3) + (decimal << 1) + digit;
+ if(tmp < decimal) {
+ throw PRECISION_OVERFLOW;
+ }
+ decimal = tmp;
+ }
+ else { // No more digits.
+ break;
+ }
+ if(++pos < end) {
+ cur = str.charAt(pos);
+ }
+ else {
+ break;
+ }
+ }
+ if(pos != end) {
+ throw TRAILING_CHARACTERS;
+ }
+
+ return isNegative ? -decimal : decimal;
+ }
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/InspectionUtil.java b/src/de/lmu/ifi/dbs/elki/utilities/InspectionUtil.java
index 0d147420..29745335 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/InspectionUtil.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/InspectionUtil.java
@@ -45,8 +45,6 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
* A collection of inspection-related utility functions.
*
* @author Erich Schubert
- *
- * @apiviz.uses InspectionUtilFrequentlyScanned
*/
public class InspectionUtil {
/**
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/RandomFactory.java b/src/de/lmu/ifi/dbs/elki/utilities/RandomFactory.java
index 9b870acb..229afe5f 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/RandomFactory.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/RandomFactory.java
@@ -86,4 +86,18 @@ public class RandomFactory {
return new Random();
}
}
+
+ /**
+ * Get a <em>non-threadsafe</em> random generator.
+ *
+ * @return Random generator
+ */
+ public Random getSingleThreadedRandom() {
+ if(seed != null) {
+ return new UnsafeRandom(seed.longValue());
+ }
+ else {
+ return new UnsafeRandom();
+ }
+ }
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/UnsafeRandom.java b/src/de/lmu/ifi/dbs/elki/utilities/UnsafeRandom.java
new file mode 100644
index 00000000..898b685a
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/UnsafeRandom.java
@@ -0,0 +1,81 @@
+package de.lmu.ifi.dbs.elki.utilities;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import java.util.Random;
+
+/**
+ * Drop-in replacement for {@link java.util.Random}, but not using atomic long
+ * seeds. This implementation is <em>no longer thread-safe</em> (but faster)!
+ *
+ * @author Erich Schubert
+ */
+public class UnsafeRandom extends Random {
+ /**
+ * Serial version number.
+ */
+ private static final long serialVersionUID = 1L;
+
+ // These are the same constants as in {@link java.util.Random}
+ // since we want to leave the random sequence unchanged.
+ private static final long multiplier = 0x5DEECE66DL, addend = 0xBL,
+ mask = (1L << 48) - 1;
+
+ /**
+ * The random seed. We can't use super.seed.
+ */
+ private long seed;
+
+ /**
+ * Constructor called only by localRandom.initialValue.
+ */
+ public UnsafeRandom() {
+ super();
+ }
+
+ /**
+ * Constructor.
+ *
+ * @param seed Random generator seed.
+ */
+ public UnsafeRandom(long seed) {
+ this.seed = (seed ^ multiplier) & mask;
+ }
+
+ /**
+ * Reset the random generator seed. Note that unlike
+ * {@link java.util.Random}, this implementation is not thread-safe.
+ *
+ * @param seed New random seed
+ */
+ @Override
+ public void setSeed(long seed) {
+ this.seed = (seed ^ multiplier) & mask;
+ }
+
+ @Override
+ protected int next(int bits) {
+ seed = (seed * multiplier + addend) & mask;
+ return (int) (seed >>> (48 - bits));
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/Util.java b/src/de/lmu/ifi/dbs/elki/utilities/Util.java
index 439ef171..ffac6573 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/Util.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/Util.java
@@ -42,6 +42,16 @@ public final class Util {
private static final long HASHPRIME = 2654435761L;
/**
+ * Detect Java 7.
+ */
+ public static final boolean IS_JAVA7 = System.getProperty("java.version").startsWith("1.7.");
+
+ /**
+ * Detect Oracle Java.
+ */
+ public static final boolean IS_ORACLE_JAVA = System.getProperty("java.vm.vendor").startsWith("Oracle");
+
+ /**
* Fake constructor: do not instantiate.
*/
private Util() {
@@ -64,13 +74,14 @@ public final class Util {
assert (cardinality >= 0) : "Cannot set a negative number of bits!";
assert (cardinality < capacity) : "Cannot set " + cardinality + " of " + capacity + " bits!";
BitSet bitset = new BitSet(capacity);
- if (cardinality < capacity >>> 1) {
- while (bitset.cardinality() < cardinality) {
+ if(cardinality < capacity >>> 1) {
+ while(bitset.cardinality() < cardinality) {
bitset.set(random.nextInt(capacity));
}
- } else {
+ }
+ else {
bitset.flip(0, capacity);
- while (bitset.cardinality() > cardinality) {
+ while(bitset.cardinality() > cardinality) {
bitset.clear(random.nextInt(capacity));
}
}
@@ -119,11 +130,11 @@ public final class Util {
* @return Mixed hash code
*/
public static int mixHashCodes(int... hash) {
- if (hash.length == 0) {
+ if(hash.length == 0) {
return 0;
}
long result = hash[0];
- for (int i = 1; i < hash.length; i++) {
+ for(int i = 1; i < hash.length; i++) {
result = result * HASHPRIME + hash[i];
}
return (int) result;
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/QuickSelect.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/QuickSelect.java
index 3746ff87..d0a2cd20 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/QuickSelect.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/QuickSelect.java
@@ -1347,7 +1347,7 @@ public class QuickSelect {
pivot.seek(end - 1);
// Begin partitioning
- int i = start, j = end - 3;
+ int i = start, j = end - 2;
refi.seek(i);
refj.seek(j);
// This is classic quicksort stuff
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayLikeUtil.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayLikeUtil.java
index 8fab6f2b..f03d39e9 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayLikeUtil.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/ArrayLikeUtil.java
@@ -66,6 +66,11 @@ public final class ArrayLikeUtil {
public static final NumberVectorAdapter<?> NUMBERVECTORADAPTER = new NumberVectorAdapter<Double>();
/**
+ * Adapter for matrices, reinterpreted as flat arrays.
+ */
+ public static final FlatMatrixAdapter FLATMATRIXADAPTER = new FlatMatrixAdapter();
+
+ /**
* Use a double array in the array API.
*/
public static final NumberArrayAdapter<Double, double[]> DOUBLEARRAYADAPTER = new DoubleArrayAdapter();
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FlatMatrixAdapter.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FlatMatrixAdapter.java
new file mode 100644
index 00000000..18fbae5d
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/arraylike/FlatMatrixAdapter.java
@@ -0,0 +1,85 @@
+package de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
+
+/**
+ * Use a matrix as array, by flattening it into a sequence.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+class FlatMatrixAdapter implements NumberArrayAdapter<Double, Matrix> {
+ /**
+ * Constructor.
+ *
+ * Use the static instance from {@link ArrayLikeUtil}!
+ */
+ protected FlatMatrixAdapter() {
+ super();
+ }
+
+ @Override
+ public int size(Matrix array) {
+ return array.getColumnDimensionality() * array.getRowDimensionality();
+ }
+
+ @Override
+ @Deprecated
+ public Double get(Matrix array, int off) throws IndexOutOfBoundsException {
+ return Double.valueOf(getDouble(array, off));
+ }
+
+ @Override
+ public double getDouble(Matrix array, int off) throws IndexOutOfBoundsException {
+ return array.get(off / array.getColumnDimensionality(), off % array.getColumnDimensionality());
+ }
+
+ @Override
+ public float getFloat(Matrix array, int off) throws IndexOutOfBoundsException {
+ return (float) getDouble(array, off);
+ }
+
+ @Override
+ public int getInteger(Matrix array, int off) throws IndexOutOfBoundsException {
+ return (int) getDouble(array, off);
+ }
+
+ @Override
+ public short getShort(Matrix array, int off) throws IndexOutOfBoundsException {
+ return (short) getDouble(array, off);
+ }
+
+ @Override
+ public long getLong(Matrix array, int off) throws IndexOutOfBoundsException {
+ return (long) getDouble(array, off);
+ }
+
+ @Override
+ public byte getByte(Matrix array, int off) throws IndexOutOfBoundsException {
+ return (byte) getDouble(array, off);
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMaxHeap.java
index 222fe83a..d6937b4d 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMaxHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMaxHeap.java
@@ -24,32 +24,11 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
*/
import java.util.Arrays;
-import java.util.ConcurrentModificationException;
import de.lmu.ifi.dbs.elki.math.MathUtil;
/**
- * Advanced priority queue class, based on a binary heap (for small sizes),
- * which will for larger heaps be accompanied by a 4-ary heap (attached below
- * the root of the two-ary heap, making the root actually 3-ary).
- *
- * This code was automatically instantiated for the type: Comparable
- *
- * This combination was found to work quite well in benchmarks, but YMMV.
- *
- * Some other observations from benchmarking:
- * <ul>
- * <li>Bulk loading did not improve things</li>
- * <li>Primitive heaps are substantially faster.</li>
- * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and
- * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li>
- * <li>Workload makes a huge difference. A load-once, poll-until-empty priority
- * queue is something different than e.g. a top-k heap, which will see a lot of
- * top element replacements.</li>
- * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for
- * top-k make a difference.</li>
- * <li>Different day, different benchmark results ...</li>
- * </ul>
+ * Binary heap for Comparable objects.
*
* @author Erich Schubert
*
@@ -63,44 +42,16 @@ public class ComparableMaxHeap<K extends Comparable<? super K>> implements Objec
protected Comparable<Object>[] twoheap;
/**
- * Extension heap.
- */
- protected Comparable<Object>[] fourheap;
-
- /**
* Current size of heap.
*/
protected int size;
/**
- * (Structural) modification counter. Used to invalidate iterators.
- */
- protected int modCount = 0;
-
- /**
- * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements.
- */
- private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1;
-
- /**
* Initial size of the 2-ary heap.
*/
private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1;
/**
- * Initial size of 4-ary heap when initialized.
- *
- * 21 = 4-ary heap of height 2: 1 + 4 + 4*4
- *
- * 85 = 4-ary heap of height 3: 21 + 4*4*4
- *
- * 341 = 4-ary heap of height 4: 85 + 4*4*4*4
- *
- * Since we last grew by 255 (to 511), let's use 341.
- */
- private final static int FOUR_HEAP_INITIAL_SIZE = 341;
-
- /**
* Constructor, with default size.
*/
@SuppressWarnings("unchecked")
@@ -109,9 +60,6 @@ public class ComparableMaxHeap<K extends Comparable<? super K>> implements Objec
Comparable<Object>[] twoheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, TWO_HEAP_INITIAL_SIZE);
this.twoheap = twoheap;
- this.fourheap = null;
- this.size = 0;
- this.modCount = 0;
}
/**
@@ -122,27 +70,15 @@ public class ComparableMaxHeap<K extends Comparable<? super K>> implements Objec
@SuppressWarnings("unchecked")
public ComparableMaxHeap(int minsize) {
super();
- if (minsize < TWO_HEAP_MAX_SIZE) {
- final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
- Comparable<Object>[] twoheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, size);
+ final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
+ Comparable<Object>[] twoheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, size);
- this.twoheap = twoheap;
- this.fourheap = null;
- } else {
- Comparable<Object>[] twoheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, TWO_HEAP_INITIAL_SIZE);
- Comparable<Object>[] fourheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, minsize - TWO_HEAP_MAX_SIZE);
- this.twoheap = twoheap;
- this.fourheap = fourheap;
- }
- this.size = 0;
- this.modCount = 0;
+ this.twoheap = twoheap;
}
@Override
public void clear() {
size = 0;
- ++modCount;
- fourheap = null;
Arrays.fill(twoheap, null);
}
@@ -161,29 +97,14 @@ public class ComparableMaxHeap<K extends Comparable<? super K>> implements Objec
public void add(K o) {
final Comparable<Object> co = (Comparable<Object>)o;
// System.err.println("Add: " + o);
- if (size < TWO_HEAP_MAX_SIZE) {
- if (size >= twoheap.length) {
- // Grow by one layer.
- twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
- }
- final int twopos = size;
- twoheap[twopos] = co;
- ++size;
- heapifyUp2(twopos, co);
- ++modCount;
- } else {
- final int fourpos = size - TWO_HEAP_MAX_SIZE;
- if (fourheap == null) {
- fourheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, FOUR_HEAP_INITIAL_SIZE);
- } else if (fourpos >= fourheap.length) {
- // Grow extension heap by half.
- fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1));
- }
- fourheap[fourpos] = co;
- ++size;
- heapifyUp4(fourpos, co);
- ++modCount;
+ if (size >= twoheap.length) {
+ // Grow by one layer.
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
}
+ final int twopos = size;
+ twoheap[twopos] = co;
+ ++size;
+ heapifyUp(twopos, co);
}
@Override
@@ -200,7 +121,6 @@ public class ComparableMaxHeap<K extends Comparable<? super K>> implements Objec
public K replaceTopElement(K reinsert) {
final Comparable<Object> ret = twoheap[0];
heapifyDown((Comparable<Object>) reinsert);
- ++modCount;
return (K)ret;
}
@@ -210,7 +130,7 @@ public class ComparableMaxHeap<K extends Comparable<? super K>> implements Objec
* @param twopos Position in 2-ary heap.
* @param cur Current object
*/
- private void heapifyUp2(int twopos, Comparable<Object> cur) {
+ private void heapifyUp(int twopos, Comparable<Object> cur) {
while (twopos > 0) {
final int parent = (twopos - 1) >>> 1;
Comparable<Object> par = twoheap[parent];
@@ -223,81 +143,30 @@ public class ComparableMaxHeap<K extends Comparable<? super K>> implements Objec
twoheap[twopos] = cur;
}
- /**
- * Heapify-Up method for 4-ary heap.
- *
- * @param fourpos Position in 4-ary heap.
- * @param cur Current object
- */
- private void heapifyUp4(int fourpos, Comparable<Object> cur) {
- while (fourpos > 0) {
- final int parent = (fourpos - 1) >> 2;
- Comparable<Object> par = fourheap[parent];
- if (cur.compareTo(par) <= 0) {
- break;
- }
- fourheap[fourpos] = par;
- fourpos = parent;
- }
- if (fourpos == 0 && twoheap[0].compareTo(cur) < 0) {
- fourheap[0] = twoheap[0];
- twoheap[0] = cur;
- } else {
- fourheap[fourpos] = cur;
- }
- }
-
@Override
@SuppressWarnings("unchecked")
public K poll() {
final Comparable<Object> ret = twoheap[0];
--size;
// Replacement object:
- if (size >= TWO_HEAP_MAX_SIZE) {
- final int last = size - TWO_HEAP_MAX_SIZE;
- final Comparable<Object> reinsert = fourheap[last];
- fourheap[last] = null;
- heapifyDown(reinsert);
- } else if (size > 0) {
+ if (size > 0) {
final Comparable<Object> reinsert = twoheap[size];
twoheap[size] = null;
heapifyDown(reinsert);
} else {
twoheap[0] = null;
}
- ++modCount;
return (K)ret;
}
/**
* Invoke heapify-down for the root object.
*
- * @param reinsert Object to insert.
- */
- private void heapifyDown(Comparable<Object> reinsert) {
- if (size > TWO_HEAP_MAX_SIZE) {
- // Special case: 3-ary situation.
- final int best = (twoheap[1].compareTo(twoheap[2]) >= 0) ? 1 : 2;
- if (fourheap[0].compareTo(twoheap[best]) > 0) {
- twoheap[0] = fourheap[0];
- heapifyDown4(0, reinsert);
- } else {
- twoheap[0] = twoheap[best];
- heapifyDown2(best, reinsert);
- }
- return;
- }
- heapifyDown2(0, reinsert);
- }
-
- /**
- * Heapify-Down for 2-ary heap.
- *
- * @param twopos Position in 2-ary heap.
- * @param cur Current object
+ * @param cur Object to insert.
*/
- private void heapifyDown2(int twopos, Comparable<Object> cur) {
- final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1;
+ private void heapifyDown(Comparable<Object> cur) {
+ final int stop = size >>> 1;
+ int twopos = 0;
while (twopos < stop) {
int bestchild = (twopos << 1) + 1;
Comparable<Object> best = twoheap[bestchild];
@@ -315,51 +184,6 @@ public class ComparableMaxHeap<K extends Comparable<? super K>> implements Objec
twoheap[twopos] = cur;
}
- /**
- * Heapify-Down for 4-ary heap.
- *
- * @param fourpos Position in 4-ary heap.
- * @param cur Current object
- */
- private void heapifyDown4(int fourpos, Comparable<Object> cur) {
- final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2;
- while (fourpos < stop) {
- final int child = (fourpos << 2) + 1;
- Comparable<Object> best = fourheap[child];
- int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE;
- if (size > minsize) {
- Comparable<Object> nextchild = fourheap[candidate];
- if (best.compareTo(nextchild) < 0) {
- bestchild = candidate;
- best = nextchild;
- }
-
- minsize += 2;
- if (size >= minsize) {
- nextchild = fourheap[++candidate];
- if (best.compareTo(nextchild) < 0) {
- bestchild = candidate;
- best = nextchild;
- }
-
- if (size > minsize) {
- nextchild = fourheap[++candidate];
- if (best.compareTo(nextchild) < 0) {
- bestchild = candidate;
- best = nextchild;
- }
- }
- }
- }
- if (cur.compareTo(best) >= 0) {
- break;
- }
- fourheap[fourpos] = best;
- fourpos = bestchild;
- }
- fourheap[fourpos] = cur;
- }
-
@Override
@SuppressWarnings("unchecked")
public K peek() {
@@ -403,16 +227,8 @@ public class ComparableMaxHeap<K extends Comparable<? super K>> implements Objec
*/
protected int pos = 0;
- /**
- * Modification counter we were initialized at.
- */
- protected final int myModCount = modCount;
-
@Override
public boolean valid() {
- if (modCount != myModCount) {
- throw new ConcurrentModificationException();
- }
return pos < size;
}
@@ -425,7 +241,7 @@ public class ComparableMaxHeap<K extends Comparable<? super K>> implements Objec
@Override
public K get() {
- return (K)((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]);
+ return (K)twoheap[pos];
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMinHeap.java
index 3cc5a02f..167c6bc7 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMinHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparableMinHeap.java
@@ -24,32 +24,11 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
*/
import java.util.Arrays;
-import java.util.ConcurrentModificationException;
import de.lmu.ifi.dbs.elki.math.MathUtil;
/**
- * Advanced priority queue class, based on a binary heap (for small sizes),
- * which will for larger heaps be accompanied by a 4-ary heap (attached below
- * the root of the two-ary heap, making the root actually 3-ary).
- *
- * This code was automatically instantiated for the type: Comparable
- *
- * This combination was found to work quite well in benchmarks, but YMMV.
- *
- * Some other observations from benchmarking:
- * <ul>
- * <li>Bulk loading did not improve things</li>
- * <li>Primitive heaps are substantially faster.</li>
- * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and
- * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li>
- * <li>Workload makes a huge difference. A load-once, poll-until-empty priority
- * queue is something different than e.g. a top-k heap, which will see a lot of
- * top element replacements.</li>
- * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for
- * top-k make a difference.</li>
- * <li>Different day, different benchmark results ...</li>
- * </ul>
+ * Binary heap for primitive types.
*
* @author Erich Schubert
*
@@ -63,44 +42,16 @@ public class ComparableMinHeap<K extends Comparable<? super K>> implements Objec
protected Comparable<Object>[] twoheap;
/**
- * Extension heap.
- */
- protected Comparable<Object>[] fourheap;
-
- /**
* Current size of heap.
*/
protected int size;
/**
- * (Structural) modification counter. Used to invalidate iterators.
- */
- protected int modCount = 0;
-
- /**
- * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements.
- */
- private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1;
-
- /**
* Initial size of the 2-ary heap.
*/
private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1;
/**
- * Initial size of 4-ary heap when initialized.
- *
- * 21 = 4-ary heap of height 2: 1 + 4 + 4*4
- *
- * 85 = 4-ary heap of height 3: 21 + 4*4*4
- *
- * 341 = 4-ary heap of height 4: 85 + 4*4*4*4
- *
- * Since we last grew by 255 (to 511), let's use 341.
- */
- private final static int FOUR_HEAP_INITIAL_SIZE = 341;
-
- /**
* Constructor, with default size.
*/
@SuppressWarnings("unchecked")
@@ -109,9 +60,6 @@ public class ComparableMinHeap<K extends Comparable<? super K>> implements Objec
Comparable<Object>[] twoheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, TWO_HEAP_INITIAL_SIZE);
this.twoheap = twoheap;
- this.fourheap = null;
- this.size = 0;
- this.modCount = 0;
}
/**
@@ -122,27 +70,15 @@ public class ComparableMinHeap<K extends Comparable<? super K>> implements Objec
@SuppressWarnings("unchecked")
public ComparableMinHeap(int minsize) {
super();
- if (minsize < TWO_HEAP_MAX_SIZE) {
- final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
- Comparable<Object>[] twoheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, size);
+ final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
+ Comparable<Object>[] twoheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, size);
- this.twoheap = twoheap;
- this.fourheap = null;
- } else {
- Comparable<Object>[] twoheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, TWO_HEAP_INITIAL_SIZE);
- Comparable<Object>[] fourheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, minsize - TWO_HEAP_MAX_SIZE);
- this.twoheap = twoheap;
- this.fourheap = fourheap;
- }
- this.size = 0;
- this.modCount = 0;
+ this.twoheap = twoheap;
}
@Override
public void clear() {
size = 0;
- ++modCount;
- fourheap = null;
Arrays.fill(twoheap, null);
}
@@ -161,29 +97,14 @@ public class ComparableMinHeap<K extends Comparable<? super K>> implements Objec
public void add(K o) {
final Comparable<Object> co = (Comparable<Object>)o;
// System.err.println("Add: " + o);
- if (size < TWO_HEAP_MAX_SIZE) {
- if (size >= twoheap.length) {
- // Grow by one layer.
- twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
- }
- final int twopos = size;
- twoheap[twopos] = co;
- ++size;
- heapifyUp2(twopos, co);
- ++modCount;
- } else {
- final int fourpos = size - TWO_HEAP_MAX_SIZE;
- if (fourheap == null) {
- fourheap = (Comparable<Object>[]) java.lang.reflect.Array.newInstance(Comparable.class, FOUR_HEAP_INITIAL_SIZE);
- } else if (fourpos >= fourheap.length) {
- // Grow extension heap by half.
- fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1));
- }
- fourheap[fourpos] = co;
- ++size;
- heapifyUp4(fourpos, co);
- ++modCount;
+ if (size >= twoheap.length) {
+ // Grow by one layer.
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
}
+ final int twopos = size;
+ twoheap[twopos] = co;
+ ++size;
+ heapifyUp(twopos, co);
}
@Override
@@ -200,7 +121,6 @@ public class ComparableMinHeap<K extends Comparable<? super K>> implements Objec
public K replaceTopElement(K reinsert) {
final Comparable<Object> ret = twoheap[0];
heapifyDown((Comparable<Object>) reinsert);
- ++modCount;
return (K)ret;
}
@@ -210,7 +130,7 @@ public class ComparableMinHeap<K extends Comparable<? super K>> implements Objec
* @param twopos Position in 2-ary heap.
* @param cur Current object
*/
- private void heapifyUp2(int twopos, Comparable<Object> cur) {
+ private void heapifyUp(int twopos, Comparable<Object> cur) {
while (twopos > 0) {
final int parent = (twopos - 1) >>> 1;
Comparable<Object> par = twoheap[parent];
@@ -223,81 +143,30 @@ public class ComparableMinHeap<K extends Comparable<? super K>> implements Objec
twoheap[twopos] = cur;
}
- /**
- * Heapify-Up method for 4-ary heap.
- *
- * @param fourpos Position in 4-ary heap.
- * @param cur Current object
- */
- private void heapifyUp4(int fourpos, Comparable<Object> cur) {
- while (fourpos > 0) {
- final int parent = (fourpos - 1) >> 2;
- Comparable<Object> par = fourheap[parent];
- if (cur.compareTo(par) >= 0) {
- break;
- }
- fourheap[fourpos] = par;
- fourpos = parent;
- }
- if (fourpos == 0 && twoheap[0].compareTo(cur) > 0) {
- fourheap[0] = twoheap[0];
- twoheap[0] = cur;
- } else {
- fourheap[fourpos] = cur;
- }
- }
-
@Override
@SuppressWarnings("unchecked")
public K poll() {
final Comparable<Object> ret = twoheap[0];
--size;
// Replacement object:
- if (size >= TWO_HEAP_MAX_SIZE) {
- final int last = size - TWO_HEAP_MAX_SIZE;
- final Comparable<Object> reinsert = fourheap[last];
- fourheap[last] = null;
- heapifyDown(reinsert);
- } else if (size > 0) {
+ if (size > 0) {
final Comparable<Object> reinsert = twoheap[size];
twoheap[size] = null;
heapifyDown(reinsert);
} else {
twoheap[0] = null;
}
- ++modCount;
return (K)ret;
}
/**
* Invoke heapify-down for the root object.
*
- * @param reinsert Object to insert.
- */
- private void heapifyDown(Comparable<Object> reinsert) {
- if (size > TWO_HEAP_MAX_SIZE) {
- // Special case: 3-ary situation.
- final int best = (twoheap[1].compareTo(twoheap[2]) <= 0) ? 1 : 2;
- if (fourheap[0].compareTo(twoheap[best]) < 0) {
- twoheap[0] = fourheap[0];
- heapifyDown4(0, reinsert);
- } else {
- twoheap[0] = twoheap[best];
- heapifyDown2(best, reinsert);
- }
- return;
- }
- heapifyDown2(0, reinsert);
- }
-
- /**
- * Heapify-Down for 2-ary heap.
- *
- * @param twopos Position in 2-ary heap.
- * @param cur Current object
+ * @param cur Object to insert.
*/
- private void heapifyDown2(int twopos, Comparable<Object> cur) {
- final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1;
+ private void heapifyDown(Comparable<Object> cur) {
+ final int stop = size >>> 1;
+ int twopos = 0;
while (twopos < stop) {
int bestchild = (twopos << 1) + 1;
Comparable<Object> best = twoheap[bestchild];
@@ -315,51 +184,6 @@ public class ComparableMinHeap<K extends Comparable<? super K>> implements Objec
twoheap[twopos] = cur;
}
- /**
- * Heapify-Down for 4-ary heap.
- *
- * @param fourpos Position in 4-ary heap.
- * @param cur Current object
- */
- private void heapifyDown4(int fourpos, Comparable<Object> cur) {
- final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2;
- while (fourpos < stop) {
- final int child = (fourpos << 2) + 1;
- Comparable<Object> best = fourheap[child];
- int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE;
- if (size > minsize) {
- Comparable<Object> nextchild = fourheap[candidate];
- if (best.compareTo(nextchild) > 0) {
- bestchild = candidate;
- best = nextchild;
- }
-
- minsize += 2;
- if (size >= minsize) {
- nextchild = fourheap[++candidate];
- if (best.compareTo(nextchild) > 0) {
- bestchild = candidate;
- best = nextchild;
- }
-
- if (size > minsize) {
- nextchild = fourheap[++candidate];
- if (best.compareTo(nextchild) > 0) {
- bestchild = candidate;
- best = nextchild;
- }
- }
- }
- }
- if (cur.compareTo(best) <= 0) {
- break;
- }
- fourheap[fourpos] = best;
- fourpos = bestchild;
- }
- fourheap[fourpos] = cur;
- }
-
@Override
@SuppressWarnings("unchecked")
public K peek() {
@@ -403,16 +227,8 @@ public class ComparableMinHeap<K extends Comparable<? super K>> implements Objec
*/
protected int pos = 0;
- /**
- * Modification counter we were initialized at.
- */
- protected final int myModCount = modCount;
-
@Override
public boolean valid() {
- if (modCount != myModCount) {
- throw new ConcurrentModificationException();
- }
return pos < size;
}
@@ -425,7 +241,7 @@ public class ComparableMinHeap<K extends Comparable<? super K>> implements Objec
@Override
public K get() {
- return (K)((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]);
+ return (K)twoheap[pos];
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMaxHeap.java
index 7b660d31..e5887c73 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMaxHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMaxHeap.java
@@ -24,32 +24,11 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
*/
import java.util.Arrays;
-import java.util.ConcurrentModificationException;
import de.lmu.ifi.dbs.elki.math.MathUtil;
/**
- * Advanced priority queue class, based on a binary heap (for small sizes),
- * which will for larger heaps be accompanied by a 4-ary heap (attached below
- * the root of the two-ary heap, making the root actually 3-ary).
- *
- * This code was automatically instantiated for the type: Comparator
- *
- * This combination was found to work quite well in benchmarks, but YMMV.
- *
- * Some other observations from benchmarking:
- * <ul>
- * <li>Bulk loading did not improve things</li>
- * <li>Primitive heaps are substantially faster.</li>
- * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and
- * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li>
- * <li>Workload makes a huge difference. A load-once, poll-until-empty priority
- * queue is something different than e.g. a top-k heap, which will see a lot of
- * top element replacements.</li>
- * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for
- * top-k make a difference.</li>
- * <li>Different day, different benchmark results ...</li>
- * </ul>
+ * Binary heap for primitive types.
*
* @author Erich Schubert
*
@@ -63,43 +42,15 @@ public class ComparatorMaxHeap<K> implements ObjectHeap<K> {
protected Object[] twoheap;
/**
- * Extension heap.
- */
- protected Object[] fourheap;
-
- /**
* Current size of heap.
*/
protected int size;
/**
- * (Structural) modification counter. Used to invalidate iterators.
- */
- protected int modCount = 0;
-
- /**
- * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements.
- */
- private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1;
-
- /**
* Initial size of the 2-ary heap.
*/
private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1;
- /**
- * Initial size of 4-ary heap when initialized.
- *
- * 21 = 4-ary heap of height 2: 1 + 4 + 4*4
- *
- * 85 = 4-ary heap of height 3: 21 + 4*4*4
- *
- * 341 = 4-ary heap of height 4: 85 + 4*4*4*4
- *
- * Since we last grew by 255 (to 511), let's use 341.
- */
- private final static int FOUR_HEAP_INITIAL_SIZE = 341;
-
/**
* Comparator
@@ -117,9 +68,6 @@ public class ComparatorMaxHeap<K> implements ObjectHeap<K> {
Object[] twoheap = new Object[TWO_HEAP_INITIAL_SIZE];
this.twoheap = twoheap;
- this.fourheap = null;
- this.size = 0;
- this.modCount = 0;
}
/**
@@ -132,27 +80,15 @@ public class ComparatorMaxHeap<K> implements ObjectHeap<K> {
public ComparatorMaxHeap(int minsize, java.util.Comparator<? super K> comparator) {
super();
this.comparator = (java.util.Comparator<Object>) java.util.Comparator.class.cast(comparator);
- if (minsize < TWO_HEAP_MAX_SIZE) {
- final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
- Object[] twoheap = new Object[size];
+ final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
+ Object[] twoheap = new Object[size];
- this.twoheap = twoheap;
- this.fourheap = null;
- } else {
- Object[] twoheap = new Object[TWO_HEAP_INITIAL_SIZE];
- Object[] fourheap = new Object[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)];
- this.twoheap = twoheap;
- this.fourheap = fourheap;
- }
- this.size = 0;
- this.modCount = 0;
+ this.twoheap = twoheap;
}
@Override
public void clear() {
size = 0;
- ++modCount;
- fourheap = null;
Arrays.fill(twoheap, null);
}
@@ -170,29 +106,14 @@ public class ComparatorMaxHeap<K> implements ObjectHeap<K> {
public void add(K o) {
final Object co = o;
// System.err.println("Add: " + o);
- if (size < TWO_HEAP_MAX_SIZE) {
- if (size >= twoheap.length) {
- // Grow by one layer.
- twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
- }
- final int twopos = size;
- twoheap[twopos] = co;
- ++size;
- heapifyUp2(twopos, co);
- ++modCount;
- } else {
- final int fourpos = size - TWO_HEAP_MAX_SIZE;
- if (fourheap == null) {
- fourheap = new Object[FOUR_HEAP_INITIAL_SIZE];
- } else if (fourpos >= fourheap.length) {
- // Grow extension heap by half.
- fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1));
- }
- fourheap[fourpos] = co;
- ++size;
- heapifyUp4(fourpos, co);
- ++modCount;
+ if (size >= twoheap.length) {
+ // Grow by one layer.
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
}
+ final int twopos = size;
+ twoheap[twopos] = co;
+ ++size;
+ heapifyUp(twopos, co);
}
@Override
@@ -209,7 +130,6 @@ public class ComparatorMaxHeap<K> implements ObjectHeap<K> {
public K replaceTopElement(K reinsert) {
final Object ret = twoheap[0];
heapifyDown( reinsert);
- ++modCount;
return (K)ret;
}
@@ -219,7 +139,7 @@ public class ComparatorMaxHeap<K> implements ObjectHeap<K> {
* @param twopos Position in 2-ary heap.
* @param cur Current object
*/
- private void heapifyUp2(int twopos, Object cur) {
+ private void heapifyUp(int twopos, Object cur) {
while (twopos > 0) {
final int parent = (twopos - 1) >>> 1;
Object par = twoheap[parent];
@@ -232,81 +152,30 @@ public class ComparatorMaxHeap<K> implements ObjectHeap<K> {
twoheap[twopos] = cur;
}
- /**
- * Heapify-Up method for 4-ary heap.
- *
- * @param fourpos Position in 4-ary heap.
- * @param cur Current object
- */
- private void heapifyUp4(int fourpos, Object cur) {
- while (fourpos > 0) {
- final int parent = (fourpos - 1) >> 2;
- Object par = fourheap[parent];
- if (comparator.compare(cur, par) <= 0) {
- break;
- }
- fourheap[fourpos] = par;
- fourpos = parent;
- }
- if (fourpos == 0 && comparator.compare(twoheap[0], cur) < 0) {
- fourheap[0] = twoheap[0];
- twoheap[0] = cur;
- } else {
- fourheap[fourpos] = cur;
- }
- }
-
@Override
@SuppressWarnings("unchecked")
public K poll() {
final Object ret = twoheap[0];
--size;
// Replacement object:
- if (size >= TWO_HEAP_MAX_SIZE) {
- final int last = size - TWO_HEAP_MAX_SIZE;
- final Object reinsert = fourheap[last];
- fourheap[last] = null;
- heapifyDown(reinsert);
- } else if (size > 0) {
+ if (size > 0) {
final Object reinsert = twoheap[size];
twoheap[size] = null;
heapifyDown(reinsert);
} else {
twoheap[0] = null;
}
- ++modCount;
return (K)ret;
}
/**
* Invoke heapify-down for the root object.
*
- * @param reinsert Object to insert.
- */
- private void heapifyDown(Object reinsert) {
- if (size > TWO_HEAP_MAX_SIZE) {
- // Special case: 3-ary situation.
- final int best = (comparator.compare(twoheap[1], twoheap[2]) >= 0) ? 1 : 2;
- if (comparator.compare(fourheap[0], twoheap[best]) > 0) {
- twoheap[0] = fourheap[0];
- heapifyDown4(0, reinsert);
- } else {
- twoheap[0] = twoheap[best];
- heapifyDown2(best, reinsert);
- }
- return;
- }
- heapifyDown2(0, reinsert);
- }
-
- /**
- * Heapify-Down for 2-ary heap.
- *
- * @param twopos Position in 2-ary heap.
- * @param cur Current object
+ * @param cur Object to insert.
*/
- private void heapifyDown2(int twopos, Object cur) {
- final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1;
+ private void heapifyDown(Object cur) {
+ final int stop = size >>> 1;
+ int twopos = 0;
while (twopos < stop) {
int bestchild = (twopos << 1) + 1;
Object best = twoheap[bestchild];
@@ -324,51 +193,6 @@ public class ComparatorMaxHeap<K> implements ObjectHeap<K> {
twoheap[twopos] = cur;
}
- /**
- * Heapify-Down for 4-ary heap.
- *
- * @param fourpos Position in 4-ary heap.
- * @param cur Current object
- */
- private void heapifyDown4(int fourpos, Object cur) {
- final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2;
- while (fourpos < stop) {
- final int child = (fourpos << 2) + 1;
- Object best = fourheap[child];
- int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE;
- if (size > minsize) {
- Object nextchild = fourheap[candidate];
- if (comparator.compare(best, nextchild) < 0) {
- bestchild = candidate;
- best = nextchild;
- }
-
- minsize += 2;
- if (size >= minsize) {
- nextchild = fourheap[++candidate];
- if (comparator.compare(best, nextchild) < 0) {
- bestchild = candidate;
- best = nextchild;
- }
-
- if (size > minsize) {
- nextchild = fourheap[++candidate];
- if (comparator.compare(best, nextchild) < 0) {
- bestchild = candidate;
- best = nextchild;
- }
- }
- }
- }
- if (comparator.compare(cur, best) >= 0) {
- break;
- }
- fourheap[fourpos] = best;
- fourpos = bestchild;
- }
- fourheap[fourpos] = cur;
- }
-
@Override
@SuppressWarnings("unchecked")
public K peek() {
@@ -412,16 +236,8 @@ public class ComparatorMaxHeap<K> implements ObjectHeap<K> {
*/
protected int pos = 0;
- /**
- * Modification counter we were initialized at.
- */
- protected final int myModCount = modCount;
-
@Override
public boolean valid() {
- if (modCount != myModCount) {
- throw new ConcurrentModificationException();
- }
return pos < size;
}
@@ -434,7 +250,7 @@ public class ComparatorMaxHeap<K> implements ObjectHeap<K> {
@Override
public K get() {
- return (K)((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]);
+ return (K)twoheap[pos];
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMinHeap.java
index e12c5f64..215a78b6 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMinHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ComparatorMinHeap.java
@@ -24,32 +24,11 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
*/
import java.util.Arrays;
-import java.util.ConcurrentModificationException;
import de.lmu.ifi.dbs.elki.math.MathUtil;
/**
- * Advanced priority queue class, based on a binary heap (for small sizes),
- * which will for larger heaps be accompanied by a 4-ary heap (attached below
- * the root of the two-ary heap, making the root actually 3-ary).
- *
- * This code was automatically instantiated for the type: Comparator
- *
- * This combination was found to work quite well in benchmarks, but YMMV.
- *
- * Some other observations from benchmarking:
- * <ul>
- * <li>Bulk loading did not improve things</li>
- * <li>Primitive heaps are substantially faster.</li>
- * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and
- * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li>
- * <li>Workload makes a huge difference. A load-once, poll-until-empty priority
- * queue is something different than e.g. a top-k heap, which will see a lot of
- * top element replacements.</li>
- * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for
- * top-k make a difference.</li>
- * <li>Different day, different benchmark results ...</li>
- * </ul>
+ * Binary heap for primitive types.
*
* @author Erich Schubert
*
@@ -63,43 +42,15 @@ public class ComparatorMinHeap<K> implements ObjectHeap<K> {
protected Object[] twoheap;
/**
- * Extension heap.
- */
- protected Object[] fourheap;
-
- /**
* Current size of heap.
*/
protected int size;
/**
- * (Structural) modification counter. Used to invalidate iterators.
- */
- protected int modCount = 0;
-
- /**
- * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements.
- */
- private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1;
-
- /**
* Initial size of the 2-ary heap.
*/
private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1;
- /**
- * Initial size of 4-ary heap when initialized.
- *
- * 21 = 4-ary heap of height 2: 1 + 4 + 4*4
- *
- * 85 = 4-ary heap of height 3: 21 + 4*4*4
- *
- * 341 = 4-ary heap of height 4: 85 + 4*4*4*4
- *
- * Since we last grew by 255 (to 511), let's use 341.
- */
- private final static int FOUR_HEAP_INITIAL_SIZE = 341;
-
/**
* Comparator
@@ -117,9 +68,6 @@ public class ComparatorMinHeap<K> implements ObjectHeap<K> {
Object[] twoheap = new Object[TWO_HEAP_INITIAL_SIZE];
this.twoheap = twoheap;
- this.fourheap = null;
- this.size = 0;
- this.modCount = 0;
}
/**
@@ -132,27 +80,15 @@ public class ComparatorMinHeap<K> implements ObjectHeap<K> {
public ComparatorMinHeap(int minsize, java.util.Comparator<? super K> comparator) {
super();
this.comparator = (java.util.Comparator<Object>) java.util.Comparator.class.cast(comparator);
- if (minsize < TWO_HEAP_MAX_SIZE) {
- final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
- Object[] twoheap = new Object[size];
+ final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
+ Object[] twoheap = new Object[size];
- this.twoheap = twoheap;
- this.fourheap = null;
- } else {
- Object[] twoheap = new Object[TWO_HEAP_INITIAL_SIZE];
- Object[] fourheap = new Object[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)];
- this.twoheap = twoheap;
- this.fourheap = fourheap;
- }
- this.size = 0;
- this.modCount = 0;
+ this.twoheap = twoheap;
}
@Override
public void clear() {
size = 0;
- ++modCount;
- fourheap = null;
Arrays.fill(twoheap, null);
}
@@ -170,29 +106,14 @@ public class ComparatorMinHeap<K> implements ObjectHeap<K> {
public void add(K o) {
final Object co = o;
// System.err.println("Add: " + o);
- if (size < TWO_HEAP_MAX_SIZE) {
- if (size >= twoheap.length) {
- // Grow by one layer.
- twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
- }
- final int twopos = size;
- twoheap[twopos] = co;
- ++size;
- heapifyUp2(twopos, co);
- ++modCount;
- } else {
- final int fourpos = size - TWO_HEAP_MAX_SIZE;
- if (fourheap == null) {
- fourheap = new Object[FOUR_HEAP_INITIAL_SIZE];
- } else if (fourpos >= fourheap.length) {
- // Grow extension heap by half.
- fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1));
- }
- fourheap[fourpos] = co;
- ++size;
- heapifyUp4(fourpos, co);
- ++modCount;
+ if (size >= twoheap.length) {
+ // Grow by one layer.
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
}
+ final int twopos = size;
+ twoheap[twopos] = co;
+ ++size;
+ heapifyUp(twopos, co);
}
@Override
@@ -209,7 +130,6 @@ public class ComparatorMinHeap<K> implements ObjectHeap<K> {
public K replaceTopElement(K reinsert) {
final Object ret = twoheap[0];
heapifyDown( reinsert);
- ++modCount;
return (K)ret;
}
@@ -219,7 +139,7 @@ public class ComparatorMinHeap<K> implements ObjectHeap<K> {
* @param twopos Position in 2-ary heap.
* @param cur Current object
*/
- private void heapifyUp2(int twopos, Object cur) {
+ private void heapifyUp(int twopos, Object cur) {
while (twopos > 0) {
final int parent = (twopos - 1) >>> 1;
Object par = twoheap[parent];
@@ -232,81 +152,30 @@ public class ComparatorMinHeap<K> implements ObjectHeap<K> {
twoheap[twopos] = cur;
}
- /**
- * Heapify-Up method for 4-ary heap.
- *
- * @param fourpos Position in 4-ary heap.
- * @param cur Current object
- */
- private void heapifyUp4(int fourpos, Object cur) {
- while (fourpos > 0) {
- final int parent = (fourpos - 1) >> 2;
- Object par = fourheap[parent];
- if (comparator.compare(cur, par) >= 0) {
- break;
- }
- fourheap[fourpos] = par;
- fourpos = parent;
- }
- if (fourpos == 0 && comparator.compare(twoheap[0], cur) > 0) {
- fourheap[0] = twoheap[0];
- twoheap[0] = cur;
- } else {
- fourheap[fourpos] = cur;
- }
- }
-
@Override
@SuppressWarnings("unchecked")
public K poll() {
final Object ret = twoheap[0];
--size;
// Replacement object:
- if (size >= TWO_HEAP_MAX_SIZE) {
- final int last = size - TWO_HEAP_MAX_SIZE;
- final Object reinsert = fourheap[last];
- fourheap[last] = null;
- heapifyDown(reinsert);
- } else if (size > 0) {
+ if (size > 0) {
final Object reinsert = twoheap[size];
twoheap[size] = null;
heapifyDown(reinsert);
} else {
twoheap[0] = null;
}
- ++modCount;
return (K)ret;
}
/**
* Invoke heapify-down for the root object.
*
- * @param reinsert Object to insert.
- */
- private void heapifyDown(Object reinsert) {
- if (size > TWO_HEAP_MAX_SIZE) {
- // Special case: 3-ary situation.
- final int best = (comparator.compare(twoheap[1], twoheap[2]) <= 0) ? 1 : 2;
- if (comparator.compare(fourheap[0], twoheap[best]) < 0) {
- twoheap[0] = fourheap[0];
- heapifyDown4(0, reinsert);
- } else {
- twoheap[0] = twoheap[best];
- heapifyDown2(best, reinsert);
- }
- return;
- }
- heapifyDown2(0, reinsert);
- }
-
- /**
- * Heapify-Down for 2-ary heap.
- *
- * @param twopos Position in 2-ary heap.
- * @param cur Current object
+ * @param cur Object to insert.
*/
- private void heapifyDown2(int twopos, Object cur) {
- final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1;
+ private void heapifyDown(Object cur) {
+ final int stop = size >>> 1;
+ int twopos = 0;
while (twopos < stop) {
int bestchild = (twopos << 1) + 1;
Object best = twoheap[bestchild];
@@ -324,51 +193,6 @@ public class ComparatorMinHeap<K> implements ObjectHeap<K> {
twoheap[twopos] = cur;
}
- /**
- * Heapify-Down for 4-ary heap.
- *
- * @param fourpos Position in 4-ary heap.
- * @param cur Current object
- */
- private void heapifyDown4(int fourpos, Object cur) {
- final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2;
- while (fourpos < stop) {
- final int child = (fourpos << 2) + 1;
- Object best = fourheap[child];
- int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE;
- if (size > minsize) {
- Object nextchild = fourheap[candidate];
- if (comparator.compare(best, nextchild) > 0) {
- bestchild = candidate;
- best = nextchild;
- }
-
- minsize += 2;
- if (size >= minsize) {
- nextchild = fourheap[++candidate];
- if (comparator.compare(best, nextchild) > 0) {
- bestchild = candidate;
- best = nextchild;
- }
-
- if (size > minsize) {
- nextchild = fourheap[++candidate];
- if (comparator.compare(best, nextchild) > 0) {
- bestchild = candidate;
- best = nextchild;
- }
- }
- }
- }
- if (comparator.compare(cur, best) <= 0) {
- break;
- }
- fourheap[fourpos] = best;
- fourpos = bestchild;
- }
- fourheap[fourpos] = cur;
- }
-
@Override
@SuppressWarnings("unchecked")
public K peek() {
@@ -412,16 +236,8 @@ public class ComparatorMinHeap<K> implements ObjectHeap<K> {
*/
protected int pos = 0;
- /**
- * Modification counter we were initialized at.
- */
- protected final int myModCount = modCount;
-
@Override
public boolean valid() {
- if (modCount != myModCount) {
- throw new ConcurrentModificationException();
- }
return pos < size;
}
@@ -434,7 +250,7 @@ public class ComparatorMinHeap<K> implements ObjectHeap<K> {
@Override
public K get() {
- return (K)((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]);
+ return (K)twoheap[pos];
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleHeap.java
index acf77d86..c82f2a4a 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleHeap.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerHeap.java
index c3bf85f4..5d8d31f7 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerHeap.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMaxHeap.java
index 34f1e889..e903c23d 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMaxHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMaxHeap.java
@@ -24,32 +24,11 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
*/
import java.util.Arrays;
-import java.util.ConcurrentModificationException;
import de.lmu.ifi.dbs.elki.math.MathUtil;
/**
- * Advanced priority queue class, based on a binary heap (for small sizes),
- * which will for larger heaps be accompanied by a 4-ary heap (attached below
- * the root of the two-ary heap, making the root actually 3-ary).
- *
- * This code was automatically instantiated for the types: Double and Integer
- *
- * This combination was found to work quite well in benchmarks, but YMMV.
- *
- * Some other observations from benchmarking:
- * <ul>
- * <li>Bulk loading did not improve things</li>
- * <li>Primitive heaps are substantially faster.</li>
- * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and
- * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li>
- * <li>Workload makes a huge difference. A load-once, poll-until-empty priority
- * queue is something different than e.g. a top-k heap, which will see a lot of
- * top element replacements.</li>
- * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for
- * top-k make a difference.</li>
- * <li>Different day, different benchmark results ...</li>
- * </ul>
+ * Binary heap for primitive types.
*
* @author Erich Schubert
*
@@ -67,49 +46,16 @@ public class DoubleIntegerMaxHeap implements DoubleIntegerHeap {
protected int[] twovals;
/**
- * Extension heap.
- */
- protected double[] fourheap;
-
- /**
- * Extension heapvalues.
- */
- protected int[] fourvals;
-
- /**
* Current size of heap.
*/
protected int size;
/**
- * (Structural) modification counter. Used to invalidate iterators.
- */
- protected int modCount = 0;
-
- /**
- * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements.
- */
- private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1;
-
- /**
* Initial size of the 2-ary heap.
*/
private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1;
/**
- * Initial size of 4-ary heap when initialized.
- *
- * 21 = 4-ary heap of height 2: 1 + 4 + 4*4
- *
- * 85 = 4-ary heap of height 3: 21 + 4*4*4
- *
- * 341 = 4-ary heap of height 4: 85 + 4*4*4*4
- *
- * Since we last grew by 255 (to 511), let's use 341.
- */
- private final static int FOUR_HEAP_INITIAL_SIZE = 341;
-
- /**
* Constructor, with default size.
*/
public DoubleIntegerMaxHeap() {
@@ -119,10 +65,6 @@ public class DoubleIntegerMaxHeap implements DoubleIntegerHeap {
this.twoheap = twoheap;
this.twovals = twovals;
- this.fourheap = null;
- this.fourvals = null;
- this.size = 0;
- this.modCount = 0;
}
/**
@@ -132,35 +74,17 @@ public class DoubleIntegerMaxHeap implements DoubleIntegerHeap {
*/
public DoubleIntegerMaxHeap(int minsize) {
super();
- if (minsize < TWO_HEAP_MAX_SIZE) {
- final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
- double[] twoheap = new double[size];
- int[] twovals = new int[size];
+ final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
+ double[] twoheap = new double[size];
+ int[] twovals = new int[size];
- this.twoheap = twoheap;
- this.twovals = twovals;
- this.fourheap = null;
- this.fourvals = null;
- } else {
- double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE];
- int[] twovals = new int[TWO_HEAP_INITIAL_SIZE];
- double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)];
- int[] fourvals = new int[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)];
- this.twoheap = twoheap;
- this.twovals = twovals;
- this.fourheap = fourheap;
- this.fourvals = fourvals;
- }
- this.size = 0;
- this.modCount = 0;
+ this.twoheap = twoheap;
+ this.twovals = twovals;
}
@Override
public void clear() {
size = 0;
- ++modCount;
- fourheap = null;
- fourvals = null;
Arrays.fill(twoheap, 0.0);
Arrays.fill(twovals, 0);
}
@@ -180,34 +104,16 @@ public class DoubleIntegerMaxHeap implements DoubleIntegerHeap {
final double co = o;
final int cv = v;
// System.err.println("Add: " + o);
- if (size < TWO_HEAP_MAX_SIZE) {
- if (size >= twoheap.length) {
- // Grow by one layer.
- twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
- twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1);
- }
- final int twopos = size;
- twoheap[twopos] = co;
- twovals[twopos] = cv;
- ++size;
- heapifyUp2(twopos, co, cv);
- ++modCount;
- } else {
- final int fourpos = size - TWO_HEAP_MAX_SIZE;
- if (fourheap == null) {
- fourheap = new double[FOUR_HEAP_INITIAL_SIZE];
- fourvals = new int[FOUR_HEAP_INITIAL_SIZE];
- } else if (fourpos >= fourheap.length) {
- // Grow extension heap by half.
- fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1));
- fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1));
- }
- fourheap[fourpos] = co;
- fourvals[fourpos] = cv;
- ++size;
- heapifyUp4(fourpos, co, cv);
- ++modCount;
+ if (size >= twoheap.length) {
+ // Grow by one layer.
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
+ twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1);
}
+ final int twopos = size;
+ twoheap[twopos] = co;
+ twovals[twopos] = cv;
+ ++size;
+ heapifyUp(twopos, co, cv);
}
@Override
@@ -222,7 +128,6 @@ public class DoubleIntegerMaxHeap implements DoubleIntegerHeap {
@Override
public void replaceTopElement(double reinsert, int val) {
heapifyDown(reinsert, val);
- ++modCount;
}
/**
@@ -232,7 +137,7 @@ public class DoubleIntegerMaxHeap implements DoubleIntegerHeap {
* @param cur Current object
* @param val Current value
*/
- private void heapifyUp2(int twopos, double cur, int val) {
+ private void heapifyUp(int twopos, double cur, int val) {
while (twopos > 0) {
final int parent = (twopos - 1) >>> 1;
double par = twoheap[parent];
@@ -247,47 +152,11 @@ public class DoubleIntegerMaxHeap implements DoubleIntegerHeap {
twovals[twopos] = val;
}
- /**
- * Heapify-Up method for 4-ary heap.
- *
- * @param fourpos Position in 4-ary heap.
- * @param cur Current object
- * @param val Current value
- */
- private void heapifyUp4(int fourpos, double cur, int val) {
- while (fourpos > 0) {
- final int parent = (fourpos - 1) >> 2;
- double par = fourheap[parent];
- if (cur <= par) {
- break;
- }
- fourheap[fourpos] = par;
- fourvals[fourpos] = fourvals[parent];
- fourpos = parent;
- }
- if (fourpos == 0 && twoheap[0] < cur) {
- fourheap[0] = twoheap[0];
- fourvals[0] = twovals[0];
- twoheap[0] = cur;
- twovals[0] = val;
- } else {
- fourheap[fourpos] = cur;
- fourvals[fourpos] = val;
- }
- }
-
@Override
public void poll() {
--size;
// Replacement object:
- if (size >= TWO_HEAP_MAX_SIZE) {
- final int last = size - TWO_HEAP_MAX_SIZE;
- final double reinsert = fourheap[last];
- final int reinsertv = fourvals[last];
- fourheap[last] = 0.0;
- fourvals[last] = 0;
- heapifyDown(reinsert, reinsertv);
- } else if (size > 0) {
+ if (size > 0) {
final double reinsert = twoheap[size];
final int reinsertv = twovals[size];
twoheap[size] = 0.0;
@@ -297,42 +166,17 @@ public class DoubleIntegerMaxHeap implements DoubleIntegerHeap {
twoheap[0] = 0.0;
twovals[0] = 0;
}
- ++modCount;
}
/**
* Invoke heapify-down for the root object.
*
- * @param reinsert Object to insert.
- * @param val Value to reinsert.
- */
- private void heapifyDown(double reinsert, int val) {
- if (size > TWO_HEAP_MAX_SIZE) {
- // Special case: 3-ary situation.
- final int best = (twoheap[1] >= twoheap[2]) ? 1 : 2;
- if (fourheap[0] > twoheap[best]) {
- twoheap[0] = fourheap[0];
- twovals[0] = fourvals[0];
- heapifyDown4(0, reinsert, val);
- } else {
- twoheap[0] = twoheap[best];
- twovals[0] = twovals[best];
- heapifyDown2(best, reinsert, val);
- }
- return;
- }
- heapifyDown2(0, reinsert, val);
- }
-
- /**
- * Heapify-Down for 2-ary heap.
- *
- * @param twopos Position in 2-ary heap.
- * @param cur Current object
+ * @param cur Object to insert.
* @param val Value to reinsert.
*/
- private void heapifyDown2(int twopos, double cur, int val) {
- final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1;
+ private void heapifyDown(double cur, int val) {
+ final int stop = size >>> 1;
+ int twopos = 0;
while (twopos < stop) {
int bestchild = (twopos << 1) + 1;
double best = twoheap[bestchild];
@@ -352,54 +196,6 @@ public class DoubleIntegerMaxHeap implements DoubleIntegerHeap {
twovals[twopos] = val;
}
- /**
- * Heapify-Down for 4-ary heap.
- *
- * @param fourpos Position in 4-ary heap.
- * @param cur Current object
- * @param val Value to reinsert.
- */
- private void heapifyDown4(int fourpos, double cur, int val) {
- final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2;
- while (fourpos < stop) {
- final int child = (fourpos << 2) + 1;
- double best = fourheap[child];
- int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE;
- if (size > minsize) {
- double nextchild = fourheap[candidate];
- if (best < nextchild) {
- bestchild = candidate;
- best = nextchild;
- }
-
- minsize += 2;
- if (size >= minsize) {
- nextchild = fourheap[++candidate];
- if (best < nextchild) {
- bestchild = candidate;
- best = nextchild;
- }
-
- if (size > minsize) {
- nextchild = fourheap[++candidate];
- if (best < nextchild) {
- bestchild = candidate;
- best = nextchild;
- }
- }
- }
- }
- if (cur >= best) {
- break;
- }
- fourheap[fourpos] = best;
- fourvals[fourpos] = fourvals[bestchild];
- fourpos = bestchild;
- }
- fourheap[fourpos] = cur;
- fourvals[fourpos] = val;
- }
-
@Override
public double peekKey() {
return twoheap[0];
@@ -447,16 +243,8 @@ public class DoubleIntegerMaxHeap implements DoubleIntegerHeap {
*/
protected int pos = 0;
- /**
- * Modification counter we were initialized at.
- */
- protected final int myModCount = modCount;
-
@Override
public boolean valid() {
- if (modCount != myModCount) {
- throw new ConcurrentModificationException();
- }
return pos < size;
}
@@ -467,12 +255,12 @@ public class DoubleIntegerMaxHeap implements DoubleIntegerHeap {
@Override
public double getKey() {
- return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]);
+ return twoheap[pos];
}
@Override
public int getValue() {
- return ((pos < TWO_HEAP_MAX_SIZE) ? twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]);
+ return twovals[pos];
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMinHeap.java
index ca6192ad..0e4e2204 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMinHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleIntegerMinHeap.java
@@ -24,32 +24,11 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
*/
import java.util.Arrays;
-import java.util.ConcurrentModificationException;
import de.lmu.ifi.dbs.elki.math.MathUtil;
/**
- * Advanced priority queue class, based on a binary heap (for small sizes),
- * which will for larger heaps be accompanied by a 4-ary heap (attached below
- * the root of the two-ary heap, making the root actually 3-ary).
- *
- * This code was automatically instantiated for the types: Double and Integer
- *
- * This combination was found to work quite well in benchmarks, but YMMV.
- *
- * Some other observations from benchmarking:
- * <ul>
- * <li>Bulk loading did not improve things</li>
- * <li>Primitive heaps are substantially faster.</li>
- * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and
- * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li>
- * <li>Workload makes a huge difference. A load-once, poll-until-empty priority
- * queue is something different than e.g. a top-k heap, which will see a lot of
- * top element replacements.</li>
- * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for
- * top-k make a difference.</li>
- * <li>Different day, different benchmark results ...</li>
- * </ul>
+ * Binary heap for primitive types.
*
* @author Erich Schubert
*
@@ -67,49 +46,16 @@ public class DoubleIntegerMinHeap implements DoubleIntegerHeap {
protected int[] twovals;
/**
- * Extension heap.
- */
- protected double[] fourheap;
-
- /**
- * Extension heapvalues.
- */
- protected int[] fourvals;
-
- /**
* Current size of heap.
*/
protected int size;
/**
- * (Structural) modification counter. Used to invalidate iterators.
- */
- protected int modCount = 0;
-
- /**
- * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements.
- */
- private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1;
-
- /**
* Initial size of the 2-ary heap.
*/
private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1;
/**
- * Initial size of 4-ary heap when initialized.
- *
- * 21 = 4-ary heap of height 2: 1 + 4 + 4*4
- *
- * 85 = 4-ary heap of height 3: 21 + 4*4*4
- *
- * 341 = 4-ary heap of height 4: 85 + 4*4*4*4
- *
- * Since we last grew by 255 (to 511), let's use 341.
- */
- private final static int FOUR_HEAP_INITIAL_SIZE = 341;
-
- /**
* Constructor, with default size.
*/
public DoubleIntegerMinHeap() {
@@ -119,10 +65,6 @@ public class DoubleIntegerMinHeap implements DoubleIntegerHeap {
this.twoheap = twoheap;
this.twovals = twovals;
- this.fourheap = null;
- this.fourvals = null;
- this.size = 0;
- this.modCount = 0;
}
/**
@@ -132,35 +74,17 @@ public class DoubleIntegerMinHeap implements DoubleIntegerHeap {
*/
public DoubleIntegerMinHeap(int minsize) {
super();
- if (minsize < TWO_HEAP_MAX_SIZE) {
- final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
- double[] twoheap = new double[size];
- int[] twovals = new int[size];
+ final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
+ double[] twoheap = new double[size];
+ int[] twovals = new int[size];
- this.twoheap = twoheap;
- this.twovals = twovals;
- this.fourheap = null;
- this.fourvals = null;
- } else {
- double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE];
- int[] twovals = new int[TWO_HEAP_INITIAL_SIZE];
- double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)];
- int[] fourvals = new int[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)];
- this.twoheap = twoheap;
- this.twovals = twovals;
- this.fourheap = fourheap;
- this.fourvals = fourvals;
- }
- this.size = 0;
- this.modCount = 0;
+ this.twoheap = twoheap;
+ this.twovals = twovals;
}
@Override
public void clear() {
size = 0;
- ++modCount;
- fourheap = null;
- fourvals = null;
Arrays.fill(twoheap, 0.0);
Arrays.fill(twovals, 0);
}
@@ -180,34 +104,16 @@ public class DoubleIntegerMinHeap implements DoubleIntegerHeap {
final double co = o;
final int cv = v;
// System.err.println("Add: " + o);
- if (size < TWO_HEAP_MAX_SIZE) {
- if (size >= twoheap.length) {
- // Grow by one layer.
- twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
- twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1);
- }
- final int twopos = size;
- twoheap[twopos] = co;
- twovals[twopos] = cv;
- ++size;
- heapifyUp2(twopos, co, cv);
- ++modCount;
- } else {
- final int fourpos = size - TWO_HEAP_MAX_SIZE;
- if (fourheap == null) {
- fourheap = new double[FOUR_HEAP_INITIAL_SIZE];
- fourvals = new int[FOUR_HEAP_INITIAL_SIZE];
- } else if (fourpos >= fourheap.length) {
- // Grow extension heap by half.
- fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1));
- fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1));
- }
- fourheap[fourpos] = co;
- fourvals[fourpos] = cv;
- ++size;
- heapifyUp4(fourpos, co, cv);
- ++modCount;
+ if (size >= twoheap.length) {
+ // Grow by one layer.
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
+ twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1);
}
+ final int twopos = size;
+ twoheap[twopos] = co;
+ twovals[twopos] = cv;
+ ++size;
+ heapifyUp(twopos, co, cv);
}
@Override
@@ -222,7 +128,6 @@ public class DoubleIntegerMinHeap implements DoubleIntegerHeap {
@Override
public void replaceTopElement(double reinsert, int val) {
heapifyDown(reinsert, val);
- ++modCount;
}
/**
@@ -232,7 +137,7 @@ public class DoubleIntegerMinHeap implements DoubleIntegerHeap {
* @param cur Current object
* @param val Current value
*/
- private void heapifyUp2(int twopos, double cur, int val) {
+ private void heapifyUp(int twopos, double cur, int val) {
while (twopos > 0) {
final int parent = (twopos - 1) >>> 1;
double par = twoheap[parent];
@@ -247,47 +152,11 @@ public class DoubleIntegerMinHeap implements DoubleIntegerHeap {
twovals[twopos] = val;
}
- /**
- * Heapify-Up method for 4-ary heap.
- *
- * @param fourpos Position in 4-ary heap.
- * @param cur Current object
- * @param val Current value
- */
- private void heapifyUp4(int fourpos, double cur, int val) {
- while (fourpos > 0) {
- final int parent = (fourpos - 1) >> 2;
- double par = fourheap[parent];
- if (cur >= par) {
- break;
- }
- fourheap[fourpos] = par;
- fourvals[fourpos] = fourvals[parent];
- fourpos = parent;
- }
- if (fourpos == 0 && twoheap[0] > cur) {
- fourheap[0] = twoheap[0];
- fourvals[0] = twovals[0];
- twoheap[0] = cur;
- twovals[0] = val;
- } else {
- fourheap[fourpos] = cur;
- fourvals[fourpos] = val;
- }
- }
-
@Override
public void poll() {
--size;
// Replacement object:
- if (size >= TWO_HEAP_MAX_SIZE) {
- final int last = size - TWO_HEAP_MAX_SIZE;
- final double reinsert = fourheap[last];
- final int reinsertv = fourvals[last];
- fourheap[last] = 0.0;
- fourvals[last] = 0;
- heapifyDown(reinsert, reinsertv);
- } else if (size > 0) {
+ if (size > 0) {
final double reinsert = twoheap[size];
final int reinsertv = twovals[size];
twoheap[size] = 0.0;
@@ -297,42 +166,17 @@ public class DoubleIntegerMinHeap implements DoubleIntegerHeap {
twoheap[0] = 0.0;
twovals[0] = 0;
}
- ++modCount;
}
/**
* Invoke heapify-down for the root object.
*
- * @param reinsert Object to insert.
- * @param val Value to reinsert.
- */
- private void heapifyDown(double reinsert, int val) {
- if (size > TWO_HEAP_MAX_SIZE) {
- // Special case: 3-ary situation.
- final int best = (twoheap[1] <= twoheap[2]) ? 1 : 2;
- if (fourheap[0] < twoheap[best]) {
- twoheap[0] = fourheap[0];
- twovals[0] = fourvals[0];
- heapifyDown4(0, reinsert, val);
- } else {
- twoheap[0] = twoheap[best];
- twovals[0] = twovals[best];
- heapifyDown2(best, reinsert, val);
- }
- return;
- }
- heapifyDown2(0, reinsert, val);
- }
-
- /**
- * Heapify-Down for 2-ary heap.
- *
- * @param twopos Position in 2-ary heap.
- * @param cur Current object
+ * @param cur Object to insert.
* @param val Value to reinsert.
*/
- private void heapifyDown2(int twopos, double cur, int val) {
- final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1;
+ private void heapifyDown(double cur, int val) {
+ final int stop = size >>> 1;
+ int twopos = 0;
while (twopos < stop) {
int bestchild = (twopos << 1) + 1;
double best = twoheap[bestchild];
@@ -352,54 +196,6 @@ public class DoubleIntegerMinHeap implements DoubleIntegerHeap {
twovals[twopos] = val;
}
- /**
- * Heapify-Down for 4-ary heap.
- *
- * @param fourpos Position in 4-ary heap.
- * @param cur Current object
- * @param val Value to reinsert.
- */
- private void heapifyDown4(int fourpos, double cur, int val) {
- final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2;
- while (fourpos < stop) {
- final int child = (fourpos << 2) + 1;
- double best = fourheap[child];
- int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE;
- if (size > minsize) {
- double nextchild = fourheap[candidate];
- if (best > nextchild) {
- bestchild = candidate;
- best = nextchild;
- }
-
- minsize += 2;
- if (size >= minsize) {
- nextchild = fourheap[++candidate];
- if (best > nextchild) {
- bestchild = candidate;
- best = nextchild;
- }
-
- if (size > minsize) {
- nextchild = fourheap[++candidate];
- if (best > nextchild) {
- bestchild = candidate;
- best = nextchild;
- }
- }
- }
- }
- if (cur <= best) {
- break;
- }
- fourheap[fourpos] = best;
- fourvals[fourpos] = fourvals[bestchild];
- fourpos = bestchild;
- }
- fourheap[fourpos] = cur;
- fourvals[fourpos] = val;
- }
-
@Override
public double peekKey() {
return twoheap[0];
@@ -447,16 +243,8 @@ public class DoubleIntegerMinHeap implements DoubleIntegerHeap {
*/
protected int pos = 0;
- /**
- * Modification counter we were initialized at.
- */
- protected final int myModCount = modCount;
-
@Override
public boolean valid() {
- if (modCount != myModCount) {
- throw new ConcurrentModificationException();
- }
return pos < size;
}
@@ -467,12 +255,12 @@ public class DoubleIntegerMinHeap implements DoubleIntegerHeap {
@Override
public double getKey() {
- return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]);
+ return twoheap[pos];
}
@Override
public int getValue() {
- return ((pos < TWO_HEAP_MAX_SIZE) ? twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]);
+ return twovals[pos];
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMaxHeap.java
index 6d15656c..b7508a61 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMaxHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMaxHeap.java
@@ -24,32 +24,11 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
*/
import java.util.Arrays;
-import java.util.ConcurrentModificationException;
import de.lmu.ifi.dbs.elki.math.MathUtil;
/**
- * Advanced priority queue class, based on a binary heap (for small sizes),
- * which will for larger heaps be accompanied by a 4-ary heap (attached below
- * the root of the two-ary heap, making the root actually 3-ary).
- *
- * This code was automatically instantiated for the types: Double and Long
- *
- * This combination was found to work quite well in benchmarks, but YMMV.
- *
- * Some other observations from benchmarking:
- * <ul>
- * <li>Bulk loading did not improve things</li>
- * <li>Primitive heaps are substantially faster.</li>
- * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and
- * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li>
- * <li>Workload makes a huge difference. A load-once, poll-until-empty priority
- * queue is something different than e.g. a top-k heap, which will see a lot of
- * top element replacements.</li>
- * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for
- * top-k make a difference.</li>
- * <li>Different day, different benchmark results ...</li>
- * </ul>
+ * Binary heap for primitive types.
*
* @author Erich Schubert
*
@@ -67,49 +46,16 @@ public class DoubleLongMaxHeap implements DoubleLongHeap {
protected long[] twovals;
/**
- * Extension heap.
- */
- protected double[] fourheap;
-
- /**
- * Extension heapvalues.
- */
- protected long[] fourvals;
-
- /**
* Current size of heap.
*/
protected int size;
/**
- * (Structural) modification counter. Used to invalidate iterators.
- */
- protected int modCount = 0;
-
- /**
- * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements.
- */
- private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1;
-
- /**
* Initial size of the 2-ary heap.
*/
private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1;
/**
- * Initial size of 4-ary heap when initialized.
- *
- * 21 = 4-ary heap of height 2: 1 + 4 + 4*4
- *
- * 85 = 4-ary heap of height 3: 21 + 4*4*4
- *
- * 341 = 4-ary heap of height 4: 85 + 4*4*4*4
- *
- * Since we last grew by 255 (to 511), let's use 341.
- */
- private final static int FOUR_HEAP_INITIAL_SIZE = 341;
-
- /**
* Constructor, with default size.
*/
public DoubleLongMaxHeap() {
@@ -119,10 +65,6 @@ public class DoubleLongMaxHeap implements DoubleLongHeap {
this.twoheap = twoheap;
this.twovals = twovals;
- this.fourheap = null;
- this.fourvals = null;
- this.size = 0;
- this.modCount = 0;
}
/**
@@ -132,35 +74,17 @@ public class DoubleLongMaxHeap implements DoubleLongHeap {
*/
public DoubleLongMaxHeap(int minsize) {
super();
- if (minsize < TWO_HEAP_MAX_SIZE) {
- final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
- double[] twoheap = new double[size];
- long[] twovals = new long[size];
+ final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
+ double[] twoheap = new double[size];
+ long[] twovals = new long[size];
- this.twoheap = twoheap;
- this.twovals = twovals;
- this.fourheap = null;
- this.fourvals = null;
- } else {
- double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE];
- long[] twovals = new long[TWO_HEAP_INITIAL_SIZE];
- double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)];
- long[] fourvals = new long[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)];
- this.twoheap = twoheap;
- this.twovals = twovals;
- this.fourheap = fourheap;
- this.fourvals = fourvals;
- }
- this.size = 0;
- this.modCount = 0;
+ this.twoheap = twoheap;
+ this.twovals = twovals;
}
@Override
public void clear() {
size = 0;
- ++modCount;
- fourheap = null;
- fourvals = null;
Arrays.fill(twoheap, 0.0);
Arrays.fill(twovals, 0);
}
@@ -180,34 +104,16 @@ public class DoubleLongMaxHeap implements DoubleLongHeap {
final double co = o;
final long cv = v;
// System.err.println("Add: " + o);
- if (size < TWO_HEAP_MAX_SIZE) {
- if (size >= twoheap.length) {
- // Grow by one layer.
- twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
- twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1);
- }
- final int twopos = size;
- twoheap[twopos] = co;
- twovals[twopos] = cv;
- ++size;
- heapifyUp2(twopos, co, cv);
- ++modCount;
- } else {
- final int fourpos = size - TWO_HEAP_MAX_SIZE;
- if (fourheap == null) {
- fourheap = new double[FOUR_HEAP_INITIAL_SIZE];
- fourvals = new long[FOUR_HEAP_INITIAL_SIZE];
- } else if (fourpos >= fourheap.length) {
- // Grow extension heap by half.
- fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1));
- fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1));
- }
- fourheap[fourpos] = co;
- fourvals[fourpos] = cv;
- ++size;
- heapifyUp4(fourpos, co, cv);
- ++modCount;
+ if (size >= twoheap.length) {
+ // Grow by one layer.
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
+ twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1);
}
+ final int twopos = size;
+ twoheap[twopos] = co;
+ twovals[twopos] = cv;
+ ++size;
+ heapifyUp(twopos, co, cv);
}
@Override
@@ -222,7 +128,6 @@ public class DoubleLongMaxHeap implements DoubleLongHeap {
@Override
public void replaceTopElement(double reinsert, long val) {
heapifyDown(reinsert, val);
- ++modCount;
}
/**
@@ -232,7 +137,7 @@ public class DoubleLongMaxHeap implements DoubleLongHeap {
* @param cur Current object
* @param val Current value
*/
- private void heapifyUp2(int twopos, double cur, long val) {
+ private void heapifyUp(int twopos, double cur, long val) {
while (twopos > 0) {
final int parent = (twopos - 1) >>> 1;
double par = twoheap[parent];
@@ -247,47 +152,11 @@ public class DoubleLongMaxHeap implements DoubleLongHeap {
twovals[twopos] = val;
}
- /**
- * Heapify-Up method for 4-ary heap.
- *
- * @param fourpos Position in 4-ary heap.
- * @param cur Current object
- * @param val Current value
- */
- private void heapifyUp4(int fourpos, double cur, long val) {
- while (fourpos > 0) {
- final int parent = (fourpos - 1) >> 2;
- double par = fourheap[parent];
- if (cur <= par) {
- break;
- }
- fourheap[fourpos] = par;
- fourvals[fourpos] = fourvals[parent];
- fourpos = parent;
- }
- if (fourpos == 0 && twoheap[0] < cur) {
- fourheap[0] = twoheap[0];
- fourvals[0] = twovals[0];
- twoheap[0] = cur;
- twovals[0] = val;
- } else {
- fourheap[fourpos] = cur;
- fourvals[fourpos] = val;
- }
- }
-
@Override
public void poll() {
--size;
// Replacement object:
- if (size >= TWO_HEAP_MAX_SIZE) {
- final int last = size - TWO_HEAP_MAX_SIZE;
- final double reinsert = fourheap[last];
- final long reinsertv = fourvals[last];
- fourheap[last] = 0.0;
- fourvals[last] = 0;
- heapifyDown(reinsert, reinsertv);
- } else if (size > 0) {
+ if (size > 0) {
final double reinsert = twoheap[size];
final long reinsertv = twovals[size];
twoheap[size] = 0.0;
@@ -297,42 +166,17 @@ public class DoubleLongMaxHeap implements DoubleLongHeap {
twoheap[0] = 0.0;
twovals[0] = 0;
}
- ++modCount;
}
/**
* Invoke heapify-down for the root object.
*
- * @param reinsert Object to insert.
- * @param val Value to reinsert.
- */
- private void heapifyDown(double reinsert, long val) {
- if (size > TWO_HEAP_MAX_SIZE) {
- // Special case: 3-ary situation.
- final int best = (twoheap[1] >= twoheap[2]) ? 1 : 2;
- if (fourheap[0] > twoheap[best]) {
- twoheap[0] = fourheap[0];
- twovals[0] = fourvals[0];
- heapifyDown4(0, reinsert, val);
- } else {
- twoheap[0] = twoheap[best];
- twovals[0] = twovals[best];
- heapifyDown2(best, reinsert, val);
- }
- return;
- }
- heapifyDown2(0, reinsert, val);
- }
-
- /**
- * Heapify-Down for 2-ary heap.
- *
- * @param twopos Position in 2-ary heap.
- * @param cur Current object
+ * @param cur Object to insert.
* @param val Value to reinsert.
*/
- private void heapifyDown2(int twopos, double cur, long val) {
- final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1;
+ private void heapifyDown(double cur, long val) {
+ final int stop = size >>> 1;
+ int twopos = 0;
while (twopos < stop) {
int bestchild = (twopos << 1) + 1;
double best = twoheap[bestchild];
@@ -352,54 +196,6 @@ public class DoubleLongMaxHeap implements DoubleLongHeap {
twovals[twopos] = val;
}
- /**
- * Heapify-Down for 4-ary heap.
- *
- * @param fourpos Position in 4-ary heap.
- * @param cur Current object
- * @param val Value to reinsert.
- */
- private void heapifyDown4(int fourpos, double cur, long val) {
- final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2;
- while (fourpos < stop) {
- final int child = (fourpos << 2) + 1;
- double best = fourheap[child];
- int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE;
- if (size > minsize) {
- double nextchild = fourheap[candidate];
- if (best < nextchild) {
- bestchild = candidate;
- best = nextchild;
- }
-
- minsize += 2;
- if (size >= minsize) {
- nextchild = fourheap[++candidate];
- if (best < nextchild) {
- bestchild = candidate;
- best = nextchild;
- }
-
- if (size > minsize) {
- nextchild = fourheap[++candidate];
- if (best < nextchild) {
- bestchild = candidate;
- best = nextchild;
- }
- }
- }
- }
- if (cur >= best) {
- break;
- }
- fourheap[fourpos] = best;
- fourvals[fourpos] = fourvals[bestchild];
- fourpos = bestchild;
- }
- fourheap[fourpos] = cur;
- fourvals[fourpos] = val;
- }
-
@Override
public double peekKey() {
return twoheap[0];
@@ -447,16 +243,8 @@ public class DoubleLongMaxHeap implements DoubleLongHeap {
*/
protected int pos = 0;
- /**
- * Modification counter we were initialized at.
- */
- protected final int myModCount = modCount;
-
@Override
public boolean valid() {
- if (modCount != myModCount) {
- throw new ConcurrentModificationException();
- }
return pos < size;
}
@@ -467,12 +255,12 @@ public class DoubleLongMaxHeap implements DoubleLongHeap {
@Override
public double getKey() {
- return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]);
+ return twoheap[pos];
}
@Override
public long getValue() {
- return ((pos < TWO_HEAP_MAX_SIZE) ? twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]);
+ return twovals[pos];
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMinHeap.java
index d38eb6e3..9fbe0300 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMinHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleLongMinHeap.java
@@ -24,32 +24,11 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
*/
import java.util.Arrays;
-import java.util.ConcurrentModificationException;
import de.lmu.ifi.dbs.elki.math.MathUtil;
/**
- * Advanced priority queue class, based on a binary heap (for small sizes),
- * which will for larger heaps be accompanied by a 4-ary heap (attached below
- * the root of the two-ary heap, making the root actually 3-ary).
- *
- * This code was automatically instantiated for the types: Double and Long
- *
- * This combination was found to work quite well in benchmarks, but YMMV.
- *
- * Some other observations from benchmarking:
- * <ul>
- * <li>Bulk loading did not improve things</li>
- * <li>Primitive heaps are substantially faster.</li>
- * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and
- * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li>
- * <li>Workload makes a huge difference. A load-once, poll-until-empty priority
- * queue is something different than e.g. a top-k heap, which will see a lot of
- * top element replacements.</li>
- * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for
- * top-k make a difference.</li>
- * <li>Different day, different benchmark results ...</li>
- * </ul>
+ * Binary heap for primitive types.
*
* @author Erich Schubert
*
@@ -67,49 +46,16 @@ public class DoubleLongMinHeap implements DoubleLongHeap {
protected long[] twovals;
/**
- * Extension heap.
- */
- protected double[] fourheap;
-
- /**
- * Extension heapvalues.
- */
- protected long[] fourvals;
-
- /**
* Current size of heap.
*/
protected int size;
/**
- * (Structural) modification counter. Used to invalidate iterators.
- */
- protected int modCount = 0;
-
- /**
- * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements.
- */
- private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1;
-
- /**
* Initial size of the 2-ary heap.
*/
private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1;
/**
- * Initial size of 4-ary heap when initialized.
- *
- * 21 = 4-ary heap of height 2: 1 + 4 + 4*4
- *
- * 85 = 4-ary heap of height 3: 21 + 4*4*4
- *
- * 341 = 4-ary heap of height 4: 85 + 4*4*4*4
- *
- * Since we last grew by 255 (to 511), let's use 341.
- */
- private final static int FOUR_HEAP_INITIAL_SIZE = 341;
-
- /**
* Constructor, with default size.
*/
public DoubleLongMinHeap() {
@@ -119,10 +65,6 @@ public class DoubleLongMinHeap implements DoubleLongHeap {
this.twoheap = twoheap;
this.twovals = twovals;
- this.fourheap = null;
- this.fourvals = null;
- this.size = 0;
- this.modCount = 0;
}
/**
@@ -132,35 +74,17 @@ public class DoubleLongMinHeap implements DoubleLongHeap {
*/
public DoubleLongMinHeap(int minsize) {
super();
- if (minsize < TWO_HEAP_MAX_SIZE) {
- final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
- double[] twoheap = new double[size];
- long[] twovals = new long[size];
+ final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
+ double[] twoheap = new double[size];
+ long[] twovals = new long[size];
- this.twoheap = twoheap;
- this.twovals = twovals;
- this.fourheap = null;
- this.fourvals = null;
- } else {
- double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE];
- long[] twovals = new long[TWO_HEAP_INITIAL_SIZE];
- double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)];
- long[] fourvals = new long[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)];
- this.twoheap = twoheap;
- this.twovals = twovals;
- this.fourheap = fourheap;
- this.fourvals = fourvals;
- }
- this.size = 0;
- this.modCount = 0;
+ this.twoheap = twoheap;
+ this.twovals = twovals;
}
@Override
public void clear() {
size = 0;
- ++modCount;
- fourheap = null;
- fourvals = null;
Arrays.fill(twoheap, 0.0);
Arrays.fill(twovals, 0);
}
@@ -180,34 +104,16 @@ public class DoubleLongMinHeap implements DoubleLongHeap {
final double co = o;
final long cv = v;
// System.err.println("Add: " + o);
- if (size < TWO_HEAP_MAX_SIZE) {
- if (size >= twoheap.length) {
- // Grow by one layer.
- twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
- twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1);
- }
- final int twopos = size;
- twoheap[twopos] = co;
- twovals[twopos] = cv;
- ++size;
- heapifyUp2(twopos, co, cv);
- ++modCount;
- } else {
- final int fourpos = size - TWO_HEAP_MAX_SIZE;
- if (fourheap == null) {
- fourheap = new double[FOUR_HEAP_INITIAL_SIZE];
- fourvals = new long[FOUR_HEAP_INITIAL_SIZE];
- } else if (fourpos >= fourheap.length) {
- // Grow extension heap by half.
- fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1));
- fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1));
- }
- fourheap[fourpos] = co;
- fourvals[fourpos] = cv;
- ++size;
- heapifyUp4(fourpos, co, cv);
- ++modCount;
+ if (size >= twoheap.length) {
+ // Grow by one layer.
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
+ twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1);
}
+ final int twopos = size;
+ twoheap[twopos] = co;
+ twovals[twopos] = cv;
+ ++size;
+ heapifyUp(twopos, co, cv);
}
@Override
@@ -222,7 +128,6 @@ public class DoubleLongMinHeap implements DoubleLongHeap {
@Override
public void replaceTopElement(double reinsert, long val) {
heapifyDown(reinsert, val);
- ++modCount;
}
/**
@@ -232,7 +137,7 @@ public class DoubleLongMinHeap implements DoubleLongHeap {
* @param cur Current object
* @param val Current value
*/
- private void heapifyUp2(int twopos, double cur, long val) {
+ private void heapifyUp(int twopos, double cur, long val) {
while (twopos > 0) {
final int parent = (twopos - 1) >>> 1;
double par = twoheap[parent];
@@ -247,47 +152,11 @@ public class DoubleLongMinHeap implements DoubleLongHeap {
twovals[twopos] = val;
}
- /**
- * Heapify-Up method for 4-ary heap.
- *
- * @param fourpos Position in 4-ary heap.
- * @param cur Current object
- * @param val Current value
- */
- private void heapifyUp4(int fourpos, double cur, long val) {
- while (fourpos > 0) {
- final int parent = (fourpos - 1) >> 2;
- double par = fourheap[parent];
- if (cur >= par) {
- break;
- }
- fourheap[fourpos] = par;
- fourvals[fourpos] = fourvals[parent];
- fourpos = parent;
- }
- if (fourpos == 0 && twoheap[0] > cur) {
- fourheap[0] = twoheap[0];
- fourvals[0] = twovals[0];
- twoheap[0] = cur;
- twovals[0] = val;
- } else {
- fourheap[fourpos] = cur;
- fourvals[fourpos] = val;
- }
- }
-
@Override
public void poll() {
--size;
// Replacement object:
- if (size >= TWO_HEAP_MAX_SIZE) {
- final int last = size - TWO_HEAP_MAX_SIZE;
- final double reinsert = fourheap[last];
- final long reinsertv = fourvals[last];
- fourheap[last] = 0.0;
- fourvals[last] = 0;
- heapifyDown(reinsert, reinsertv);
- } else if (size > 0) {
+ if (size > 0) {
final double reinsert = twoheap[size];
final long reinsertv = twovals[size];
twoheap[size] = 0.0;
@@ -297,42 +166,17 @@ public class DoubleLongMinHeap implements DoubleLongHeap {
twoheap[0] = 0.0;
twovals[0] = 0;
}
- ++modCount;
}
/**
* Invoke heapify-down for the root object.
*
- * @param reinsert Object to insert.
- * @param val Value to reinsert.
- */
- private void heapifyDown(double reinsert, long val) {
- if (size > TWO_HEAP_MAX_SIZE) {
- // Special case: 3-ary situation.
- final int best = (twoheap[1] <= twoheap[2]) ? 1 : 2;
- if (fourheap[0] < twoheap[best]) {
- twoheap[0] = fourheap[0];
- twovals[0] = fourvals[0];
- heapifyDown4(0, reinsert, val);
- } else {
- twoheap[0] = twoheap[best];
- twovals[0] = twovals[best];
- heapifyDown2(best, reinsert, val);
- }
- return;
- }
- heapifyDown2(0, reinsert, val);
- }
-
- /**
- * Heapify-Down for 2-ary heap.
- *
- * @param twopos Position in 2-ary heap.
- * @param cur Current object
+ * @param cur Object to insert.
* @param val Value to reinsert.
*/
- private void heapifyDown2(int twopos, double cur, long val) {
- final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1;
+ private void heapifyDown(double cur, long val) {
+ final int stop = size >>> 1;
+ int twopos = 0;
while (twopos < stop) {
int bestchild = (twopos << 1) + 1;
double best = twoheap[bestchild];
@@ -352,54 +196,6 @@ public class DoubleLongMinHeap implements DoubleLongHeap {
twovals[twopos] = val;
}
- /**
- * Heapify-Down for 4-ary heap.
- *
- * @param fourpos Position in 4-ary heap.
- * @param cur Current object
- * @param val Value to reinsert.
- */
- private void heapifyDown4(int fourpos, double cur, long val) {
- final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2;
- while (fourpos < stop) {
- final int child = (fourpos << 2) + 1;
- double best = fourheap[child];
- int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE;
- if (size > minsize) {
- double nextchild = fourheap[candidate];
- if (best > nextchild) {
- bestchild = candidate;
- best = nextchild;
- }
-
- minsize += 2;
- if (size >= minsize) {
- nextchild = fourheap[++candidate];
- if (best > nextchild) {
- bestchild = candidate;
- best = nextchild;
- }
-
- if (size > minsize) {
- nextchild = fourheap[++candidate];
- if (best > nextchild) {
- bestchild = candidate;
- best = nextchild;
- }
- }
- }
- }
- if (cur <= best) {
- break;
- }
- fourheap[fourpos] = best;
- fourvals[fourpos] = fourvals[bestchild];
- fourpos = bestchild;
- }
- fourheap[fourpos] = cur;
- fourvals[fourpos] = val;
- }
-
@Override
public double peekKey() {
return twoheap[0];
@@ -447,16 +243,8 @@ public class DoubleLongMinHeap implements DoubleLongHeap {
*/
protected int pos = 0;
- /**
- * Modification counter we were initialized at.
- */
- protected final int myModCount = modCount;
-
@Override
public boolean valid() {
- if (modCount != myModCount) {
- throw new ConcurrentModificationException();
- }
return pos < size;
}
@@ -467,12 +255,12 @@ public class DoubleLongMinHeap implements DoubleLongHeap {
@Override
public double getKey() {
- return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]);
+ return twoheap[pos];
}
@Override
public long getValue() {
- return ((pos < TWO_HEAP_MAX_SIZE) ? twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]);
+ return twovals[pos];
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMaxHeap.java
index 7ea28f14..2c74b34b 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMaxHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMaxHeap.java
@@ -24,32 +24,11 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
*/
import java.util.Arrays;
-import java.util.ConcurrentModificationException;
import de.lmu.ifi.dbs.elki.math.MathUtil;
/**
- * Advanced priority queue class, based on a binary heap (for small sizes),
- * which will for larger heaps be accompanied by a 4-ary heap (attached below
- * the root of the two-ary heap, making the root actually 3-ary).
- *
- * This code was automatically instantiated for the type: Double
- *
- * This combination was found to work quite well in benchmarks, but YMMV.
- *
- * Some other observations from benchmarking:
- * <ul>
- * <li>Bulk loading did not improve things</li>
- * <li>Primitive heaps are substantially faster.</li>
- * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and
- * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li>
- * <li>Workload makes a huge difference. A load-once, poll-until-empty priority
- * queue is something different than e.g. a top-k heap, which will see a lot of
- * top element replacements.</li>
- * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for
- * top-k make a difference.</li>
- * <li>Different day, different benchmark results ...</li>
- * </ul>
+ * Binary heap for primitive types.
*
* @author Erich Schubert
*
@@ -62,44 +41,16 @@ public class DoubleMaxHeap implements DoubleHeap {
protected double[] twoheap;
/**
- * Extension heap.
- */
- protected double[] fourheap;
-
- /**
* Current size of heap.
*/
protected int size;
/**
- * (Structural) modification counter. Used to invalidate iterators.
- */
- protected int modCount = 0;
-
- /**
- * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements.
- */
- private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1;
-
- /**
* Initial size of the 2-ary heap.
*/
private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1;
/**
- * Initial size of 4-ary heap when initialized.
- *
- * 21 = 4-ary heap of height 2: 1 + 4 + 4*4
- *
- * 85 = 4-ary heap of height 3: 21 + 4*4*4
- *
- * 341 = 4-ary heap of height 4: 85 + 4*4*4*4
- *
- * Since we last grew by 255 (to 511), let's use 341.
- */
- private final static int FOUR_HEAP_INITIAL_SIZE = 341;
-
- /**
* Constructor, with default size.
*/
public DoubleMaxHeap() {
@@ -107,9 +58,6 @@ public class DoubleMaxHeap implements DoubleHeap {
double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE];
this.twoheap = twoheap;
- this.fourheap = null;
- this.size = 0;
- this.modCount = 0;
}
/**
@@ -119,27 +67,15 @@ public class DoubleMaxHeap implements DoubleHeap {
*/
public DoubleMaxHeap(int minsize) {
super();
- if (minsize < TWO_HEAP_MAX_SIZE) {
- final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
- double[] twoheap = new double[size];
+ final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
+ double[] twoheap = new double[size];
- this.twoheap = twoheap;
- this.fourheap = null;
- } else {
- double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE];
- double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)];
- this.twoheap = twoheap;
- this.fourheap = fourheap;
- }
- this.size = 0;
- this.modCount = 0;
+ this.twoheap = twoheap;
}
@Override
public void clear() {
size = 0;
- ++modCount;
- fourheap = null;
Arrays.fill(twoheap, 0.0);
}
@@ -157,29 +93,14 @@ public class DoubleMaxHeap implements DoubleHeap {
public void add(double o) {
final double co = o;
// System.err.println("Add: " + o);
- if (size < TWO_HEAP_MAX_SIZE) {
- if (size >= twoheap.length) {
- // Grow by one layer.
- twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
- }
- final int twopos = size;
- twoheap[twopos] = co;
- ++size;
- heapifyUp2(twopos, co);
- ++modCount;
- } else {
- final int fourpos = size - TWO_HEAP_MAX_SIZE;
- if (fourheap == null) {
- fourheap = new double[FOUR_HEAP_INITIAL_SIZE];
- } else if (fourpos >= fourheap.length) {
- // Grow extension heap by half.
- fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1));
- }
- fourheap[fourpos] = co;
- ++size;
- heapifyUp4(fourpos, co);
- ++modCount;
+ if (size >= twoheap.length) {
+ // Grow by one layer.
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
}
+ final int twopos = size;
+ twoheap[twopos] = co;
+ ++size;
+ heapifyUp(twopos, co);
}
@Override
@@ -195,7 +116,6 @@ public class DoubleMaxHeap implements DoubleHeap {
public double replaceTopElement(double reinsert) {
final double ret = twoheap[0];
heapifyDown( reinsert);
- ++modCount;
return ret;
}
@@ -205,7 +125,7 @@ public class DoubleMaxHeap implements DoubleHeap {
* @param twopos Position in 2-ary heap.
* @param cur Current object
*/
- private void heapifyUp2(int twopos, double cur) {
+ private void heapifyUp(int twopos, double cur) {
while (twopos > 0) {
final int parent = (twopos - 1) >>> 1;
double par = twoheap[parent];
@@ -218,80 +138,29 @@ public class DoubleMaxHeap implements DoubleHeap {
twoheap[twopos] = cur;
}
- /**
- * Heapify-Up method for 4-ary heap.
- *
- * @param fourpos Position in 4-ary heap.
- * @param cur Current object
- */
- private void heapifyUp4(int fourpos, double cur) {
- while (fourpos > 0) {
- final int parent = (fourpos - 1) >> 2;
- double par = fourheap[parent];
- if (cur <= par) {
- break;
- }
- fourheap[fourpos] = par;
- fourpos = parent;
- }
- if (fourpos == 0 && twoheap[0] < cur) {
- fourheap[0] = twoheap[0];
- twoheap[0] = cur;
- } else {
- fourheap[fourpos] = cur;
- }
- }
-
@Override
public double poll() {
final double ret = twoheap[0];
--size;
// Replacement object:
- if (size >= TWO_HEAP_MAX_SIZE) {
- final int last = size - TWO_HEAP_MAX_SIZE;
- final double reinsert = fourheap[last];
- fourheap[last] = 0.0;
- heapifyDown(reinsert);
- } else if (size > 0) {
+ if (size > 0) {
final double reinsert = twoheap[size];
twoheap[size] = 0.0;
heapifyDown(reinsert);
} else {
twoheap[0] = 0.0;
}
- ++modCount;
return ret;
}
/**
* Invoke heapify-down for the root object.
*
- * @param reinsert Object to insert.
- */
- private void heapifyDown(double reinsert) {
- if (size > TWO_HEAP_MAX_SIZE) {
- // Special case: 3-ary situation.
- final int best = (twoheap[1] >= twoheap[2]) ? 1 : 2;
- if (fourheap[0] > twoheap[best]) {
- twoheap[0] = fourheap[0];
- heapifyDown4(0, reinsert);
- } else {
- twoheap[0] = twoheap[best];
- heapifyDown2(best, reinsert);
- }
- return;
- }
- heapifyDown2(0, reinsert);
- }
-
- /**
- * Heapify-Down for 2-ary heap.
- *
- * @param twopos Position in 2-ary heap.
- * @param cur Current object
+ * @param cur Object to insert.
*/
- private void heapifyDown2(int twopos, double cur) {
- final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1;
+ private void heapifyDown(double cur) {
+ final int stop = size >>> 1;
+ int twopos = 0;
while (twopos < stop) {
int bestchild = (twopos << 1) + 1;
double best = twoheap[bestchild];
@@ -309,51 +178,6 @@ public class DoubleMaxHeap implements DoubleHeap {
twoheap[twopos] = cur;
}
- /**
- * Heapify-Down for 4-ary heap.
- *
- * @param fourpos Position in 4-ary heap.
- * @param cur Current object
- */
- private void heapifyDown4(int fourpos, double cur) {
- final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2;
- while (fourpos < stop) {
- final int child = (fourpos << 2) + 1;
- double best = fourheap[child];
- int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE;
- if (size > minsize) {
- double nextchild = fourheap[candidate];
- if (best < nextchild) {
- bestchild = candidate;
- best = nextchild;
- }
-
- minsize += 2;
- if (size >= minsize) {
- nextchild = fourheap[++candidate];
- if (best < nextchild) {
- bestchild = candidate;
- best = nextchild;
- }
-
- if (size > minsize) {
- nextchild = fourheap[++candidate];
- if (best < nextchild) {
- bestchild = candidate;
- best = nextchild;
- }
- }
- }
- }
- if (cur >= best) {
- break;
- }
- fourheap[fourpos] = best;
- fourpos = bestchild;
- }
- fourheap[fourpos] = cur;
- }
-
@Override
public double peek() {
return twoheap[0];
@@ -396,16 +220,8 @@ public class DoubleMaxHeap implements DoubleHeap {
*/
protected int pos = 0;
- /**
- * Modification counter we were initialized at.
- */
- protected final int myModCount = modCount;
-
@Override
public boolean valid() {
- if (modCount != myModCount) {
- throw new ConcurrentModificationException();
- }
return pos < size;
}
@@ -416,7 +232,7 @@ public class DoubleMaxHeap implements DoubleHeap {
@Override
public double get() {
- return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]);
+ return twoheap[pos];
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMinHeap.java
index e9334153..afc50296 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMinHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleMinHeap.java
@@ -24,32 +24,11 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
*/
import java.util.Arrays;
-import java.util.ConcurrentModificationException;
import de.lmu.ifi.dbs.elki.math.MathUtil;
/**
- * Advanced priority queue class, based on a binary heap (for small sizes),
- * which will for larger heaps be accompanied by a 4-ary heap (attached below
- * the root of the two-ary heap, making the root actually 3-ary).
- *
- * This code was automatically instantiated for the type: Double
- *
- * This combination was found to work quite well in benchmarks, but YMMV.
- *
- * Some other observations from benchmarking:
- * <ul>
- * <li>Bulk loading did not improve things</li>
- * <li>Primitive heaps are substantially faster.</li>
- * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and
- * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li>
- * <li>Workload makes a huge difference. A load-once, poll-until-empty priority
- * queue is something different than e.g. a top-k heap, which will see a lot of
- * top element replacements.</li>
- * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for
- * top-k make a difference.</li>
- * <li>Different day, different benchmark results ...</li>
- * </ul>
+ * Binary heap for primitive types.
*
* @author Erich Schubert
*
@@ -62,44 +41,16 @@ public class DoubleMinHeap implements DoubleHeap {
protected double[] twoheap;
/**
- * Extension heap.
- */
- protected double[] fourheap;
-
- /**
* Current size of heap.
*/
protected int size;
/**
- * (Structural) modification counter. Used to invalidate iterators.
- */
- protected int modCount = 0;
-
- /**
- * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements.
- */
- private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1;
-
- /**
* Initial size of the 2-ary heap.
*/
private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1;
/**
- * Initial size of 4-ary heap when initialized.
- *
- * 21 = 4-ary heap of height 2: 1 + 4 + 4*4
- *
- * 85 = 4-ary heap of height 3: 21 + 4*4*4
- *
- * 341 = 4-ary heap of height 4: 85 + 4*4*4*4
- *
- * Since we last grew by 255 (to 511), let's use 341.
- */
- private final static int FOUR_HEAP_INITIAL_SIZE = 341;
-
- /**
* Constructor, with default size.
*/
public DoubleMinHeap() {
@@ -107,9 +58,6 @@ public class DoubleMinHeap implements DoubleHeap {
double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE];
this.twoheap = twoheap;
- this.fourheap = null;
- this.size = 0;
- this.modCount = 0;
}
/**
@@ -119,27 +67,15 @@ public class DoubleMinHeap implements DoubleHeap {
*/
public DoubleMinHeap(int minsize) {
super();
- if (minsize < TWO_HEAP_MAX_SIZE) {
- final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
- double[] twoheap = new double[size];
+ final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
+ double[] twoheap = new double[size];
- this.twoheap = twoheap;
- this.fourheap = null;
- } else {
- double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE];
- double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)];
- this.twoheap = twoheap;
- this.fourheap = fourheap;
- }
- this.size = 0;
- this.modCount = 0;
+ this.twoheap = twoheap;
}
@Override
public void clear() {
size = 0;
- ++modCount;
- fourheap = null;
Arrays.fill(twoheap, 0.0);
}
@@ -157,29 +93,14 @@ public class DoubleMinHeap implements DoubleHeap {
public void add(double o) {
final double co = o;
// System.err.println("Add: " + o);
- if (size < TWO_HEAP_MAX_SIZE) {
- if (size >= twoheap.length) {
- // Grow by one layer.
- twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
- }
- final int twopos = size;
- twoheap[twopos] = co;
- ++size;
- heapifyUp2(twopos, co);
- ++modCount;
- } else {
- final int fourpos = size - TWO_HEAP_MAX_SIZE;
- if (fourheap == null) {
- fourheap = new double[FOUR_HEAP_INITIAL_SIZE];
- } else if (fourpos >= fourheap.length) {
- // Grow extension heap by half.
- fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1));
- }
- fourheap[fourpos] = co;
- ++size;
- heapifyUp4(fourpos, co);
- ++modCount;
+ if (size >= twoheap.length) {
+ // Grow by one layer.
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
}
+ final int twopos = size;
+ twoheap[twopos] = co;
+ ++size;
+ heapifyUp(twopos, co);
}
@Override
@@ -195,7 +116,6 @@ public class DoubleMinHeap implements DoubleHeap {
public double replaceTopElement(double reinsert) {
final double ret = twoheap[0];
heapifyDown( reinsert);
- ++modCount;
return ret;
}
@@ -205,7 +125,7 @@ public class DoubleMinHeap implements DoubleHeap {
* @param twopos Position in 2-ary heap.
* @param cur Current object
*/
- private void heapifyUp2(int twopos, double cur) {
+ private void heapifyUp(int twopos, double cur) {
while (twopos > 0) {
final int parent = (twopos - 1) >>> 1;
double par = twoheap[parent];
@@ -218,80 +138,29 @@ public class DoubleMinHeap implements DoubleHeap {
twoheap[twopos] = cur;
}
- /**
- * Heapify-Up method for 4-ary heap.
- *
- * @param fourpos Position in 4-ary heap.
- * @param cur Current object
- */
- private void heapifyUp4(int fourpos, double cur) {
- while (fourpos > 0) {
- final int parent = (fourpos - 1) >> 2;
- double par = fourheap[parent];
- if (cur >= par) {
- break;
- }
- fourheap[fourpos] = par;
- fourpos = parent;
- }
- if (fourpos == 0 && twoheap[0] > cur) {
- fourheap[0] = twoheap[0];
- twoheap[0] = cur;
- } else {
- fourheap[fourpos] = cur;
- }
- }
-
@Override
public double poll() {
final double ret = twoheap[0];
--size;
// Replacement object:
- if (size >= TWO_HEAP_MAX_SIZE) {
- final int last = size - TWO_HEAP_MAX_SIZE;
- final double reinsert = fourheap[last];
- fourheap[last] = 0.0;
- heapifyDown(reinsert);
- } else if (size > 0) {
+ if (size > 0) {
final double reinsert = twoheap[size];
twoheap[size] = 0.0;
heapifyDown(reinsert);
} else {
twoheap[0] = 0.0;
}
- ++modCount;
return ret;
}
/**
* Invoke heapify-down for the root object.
*
- * @param reinsert Object to insert.
- */
- private void heapifyDown(double reinsert) {
- if (size > TWO_HEAP_MAX_SIZE) {
- // Special case: 3-ary situation.
- final int best = (twoheap[1] <= twoheap[2]) ? 1 : 2;
- if (fourheap[0] < twoheap[best]) {
- twoheap[0] = fourheap[0];
- heapifyDown4(0, reinsert);
- } else {
- twoheap[0] = twoheap[best];
- heapifyDown2(best, reinsert);
- }
- return;
- }
- heapifyDown2(0, reinsert);
- }
-
- /**
- * Heapify-Down for 2-ary heap.
- *
- * @param twopos Position in 2-ary heap.
- * @param cur Current object
+ * @param cur Object to insert.
*/
- private void heapifyDown2(int twopos, double cur) {
- final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1;
+ private void heapifyDown(double cur) {
+ final int stop = size >>> 1;
+ int twopos = 0;
while (twopos < stop) {
int bestchild = (twopos << 1) + 1;
double best = twoheap[bestchild];
@@ -309,51 +178,6 @@ public class DoubleMinHeap implements DoubleHeap {
twoheap[twopos] = cur;
}
- /**
- * Heapify-Down for 4-ary heap.
- *
- * @param fourpos Position in 4-ary heap.
- * @param cur Current object
- */
- private void heapifyDown4(int fourpos, double cur) {
- final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2;
- while (fourpos < stop) {
- final int child = (fourpos << 2) + 1;
- double best = fourheap[child];
- int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE;
- if (size > minsize) {
- double nextchild = fourheap[candidate];
- if (best > nextchild) {
- bestchild = candidate;
- best = nextchild;
- }
-
- minsize += 2;
- if (size >= minsize) {
- nextchild = fourheap[++candidate];
- if (best > nextchild) {
- bestchild = candidate;
- best = nextchild;
- }
-
- if (size > minsize) {
- nextchild = fourheap[++candidate];
- if (best > nextchild) {
- bestchild = candidate;
- best = nextchild;
- }
- }
- }
- }
- if (cur <= best) {
- break;
- }
- fourheap[fourpos] = best;
- fourpos = bestchild;
- }
- fourheap[fourpos] = cur;
- }
-
@Override
public double peek() {
return twoheap[0];
@@ -396,16 +220,8 @@ public class DoubleMinHeap implements DoubleHeap {
*/
protected int pos = 0;
- /**
- * Modification counter we were initialized at.
- */
- protected final int myModCount = modCount;
-
@Override
public boolean valid() {
- if (modCount != myModCount) {
- throw new ConcurrentModificationException();
- }
return pos < size;
}
@@ -416,7 +232,7 @@ public class DoubleMinHeap implements DoubleHeap {
@Override
public double get() {
- return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]);
+ return twoheap[pos];
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectHeap.java
index db65ce81..7323cd8d 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectHeap.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMaxHeap.java
index dd89573c..939c4d7e 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMaxHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMaxHeap.java
@@ -24,32 +24,11 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
*/
import java.util.Arrays;
-import java.util.ConcurrentModificationException;
import de.lmu.ifi.dbs.elki.math.MathUtil;
/**
- * Advanced priority queue class, based on a binary heap (for small sizes),
- * which will for larger heaps be accompanied by a 4-ary heap (attached below
- * the root of the two-ary heap, making the root actually 3-ary).
- *
- * This code was automatically instantiated for the types: Double and Object
- *
- * This combination was found to work quite well in benchmarks, but YMMV.
- *
- * Some other observations from benchmarking:
- * <ul>
- * <li>Bulk loading did not improve things</li>
- * <li>Primitive heaps are substantially faster.</li>
- * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and
- * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li>
- * <li>Workload makes a huge difference. A load-once, poll-until-empty priority
- * queue is something different than e.g. a top-k heap, which will see a lot of
- * top element replacements.</li>
- * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for
- * top-k make a difference.</li>
- * <li>Different day, different benchmark results ...</li>
- * </ul>
+ * Binary heap for primitive types.
*
* @author Erich Schubert
*
@@ -68,49 +47,16 @@ public class DoubleObjectMaxHeap<V> implements DoubleObjectHeap<V> {
protected Object[] twovals;
/**
- * Extension heap.
- */
- protected double[] fourheap;
-
- /**
- * Extension heapvalues.
- */
- protected Object[] fourvals;
-
- /**
* Current size of heap.
*/
protected int size;
/**
- * (Structural) modification counter. Used to invalidate iterators.
- */
- protected int modCount = 0;
-
- /**
- * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements.
- */
- private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1;
-
- /**
* Initial size of the 2-ary heap.
*/
private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1;
/**
- * Initial size of 4-ary heap when initialized.
- *
- * 21 = 4-ary heap of height 2: 1 + 4 + 4*4
- *
- * 85 = 4-ary heap of height 3: 21 + 4*4*4
- *
- * 341 = 4-ary heap of height 4: 85 + 4*4*4*4
- *
- * Since we last grew by 255 (to 511), let's use 341.
- */
- private final static int FOUR_HEAP_INITIAL_SIZE = 341;
-
- /**
* Constructor, with default size.
*/
public DoubleObjectMaxHeap() {
@@ -120,10 +66,6 @@ public class DoubleObjectMaxHeap<V> implements DoubleObjectHeap<V> {
this.twoheap = twoheap;
this.twovals = twovals;
- this.fourheap = null;
- this.fourvals = null;
- this.size = 0;
- this.modCount = 0;
}
/**
@@ -133,35 +75,17 @@ public class DoubleObjectMaxHeap<V> implements DoubleObjectHeap<V> {
*/
public DoubleObjectMaxHeap(int minsize) {
super();
- if (minsize < TWO_HEAP_MAX_SIZE) {
- final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
- double[] twoheap = new double[size];
- Object[] twovals = new Object[size];
+ final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
+ double[] twoheap = new double[size];
+ Object[] twovals = new Object[size];
- this.twoheap = twoheap;
- this.twovals = twovals;
- this.fourheap = null;
- this.fourvals = null;
- } else {
- double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE];
- Object[] twovals = new Object[TWO_HEAP_INITIAL_SIZE];
- double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)];
- Object[] fourvals = new Object[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)];
- this.twoheap = twoheap;
- this.twovals = twovals;
- this.fourheap = fourheap;
- this.fourvals = fourvals;
- }
- this.size = 0;
- this.modCount = 0;
+ this.twoheap = twoheap;
+ this.twovals = twovals;
}
@Override
public void clear() {
size = 0;
- ++modCount;
- fourheap = null;
- fourvals = null;
Arrays.fill(twoheap, 0.0);
Arrays.fill(twovals, null);
}
@@ -181,34 +105,16 @@ public class DoubleObjectMaxHeap<V> implements DoubleObjectHeap<V> {
final double co = o;
final Object cv = (Object)v;
// System.err.println("Add: " + o);
- if (size < TWO_HEAP_MAX_SIZE) {
- if (size >= twoheap.length) {
- // Grow by one layer.
- twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
- twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1);
- }
- final int twopos = size;
- twoheap[twopos] = co;
- twovals[twopos] = cv;
- ++size;
- heapifyUp2(twopos, co, cv);
- ++modCount;
- } else {
- final int fourpos = size - TWO_HEAP_MAX_SIZE;
- if (fourheap == null) {
- fourheap = new double[FOUR_HEAP_INITIAL_SIZE];
- fourvals = new Object[FOUR_HEAP_INITIAL_SIZE];
- } else if (fourpos >= fourheap.length) {
- // Grow extension heap by half.
- fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1));
- fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1));
- }
- fourheap[fourpos] = co;
- fourvals[fourpos] = cv;
- ++size;
- heapifyUp4(fourpos, co, cv);
- ++modCount;
+ if (size >= twoheap.length) {
+ // Grow by one layer.
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
+ twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1);
}
+ final int twopos = size;
+ twoheap[twopos] = co;
+ twovals[twopos] = cv;
+ ++size;
+ heapifyUp(twopos, co, cv);
}
@Override
@@ -223,7 +129,6 @@ public class DoubleObjectMaxHeap<V> implements DoubleObjectHeap<V> {
@Override
public void replaceTopElement(double reinsert, V val) {
heapifyDown(reinsert, (Object)val);
- ++modCount;
}
/**
@@ -233,7 +138,7 @@ public class DoubleObjectMaxHeap<V> implements DoubleObjectHeap<V> {
* @param cur Current object
* @param val Current value
*/
- private void heapifyUp2(int twopos, double cur, Object val) {
+ private void heapifyUp(int twopos, double cur, Object val) {
while (twopos > 0) {
final int parent = (twopos - 1) >>> 1;
double par = twoheap[parent];
@@ -248,47 +153,11 @@ public class DoubleObjectMaxHeap<V> implements DoubleObjectHeap<V> {
twovals[twopos] = val;
}
- /**
- * Heapify-Up method for 4-ary heap.
- *
- * @param fourpos Position in 4-ary heap.
- * @param cur Current object
- * @param val Current value
- */
- private void heapifyUp4(int fourpos, double cur, Object val) {
- while (fourpos > 0) {
- final int parent = (fourpos - 1) >> 2;
- double par = fourheap[parent];
- if (cur <= par) {
- break;
- }
- fourheap[fourpos] = par;
- fourvals[fourpos] = fourvals[parent];
- fourpos = parent;
- }
- if (fourpos == 0 && twoheap[0] < cur) {
- fourheap[0] = twoheap[0];
- fourvals[0] = twovals[0];
- twoheap[0] = cur;
- twovals[0] = val;
- } else {
- fourheap[fourpos] = cur;
- fourvals[fourpos] = val;
- }
- }
-
@Override
public void poll() {
--size;
// Replacement object:
- if (size >= TWO_HEAP_MAX_SIZE) {
- final int last = size - TWO_HEAP_MAX_SIZE;
- final double reinsert = fourheap[last];
- final Object reinsertv = fourvals[last];
- fourheap[last] = 0.0;
- fourvals[last] = null;
- heapifyDown(reinsert, reinsertv);
- } else if (size > 0) {
+ if (size > 0) {
final double reinsert = twoheap[size];
final Object reinsertv = twovals[size];
twoheap[size] = 0.0;
@@ -298,42 +167,17 @@ public class DoubleObjectMaxHeap<V> implements DoubleObjectHeap<V> {
twoheap[0] = 0.0;
twovals[0] = null;
}
- ++modCount;
}
/**
* Invoke heapify-down for the root object.
*
- * @param reinsert Object to insert.
- * @param val Value to reinsert.
- */
- private void heapifyDown(double reinsert, Object val) {
- if (size > TWO_HEAP_MAX_SIZE) {
- // Special case: 3-ary situation.
- final int best = (twoheap[1] >= twoheap[2]) ? 1 : 2;
- if (fourheap[0] > twoheap[best]) {
- twoheap[0] = fourheap[0];
- twovals[0] = fourvals[0];
- heapifyDown4(0, reinsert, val);
- } else {
- twoheap[0] = twoheap[best];
- twovals[0] = twovals[best];
- heapifyDown2(best, reinsert, val);
- }
- return;
- }
- heapifyDown2(0, reinsert, val);
- }
-
- /**
- * Heapify-Down for 2-ary heap.
- *
- * @param twopos Position in 2-ary heap.
- * @param cur Current object
+ * @param cur Object to insert.
* @param val Value to reinsert.
*/
- private void heapifyDown2(int twopos, double cur, Object val) {
- final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1;
+ private void heapifyDown(double cur, Object val) {
+ final int stop = size >>> 1;
+ int twopos = 0;
while (twopos < stop) {
int bestchild = (twopos << 1) + 1;
double best = twoheap[bestchild];
@@ -353,54 +197,6 @@ public class DoubleObjectMaxHeap<V> implements DoubleObjectHeap<V> {
twovals[twopos] = val;
}
- /**
- * Heapify-Down for 4-ary heap.
- *
- * @param fourpos Position in 4-ary heap.
- * @param cur Current object
- * @param val Value to reinsert.
- */
- private void heapifyDown4(int fourpos, double cur, Object val) {
- final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2;
- while (fourpos < stop) {
- final int child = (fourpos << 2) + 1;
- double best = fourheap[child];
- int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE;
- if (size > minsize) {
- double nextchild = fourheap[candidate];
- if (best < nextchild) {
- bestchild = candidate;
- best = nextchild;
- }
-
- minsize += 2;
- if (size >= minsize) {
- nextchild = fourheap[++candidate];
- if (best < nextchild) {
- bestchild = candidate;
- best = nextchild;
- }
-
- if (size > minsize) {
- nextchild = fourheap[++candidate];
- if (best < nextchild) {
- bestchild = candidate;
- best = nextchild;
- }
- }
- }
- }
- if (cur >= best) {
- break;
- }
- fourheap[fourpos] = best;
- fourvals[fourpos] = fourvals[bestchild];
- fourpos = bestchild;
- }
- fourheap[fourpos] = cur;
- fourvals[fourpos] = val;
- }
-
@Override
public double peekKey() {
return twoheap[0];
@@ -449,16 +245,8 @@ public class DoubleObjectMaxHeap<V> implements DoubleObjectHeap<V> {
*/
protected int pos = 0;
- /**
- * Modification counter we were initialized at.
- */
- protected final int myModCount = modCount;
-
@Override
public boolean valid() {
- if (modCount != myModCount) {
- throw new ConcurrentModificationException();
- }
return pos < size;
}
@@ -469,14 +257,14 @@ public class DoubleObjectMaxHeap<V> implements DoubleObjectHeap<V> {
@Override
public double getKey() {
- return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]);
+ return twoheap[pos];
}
@SuppressWarnings("unchecked")
@Override
public V getValue() {
- return (V)((pos < TWO_HEAP_MAX_SIZE) ? twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]);
+ return (V)twovals[pos];
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMinHeap.java
index 905cdedb..01b8e58d 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMinHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/DoubleObjectMinHeap.java
@@ -24,32 +24,11 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
*/
import java.util.Arrays;
-import java.util.ConcurrentModificationException;
import de.lmu.ifi.dbs.elki.math.MathUtil;
/**
- * Advanced priority queue class, based on a binary heap (for small sizes),
- * which will for larger heaps be accompanied by a 4-ary heap (attached below
- * the root of the two-ary heap, making the root actually 3-ary).
- *
- * This code was automatically instantiated for the types: Double and Object
- *
- * This combination was found to work quite well in benchmarks, but YMMV.
- *
- * Some other observations from benchmarking:
- * <ul>
- * <li>Bulk loading did not improve things</li>
- * <li>Primitive heaps are substantially faster.</li>
- * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and
- * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li>
- * <li>Workload makes a huge difference. A load-once, poll-until-empty priority
- * queue is something different than e.g. a top-k heap, which will see a lot of
- * top element replacements.</li>
- * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for
- * top-k make a difference.</li>
- * <li>Different day, different benchmark results ...</li>
- * </ul>
+ * Binary heap for primitive types.
*
* @author Erich Schubert
*
@@ -68,49 +47,16 @@ public class DoubleObjectMinHeap<V> implements DoubleObjectHeap<V> {
protected Object[] twovals;
/**
- * Extension heap.
- */
- protected double[] fourheap;
-
- /**
- * Extension heapvalues.
- */
- protected Object[] fourvals;
-
- /**
* Current size of heap.
*/
protected int size;
/**
- * (Structural) modification counter. Used to invalidate iterators.
- */
- protected int modCount = 0;
-
- /**
- * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements.
- */
- private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1;
-
- /**
* Initial size of the 2-ary heap.
*/
private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1;
/**
- * Initial size of 4-ary heap when initialized.
- *
- * 21 = 4-ary heap of height 2: 1 + 4 + 4*4
- *
- * 85 = 4-ary heap of height 3: 21 + 4*4*4
- *
- * 341 = 4-ary heap of height 4: 85 + 4*4*4*4
- *
- * Since we last grew by 255 (to 511), let's use 341.
- */
- private final static int FOUR_HEAP_INITIAL_SIZE = 341;
-
- /**
* Constructor, with default size.
*/
public DoubleObjectMinHeap() {
@@ -120,10 +66,6 @@ public class DoubleObjectMinHeap<V> implements DoubleObjectHeap<V> {
this.twoheap = twoheap;
this.twovals = twovals;
- this.fourheap = null;
- this.fourvals = null;
- this.size = 0;
- this.modCount = 0;
}
/**
@@ -133,35 +75,17 @@ public class DoubleObjectMinHeap<V> implements DoubleObjectHeap<V> {
*/
public DoubleObjectMinHeap(int minsize) {
super();
- if (minsize < TWO_HEAP_MAX_SIZE) {
- final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
- double[] twoheap = new double[size];
- Object[] twovals = new Object[size];
+ final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
+ double[] twoheap = new double[size];
+ Object[] twovals = new Object[size];
- this.twoheap = twoheap;
- this.twovals = twovals;
- this.fourheap = null;
- this.fourvals = null;
- } else {
- double[] twoheap = new double[TWO_HEAP_INITIAL_SIZE];
- Object[] twovals = new Object[TWO_HEAP_INITIAL_SIZE];
- double[] fourheap = new double[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)];
- Object[] fourvals = new Object[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)];
- this.twoheap = twoheap;
- this.twovals = twovals;
- this.fourheap = fourheap;
- this.fourvals = fourvals;
- }
- this.size = 0;
- this.modCount = 0;
+ this.twoheap = twoheap;
+ this.twovals = twovals;
}
@Override
public void clear() {
size = 0;
- ++modCount;
- fourheap = null;
- fourvals = null;
Arrays.fill(twoheap, 0.0);
Arrays.fill(twovals, null);
}
@@ -181,34 +105,16 @@ public class DoubleObjectMinHeap<V> implements DoubleObjectHeap<V> {
final double co = o;
final Object cv = (Object)v;
// System.err.println("Add: " + o);
- if (size < TWO_HEAP_MAX_SIZE) {
- if (size >= twoheap.length) {
- // Grow by one layer.
- twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
- twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1);
- }
- final int twopos = size;
- twoheap[twopos] = co;
- twovals[twopos] = cv;
- ++size;
- heapifyUp2(twopos, co, cv);
- ++modCount;
- } else {
- final int fourpos = size - TWO_HEAP_MAX_SIZE;
- if (fourheap == null) {
- fourheap = new double[FOUR_HEAP_INITIAL_SIZE];
- fourvals = new Object[FOUR_HEAP_INITIAL_SIZE];
- } else if (fourpos >= fourheap.length) {
- // Grow extension heap by half.
- fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1));
- fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1));
- }
- fourheap[fourpos] = co;
- fourvals[fourpos] = cv;
- ++size;
- heapifyUp4(fourpos, co, cv);
- ++modCount;
+ if (size >= twoheap.length) {
+ // Grow by one layer.
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
+ twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1);
}
+ final int twopos = size;
+ twoheap[twopos] = co;
+ twovals[twopos] = cv;
+ ++size;
+ heapifyUp(twopos, co, cv);
}
@Override
@@ -223,7 +129,6 @@ public class DoubleObjectMinHeap<V> implements DoubleObjectHeap<V> {
@Override
public void replaceTopElement(double reinsert, V val) {
heapifyDown(reinsert, (Object)val);
- ++modCount;
}
/**
@@ -233,7 +138,7 @@ public class DoubleObjectMinHeap<V> implements DoubleObjectHeap<V> {
* @param cur Current object
* @param val Current value
*/
- private void heapifyUp2(int twopos, double cur, Object val) {
+ private void heapifyUp(int twopos, double cur, Object val) {
while (twopos > 0) {
final int parent = (twopos - 1) >>> 1;
double par = twoheap[parent];
@@ -248,47 +153,11 @@ public class DoubleObjectMinHeap<V> implements DoubleObjectHeap<V> {
twovals[twopos] = val;
}
- /**
- * Heapify-Up method for 4-ary heap.
- *
- * @param fourpos Position in 4-ary heap.
- * @param cur Current object
- * @param val Current value
- */
- private void heapifyUp4(int fourpos, double cur, Object val) {
- while (fourpos > 0) {
- final int parent = (fourpos - 1) >> 2;
- double par = fourheap[parent];
- if (cur >= par) {
- break;
- }
- fourheap[fourpos] = par;
- fourvals[fourpos] = fourvals[parent];
- fourpos = parent;
- }
- if (fourpos == 0 && twoheap[0] > cur) {
- fourheap[0] = twoheap[0];
- fourvals[0] = twovals[0];
- twoheap[0] = cur;
- twovals[0] = val;
- } else {
- fourheap[fourpos] = cur;
- fourvals[fourpos] = val;
- }
- }
-
@Override
public void poll() {
--size;
// Replacement object:
- if (size >= TWO_HEAP_MAX_SIZE) {
- final int last = size - TWO_HEAP_MAX_SIZE;
- final double reinsert = fourheap[last];
- final Object reinsertv = fourvals[last];
- fourheap[last] = 0.0;
- fourvals[last] = null;
- heapifyDown(reinsert, reinsertv);
- } else if (size > 0) {
+ if (size > 0) {
final double reinsert = twoheap[size];
final Object reinsertv = twovals[size];
twoheap[size] = 0.0;
@@ -298,42 +167,17 @@ public class DoubleObjectMinHeap<V> implements DoubleObjectHeap<V> {
twoheap[0] = 0.0;
twovals[0] = null;
}
- ++modCount;
}
/**
* Invoke heapify-down for the root object.
*
- * @param reinsert Object to insert.
- * @param val Value to reinsert.
- */
- private void heapifyDown(double reinsert, Object val) {
- if (size > TWO_HEAP_MAX_SIZE) {
- // Special case: 3-ary situation.
- final int best = (twoheap[1] <= twoheap[2]) ? 1 : 2;
- if (fourheap[0] < twoheap[best]) {
- twoheap[0] = fourheap[0];
- twovals[0] = fourvals[0];
- heapifyDown4(0, reinsert, val);
- } else {
- twoheap[0] = twoheap[best];
- twovals[0] = twovals[best];
- heapifyDown2(best, reinsert, val);
- }
- return;
- }
- heapifyDown2(0, reinsert, val);
- }
-
- /**
- * Heapify-Down for 2-ary heap.
- *
- * @param twopos Position in 2-ary heap.
- * @param cur Current object
+ * @param cur Object to insert.
* @param val Value to reinsert.
*/
- private void heapifyDown2(int twopos, double cur, Object val) {
- final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1;
+ private void heapifyDown(double cur, Object val) {
+ final int stop = size >>> 1;
+ int twopos = 0;
while (twopos < stop) {
int bestchild = (twopos << 1) + 1;
double best = twoheap[bestchild];
@@ -353,54 +197,6 @@ public class DoubleObjectMinHeap<V> implements DoubleObjectHeap<V> {
twovals[twopos] = val;
}
- /**
- * Heapify-Down for 4-ary heap.
- *
- * @param fourpos Position in 4-ary heap.
- * @param cur Current object
- * @param val Value to reinsert.
- */
- private void heapifyDown4(int fourpos, double cur, Object val) {
- final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2;
- while (fourpos < stop) {
- final int child = (fourpos << 2) + 1;
- double best = fourheap[child];
- int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE;
- if (size > minsize) {
- double nextchild = fourheap[candidate];
- if (best > nextchild) {
- bestchild = candidate;
- best = nextchild;
- }
-
- minsize += 2;
- if (size >= minsize) {
- nextchild = fourheap[++candidate];
- if (best > nextchild) {
- bestchild = candidate;
- best = nextchild;
- }
-
- if (size > minsize) {
- nextchild = fourheap[++candidate];
- if (best > nextchild) {
- bestchild = candidate;
- best = nextchild;
- }
- }
- }
- }
- if (cur <= best) {
- break;
- }
- fourheap[fourpos] = best;
- fourvals[fourpos] = fourvals[bestchild];
- fourpos = bestchild;
- }
- fourheap[fourpos] = cur;
- fourvals[fourpos] = val;
- }
-
@Override
public double peekKey() {
return twoheap[0];
@@ -449,16 +245,8 @@ public class DoubleObjectMinHeap<V> implements DoubleObjectHeap<V> {
*/
protected int pos = 0;
- /**
- * Modification counter we were initialized at.
- */
- protected final int myModCount = modCount;
-
@Override
public boolean valid() {
- if (modCount != myModCount) {
- throw new ConcurrentModificationException();
- }
return pos < size;
}
@@ -469,14 +257,14 @@ public class DoubleObjectMinHeap<V> implements DoubleObjectHeap<V> {
@Override
public double getKey() {
- return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]);
+ return twoheap[pos];
}
@SuppressWarnings("unchecked")
@Override
public V getValue() {
- return (V)((pos < TWO_HEAP_MAX_SIZE) ? twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]);
+ return (V)twovals[pos];
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerHeap.java
index 3235926b..77e5f3e5 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerHeap.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMaxHeap.java
index 60f61d99..4f3b1495 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMaxHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMaxHeap.java
@@ -24,32 +24,11 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
*/
import java.util.Arrays;
-import java.util.ConcurrentModificationException;
import de.lmu.ifi.dbs.elki.math.MathUtil;
/**
- * Advanced priority queue class, based on a binary heap (for small sizes),
- * which will for larger heaps be accompanied by a 4-ary heap (attached below
- * the root of the two-ary heap, making the root actually 3-ary).
- *
- * This code was automatically instantiated for the type: Integer
- *
- * This combination was found to work quite well in benchmarks, but YMMV.
- *
- * Some other observations from benchmarking:
- * <ul>
- * <li>Bulk loading did not improve things</li>
- * <li>Primitive heaps are substantially faster.</li>
- * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and
- * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li>
- * <li>Workload makes a huge difference. A load-once, poll-until-empty priority
- * queue is something different than e.g. a top-k heap, which will see a lot of
- * top element replacements.</li>
- * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for
- * top-k make a difference.</li>
- * <li>Different day, different benchmark results ...</li>
- * </ul>
+ * Binary heap for primitive types.
*
* @author Erich Schubert
*
@@ -62,44 +41,16 @@ public class IntegerMaxHeap implements IntegerHeap {
protected int[] twoheap;
/**
- * Extension heap.
- */
- protected int[] fourheap;
-
- /**
* Current size of heap.
*/
protected int size;
/**
- * (Structural) modification counter. Used to invalidate iterators.
- */
- protected int modCount = 0;
-
- /**
- * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements.
- */
- private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1;
-
- /**
* Initial size of the 2-ary heap.
*/
private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1;
/**
- * Initial size of 4-ary heap when initialized.
- *
- * 21 = 4-ary heap of height 2: 1 + 4 + 4*4
- *
- * 85 = 4-ary heap of height 3: 21 + 4*4*4
- *
- * 341 = 4-ary heap of height 4: 85 + 4*4*4*4
- *
- * Since we last grew by 255 (to 511), let's use 341.
- */
- private final static int FOUR_HEAP_INITIAL_SIZE = 341;
-
- /**
* Constructor, with default size.
*/
public IntegerMaxHeap() {
@@ -107,9 +58,6 @@ public class IntegerMaxHeap implements IntegerHeap {
int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE];
this.twoheap = twoheap;
- this.fourheap = null;
- this.size = 0;
- this.modCount = 0;
}
/**
@@ -119,27 +67,15 @@ public class IntegerMaxHeap implements IntegerHeap {
*/
public IntegerMaxHeap(int minsize) {
super();
- if (minsize < TWO_HEAP_MAX_SIZE) {
- final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
- int[] twoheap = new int[size];
+ final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
+ int[] twoheap = new int[size];
- this.twoheap = twoheap;
- this.fourheap = null;
- } else {
- int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE];
- int[] fourheap = new int[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)];
- this.twoheap = twoheap;
- this.fourheap = fourheap;
- }
- this.size = 0;
- this.modCount = 0;
+ this.twoheap = twoheap;
}
@Override
public void clear() {
size = 0;
- ++modCount;
- fourheap = null;
Arrays.fill(twoheap, 0);
}
@@ -157,29 +93,14 @@ public class IntegerMaxHeap implements IntegerHeap {
public void add(int o) {
final int co = o;
// System.err.println("Add: " + o);
- if (size < TWO_HEAP_MAX_SIZE) {
- if (size >= twoheap.length) {
- // Grow by one layer.
- twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
- }
- final int twopos = size;
- twoheap[twopos] = co;
- ++size;
- heapifyUp2(twopos, co);
- ++modCount;
- } else {
- final int fourpos = size - TWO_HEAP_MAX_SIZE;
- if (fourheap == null) {
- fourheap = new int[FOUR_HEAP_INITIAL_SIZE];
- } else if (fourpos >= fourheap.length) {
- // Grow extension heap by half.
- fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1));
- }
- fourheap[fourpos] = co;
- ++size;
- heapifyUp4(fourpos, co);
- ++modCount;
+ if (size >= twoheap.length) {
+ // Grow by one layer.
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
}
+ final int twopos = size;
+ twoheap[twopos] = co;
+ ++size;
+ heapifyUp(twopos, co);
}
@Override
@@ -195,7 +116,6 @@ public class IntegerMaxHeap implements IntegerHeap {
public int replaceTopElement(int reinsert) {
final int ret = twoheap[0];
heapifyDown( reinsert);
- ++modCount;
return ret;
}
@@ -205,7 +125,7 @@ public class IntegerMaxHeap implements IntegerHeap {
* @param twopos Position in 2-ary heap.
* @param cur Current object
*/
- private void heapifyUp2(int twopos, int cur) {
+ private void heapifyUp(int twopos, int cur) {
while (twopos > 0) {
final int parent = (twopos - 1) >>> 1;
int par = twoheap[parent];
@@ -218,80 +138,29 @@ public class IntegerMaxHeap implements IntegerHeap {
twoheap[twopos] = cur;
}
- /**
- * Heapify-Up method for 4-ary heap.
- *
- * @param fourpos Position in 4-ary heap.
- * @param cur Current object
- */
- private void heapifyUp4(int fourpos, int cur) {
- while (fourpos > 0) {
- final int parent = (fourpos - 1) >> 2;
- int par = fourheap[parent];
- if (cur <= par) {
- break;
- }
- fourheap[fourpos] = par;
- fourpos = parent;
- }
- if (fourpos == 0 && twoheap[0] < cur) {
- fourheap[0] = twoheap[0];
- twoheap[0] = cur;
- } else {
- fourheap[fourpos] = cur;
- }
- }
-
@Override
public int poll() {
final int ret = twoheap[0];
--size;
// Replacement object:
- if (size >= TWO_HEAP_MAX_SIZE) {
- final int last = size - TWO_HEAP_MAX_SIZE;
- final int reinsert = fourheap[last];
- fourheap[last] = 0;
- heapifyDown(reinsert);
- } else if (size > 0) {
+ if (size > 0) {
final int reinsert = twoheap[size];
twoheap[size] = 0;
heapifyDown(reinsert);
} else {
twoheap[0] = 0;
}
- ++modCount;
return ret;
}
/**
* Invoke heapify-down for the root object.
*
- * @param reinsert Object to insert.
- */
- private void heapifyDown(int reinsert) {
- if (size > TWO_HEAP_MAX_SIZE) {
- // Special case: 3-ary situation.
- final int best = (twoheap[1] >= twoheap[2]) ? 1 : 2;
- if (fourheap[0] > twoheap[best]) {
- twoheap[0] = fourheap[0];
- heapifyDown4(0, reinsert);
- } else {
- twoheap[0] = twoheap[best];
- heapifyDown2(best, reinsert);
- }
- return;
- }
- heapifyDown2(0, reinsert);
- }
-
- /**
- * Heapify-Down for 2-ary heap.
- *
- * @param twopos Position in 2-ary heap.
- * @param cur Current object
+ * @param cur Object to insert.
*/
- private void heapifyDown2(int twopos, int cur) {
- final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1;
+ private void heapifyDown(int cur) {
+ final int stop = size >>> 1;
+ int twopos = 0;
while (twopos < stop) {
int bestchild = (twopos << 1) + 1;
int best = twoheap[bestchild];
@@ -309,51 +178,6 @@ public class IntegerMaxHeap implements IntegerHeap {
twoheap[twopos] = cur;
}
- /**
- * Heapify-Down for 4-ary heap.
- *
- * @param fourpos Position in 4-ary heap.
- * @param cur Current object
- */
- private void heapifyDown4(int fourpos, int cur) {
- final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2;
- while (fourpos < stop) {
- final int child = (fourpos << 2) + 1;
- int best = fourheap[child];
- int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE;
- if (size > minsize) {
- int nextchild = fourheap[candidate];
- if (best < nextchild) {
- bestchild = candidate;
- best = nextchild;
- }
-
- minsize += 2;
- if (size >= minsize) {
- nextchild = fourheap[++candidate];
- if (best < nextchild) {
- bestchild = candidate;
- best = nextchild;
- }
-
- if (size > minsize) {
- nextchild = fourheap[++candidate];
- if (best < nextchild) {
- bestchild = candidate;
- best = nextchild;
- }
- }
- }
- }
- if (cur >= best) {
- break;
- }
- fourheap[fourpos] = best;
- fourpos = bestchild;
- }
- fourheap[fourpos] = cur;
- }
-
@Override
public int peek() {
return twoheap[0];
@@ -396,16 +220,8 @@ public class IntegerMaxHeap implements IntegerHeap {
*/
protected int pos = 0;
- /**
- * Modification counter we were initialized at.
- */
- protected final int myModCount = modCount;
-
@Override
public boolean valid() {
- if (modCount != myModCount) {
- throw new ConcurrentModificationException();
- }
return pos < size;
}
@@ -416,7 +232,7 @@ public class IntegerMaxHeap implements IntegerHeap {
@Override
public int get() {
- return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]);
+ return twoheap[pos];
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMinHeap.java
index c352ece4..b02e04db 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMinHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerMinHeap.java
@@ -24,32 +24,11 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
*/
import java.util.Arrays;
-import java.util.ConcurrentModificationException;
import de.lmu.ifi.dbs.elki.math.MathUtil;
/**
- * Advanced priority queue class, based on a binary heap (for small sizes),
- * which will for larger heaps be accompanied by a 4-ary heap (attached below
- * the root of the two-ary heap, making the root actually 3-ary).
- *
- * This code was automatically instantiated for the type: Integer
- *
- * This combination was found to work quite well in benchmarks, but YMMV.
- *
- * Some other observations from benchmarking:
- * <ul>
- * <li>Bulk loading did not improve things</li>
- * <li>Primitive heaps are substantially faster.</li>
- * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and
- * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li>
- * <li>Workload makes a huge difference. A load-once, poll-until-empty priority
- * queue is something different than e.g. a top-k heap, which will see a lot of
- * top element replacements.</li>
- * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for
- * top-k make a difference.</li>
- * <li>Different day, different benchmark results ...</li>
- * </ul>
+ * Binary heap for primitive types.
*
* @author Erich Schubert
*
@@ -62,44 +41,16 @@ public class IntegerMinHeap implements IntegerHeap {
protected int[] twoheap;
/**
- * Extension heap.
- */
- protected int[] fourheap;
-
- /**
* Current size of heap.
*/
protected int size;
/**
- * (Structural) modification counter. Used to invalidate iterators.
- */
- protected int modCount = 0;
-
- /**
- * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements.
- */
- private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1;
-
- /**
* Initial size of the 2-ary heap.
*/
private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1;
/**
- * Initial size of 4-ary heap when initialized.
- *
- * 21 = 4-ary heap of height 2: 1 + 4 + 4*4
- *
- * 85 = 4-ary heap of height 3: 21 + 4*4*4
- *
- * 341 = 4-ary heap of height 4: 85 + 4*4*4*4
- *
- * Since we last grew by 255 (to 511), let's use 341.
- */
- private final static int FOUR_HEAP_INITIAL_SIZE = 341;
-
- /**
* Constructor, with default size.
*/
public IntegerMinHeap() {
@@ -107,9 +58,6 @@ public class IntegerMinHeap implements IntegerHeap {
int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE];
this.twoheap = twoheap;
- this.fourheap = null;
- this.size = 0;
- this.modCount = 0;
}
/**
@@ -119,27 +67,15 @@ public class IntegerMinHeap implements IntegerHeap {
*/
public IntegerMinHeap(int minsize) {
super();
- if (minsize < TWO_HEAP_MAX_SIZE) {
- final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
- int[] twoheap = new int[size];
+ final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
+ int[] twoheap = new int[size];
- this.twoheap = twoheap;
- this.fourheap = null;
- } else {
- int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE];
- int[] fourheap = new int[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)];
- this.twoheap = twoheap;
- this.fourheap = fourheap;
- }
- this.size = 0;
- this.modCount = 0;
+ this.twoheap = twoheap;
}
@Override
public void clear() {
size = 0;
- ++modCount;
- fourheap = null;
Arrays.fill(twoheap, 0);
}
@@ -157,29 +93,14 @@ public class IntegerMinHeap implements IntegerHeap {
public void add(int o) {
final int co = o;
// System.err.println("Add: " + o);
- if (size < TWO_HEAP_MAX_SIZE) {
- if (size >= twoheap.length) {
- // Grow by one layer.
- twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
- }
- final int twopos = size;
- twoheap[twopos] = co;
- ++size;
- heapifyUp2(twopos, co);
- ++modCount;
- } else {
- final int fourpos = size - TWO_HEAP_MAX_SIZE;
- if (fourheap == null) {
- fourheap = new int[FOUR_HEAP_INITIAL_SIZE];
- } else if (fourpos >= fourheap.length) {
- // Grow extension heap by half.
- fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1));
- }
- fourheap[fourpos] = co;
- ++size;
- heapifyUp4(fourpos, co);
- ++modCount;
+ if (size >= twoheap.length) {
+ // Grow by one layer.
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
}
+ final int twopos = size;
+ twoheap[twopos] = co;
+ ++size;
+ heapifyUp(twopos, co);
}
@Override
@@ -195,7 +116,6 @@ public class IntegerMinHeap implements IntegerHeap {
public int replaceTopElement(int reinsert) {
final int ret = twoheap[0];
heapifyDown( reinsert);
- ++modCount;
return ret;
}
@@ -205,7 +125,7 @@ public class IntegerMinHeap implements IntegerHeap {
* @param twopos Position in 2-ary heap.
* @param cur Current object
*/
- private void heapifyUp2(int twopos, int cur) {
+ private void heapifyUp(int twopos, int cur) {
while (twopos > 0) {
final int parent = (twopos - 1) >>> 1;
int par = twoheap[parent];
@@ -218,80 +138,29 @@ public class IntegerMinHeap implements IntegerHeap {
twoheap[twopos] = cur;
}
- /**
- * Heapify-Up method for 4-ary heap.
- *
- * @param fourpos Position in 4-ary heap.
- * @param cur Current object
- */
- private void heapifyUp4(int fourpos, int cur) {
- while (fourpos > 0) {
- final int parent = (fourpos - 1) >> 2;
- int par = fourheap[parent];
- if (cur >= par) {
- break;
- }
- fourheap[fourpos] = par;
- fourpos = parent;
- }
- if (fourpos == 0 && twoheap[0] > cur) {
- fourheap[0] = twoheap[0];
- twoheap[0] = cur;
- } else {
- fourheap[fourpos] = cur;
- }
- }
-
@Override
public int poll() {
final int ret = twoheap[0];
--size;
// Replacement object:
- if (size >= TWO_HEAP_MAX_SIZE) {
- final int last = size - TWO_HEAP_MAX_SIZE;
- final int reinsert = fourheap[last];
- fourheap[last] = 0;
- heapifyDown(reinsert);
- } else if (size > 0) {
+ if (size > 0) {
final int reinsert = twoheap[size];
twoheap[size] = 0;
heapifyDown(reinsert);
} else {
twoheap[0] = 0;
}
- ++modCount;
return ret;
}
/**
* Invoke heapify-down for the root object.
*
- * @param reinsert Object to insert.
- */
- private void heapifyDown(int reinsert) {
- if (size > TWO_HEAP_MAX_SIZE) {
- // Special case: 3-ary situation.
- final int best = (twoheap[1] <= twoheap[2]) ? 1 : 2;
- if (fourheap[0] < twoheap[best]) {
- twoheap[0] = fourheap[0];
- heapifyDown4(0, reinsert);
- } else {
- twoheap[0] = twoheap[best];
- heapifyDown2(best, reinsert);
- }
- return;
- }
- heapifyDown2(0, reinsert);
- }
-
- /**
- * Heapify-Down for 2-ary heap.
- *
- * @param twopos Position in 2-ary heap.
- * @param cur Current object
+ * @param cur Object to insert.
*/
- private void heapifyDown2(int twopos, int cur) {
- final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1;
+ private void heapifyDown(int cur) {
+ final int stop = size >>> 1;
+ int twopos = 0;
while (twopos < stop) {
int bestchild = (twopos << 1) + 1;
int best = twoheap[bestchild];
@@ -309,51 +178,6 @@ public class IntegerMinHeap implements IntegerHeap {
twoheap[twopos] = cur;
}
- /**
- * Heapify-Down for 4-ary heap.
- *
- * @param fourpos Position in 4-ary heap.
- * @param cur Current object
- */
- private void heapifyDown4(int fourpos, int cur) {
- final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2;
- while (fourpos < stop) {
- final int child = (fourpos << 2) + 1;
- int best = fourheap[child];
- int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE;
- if (size > minsize) {
- int nextchild = fourheap[candidate];
- if (best > nextchild) {
- bestchild = candidate;
- best = nextchild;
- }
-
- minsize += 2;
- if (size >= minsize) {
- nextchild = fourheap[++candidate];
- if (best > nextchild) {
- bestchild = candidate;
- best = nextchild;
- }
-
- if (size > minsize) {
- nextchild = fourheap[++candidate];
- if (best > nextchild) {
- bestchild = candidate;
- best = nextchild;
- }
- }
- }
- }
- if (cur <= best) {
- break;
- }
- fourheap[fourpos] = best;
- fourpos = bestchild;
- }
- fourheap[fourpos] = cur;
- }
-
@Override
public int peek() {
return twoheap[0];
@@ -396,16 +220,8 @@ public class IntegerMinHeap implements IntegerHeap {
*/
protected int pos = 0;
- /**
- * Modification counter we were initialized at.
- */
- protected final int myModCount = modCount;
-
@Override
public boolean valid() {
- if (modCount != myModCount) {
- throw new ConcurrentModificationException();
- }
return pos < size;
}
@@ -416,7 +232,7 @@ public class IntegerMinHeap implements IntegerHeap {
@Override
public int get() {
- return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]);
+ return twoheap[pos];
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectHeap.java
index 01f7aea0..e4f577af 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectHeap.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMaxHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMaxHeap.java
index 93a4e75a..036a9520 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMaxHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMaxHeap.java
@@ -24,32 +24,11 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
*/
import java.util.Arrays;
-import java.util.ConcurrentModificationException;
import de.lmu.ifi.dbs.elki.math.MathUtil;
/**
- * Advanced priority queue class, based on a binary heap (for small sizes),
- * which will for larger heaps be accompanied by a 4-ary heap (attached below
- * the root of the two-ary heap, making the root actually 3-ary).
- *
- * This code was automatically instantiated for the types: Integer and Object
- *
- * This combination was found to work quite well in benchmarks, but YMMV.
- *
- * Some other observations from benchmarking:
- * <ul>
- * <li>Bulk loading did not improve things</li>
- * <li>Primitive heaps are substantially faster.</li>
- * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and
- * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li>
- * <li>Workload makes a huge difference. A load-once, poll-until-empty priority
- * queue is something different than e.g. a top-k heap, which will see a lot of
- * top element replacements.</li>
- * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for
- * top-k make a difference.</li>
- * <li>Different day, different benchmark results ...</li>
- * </ul>
+ * Binary heap for primitive types.
*
* @author Erich Schubert
*
@@ -68,49 +47,16 @@ public class IntegerObjectMaxHeap<V> implements IntegerObjectHeap<V> {
protected Object[] twovals;
/**
- * Extension heap.
- */
- protected int[] fourheap;
-
- /**
- * Extension heapvalues.
- */
- protected Object[] fourvals;
-
- /**
* Current size of heap.
*/
protected int size;
/**
- * (Structural) modification counter. Used to invalidate iterators.
- */
- protected int modCount = 0;
-
- /**
- * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements.
- */
- private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1;
-
- /**
* Initial size of the 2-ary heap.
*/
private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1;
/**
- * Initial size of 4-ary heap when initialized.
- *
- * 21 = 4-ary heap of height 2: 1 + 4 + 4*4
- *
- * 85 = 4-ary heap of height 3: 21 + 4*4*4
- *
- * 341 = 4-ary heap of height 4: 85 + 4*4*4*4
- *
- * Since we last grew by 255 (to 511), let's use 341.
- */
- private final static int FOUR_HEAP_INITIAL_SIZE = 341;
-
- /**
* Constructor, with default size.
*/
public IntegerObjectMaxHeap() {
@@ -120,10 +66,6 @@ public class IntegerObjectMaxHeap<V> implements IntegerObjectHeap<V> {
this.twoheap = twoheap;
this.twovals = twovals;
- this.fourheap = null;
- this.fourvals = null;
- this.size = 0;
- this.modCount = 0;
}
/**
@@ -133,35 +75,17 @@ public class IntegerObjectMaxHeap<V> implements IntegerObjectHeap<V> {
*/
public IntegerObjectMaxHeap(int minsize) {
super();
- if (minsize < TWO_HEAP_MAX_SIZE) {
- final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
- int[] twoheap = new int[size];
- Object[] twovals = new Object[size];
+ final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
+ int[] twoheap = new int[size];
+ Object[] twovals = new Object[size];
- this.twoheap = twoheap;
- this.twovals = twovals;
- this.fourheap = null;
- this.fourvals = null;
- } else {
- int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE];
- Object[] twovals = new Object[TWO_HEAP_INITIAL_SIZE];
- int[] fourheap = new int[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)];
- Object[] fourvals = new Object[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)];
- this.twoheap = twoheap;
- this.twovals = twovals;
- this.fourheap = fourheap;
- this.fourvals = fourvals;
- }
- this.size = 0;
- this.modCount = 0;
+ this.twoheap = twoheap;
+ this.twovals = twovals;
}
@Override
public void clear() {
size = 0;
- ++modCount;
- fourheap = null;
- fourvals = null;
Arrays.fill(twoheap, 0);
Arrays.fill(twovals, null);
}
@@ -181,34 +105,16 @@ public class IntegerObjectMaxHeap<V> implements IntegerObjectHeap<V> {
final int co = o;
final Object cv = (Object)v;
// System.err.println("Add: " + o);
- if (size < TWO_HEAP_MAX_SIZE) {
- if (size >= twoheap.length) {
- // Grow by one layer.
- twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
- twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1);
- }
- final int twopos = size;
- twoheap[twopos] = co;
- twovals[twopos] = cv;
- ++size;
- heapifyUp2(twopos, co, cv);
- ++modCount;
- } else {
- final int fourpos = size - TWO_HEAP_MAX_SIZE;
- if (fourheap == null) {
- fourheap = new int[FOUR_HEAP_INITIAL_SIZE];
- fourvals = new Object[FOUR_HEAP_INITIAL_SIZE];
- } else if (fourpos >= fourheap.length) {
- // Grow extension heap by half.
- fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1));
- fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1));
- }
- fourheap[fourpos] = co;
- fourvals[fourpos] = cv;
- ++size;
- heapifyUp4(fourpos, co, cv);
- ++modCount;
+ if (size >= twoheap.length) {
+ // Grow by one layer.
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
+ twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1);
}
+ final int twopos = size;
+ twoheap[twopos] = co;
+ twovals[twopos] = cv;
+ ++size;
+ heapifyUp(twopos, co, cv);
}
@Override
@@ -223,7 +129,6 @@ public class IntegerObjectMaxHeap<V> implements IntegerObjectHeap<V> {
@Override
public void replaceTopElement(int reinsert, V val) {
heapifyDown(reinsert, (Object)val);
- ++modCount;
}
/**
@@ -233,7 +138,7 @@ public class IntegerObjectMaxHeap<V> implements IntegerObjectHeap<V> {
* @param cur Current object
* @param val Current value
*/
- private void heapifyUp2(int twopos, int cur, Object val) {
+ private void heapifyUp(int twopos, int cur, Object val) {
while (twopos > 0) {
final int parent = (twopos - 1) >>> 1;
int par = twoheap[parent];
@@ -248,47 +153,11 @@ public class IntegerObjectMaxHeap<V> implements IntegerObjectHeap<V> {
twovals[twopos] = val;
}
- /**
- * Heapify-Up method for 4-ary heap.
- *
- * @param fourpos Position in 4-ary heap.
- * @param cur Current object
- * @param val Current value
- */
- private void heapifyUp4(int fourpos, int cur, Object val) {
- while (fourpos > 0) {
- final int parent = (fourpos - 1) >> 2;
- int par = fourheap[parent];
- if (cur <= par) {
- break;
- }
- fourheap[fourpos] = par;
- fourvals[fourpos] = fourvals[parent];
- fourpos = parent;
- }
- if (fourpos == 0 && twoheap[0] < cur) {
- fourheap[0] = twoheap[0];
- fourvals[0] = twovals[0];
- twoheap[0] = cur;
- twovals[0] = val;
- } else {
- fourheap[fourpos] = cur;
- fourvals[fourpos] = val;
- }
- }
-
@Override
public void poll() {
--size;
// Replacement object:
- if (size >= TWO_HEAP_MAX_SIZE) {
- final int last = size - TWO_HEAP_MAX_SIZE;
- final int reinsert = fourheap[last];
- final Object reinsertv = fourvals[last];
- fourheap[last] = 0;
- fourvals[last] = null;
- heapifyDown(reinsert, reinsertv);
- } else if (size > 0) {
+ if (size > 0) {
final int reinsert = twoheap[size];
final Object reinsertv = twovals[size];
twoheap[size] = 0;
@@ -298,42 +167,17 @@ public class IntegerObjectMaxHeap<V> implements IntegerObjectHeap<V> {
twoheap[0] = 0;
twovals[0] = null;
}
- ++modCount;
}
/**
* Invoke heapify-down for the root object.
*
- * @param reinsert Object to insert.
- * @param val Value to reinsert.
- */
- private void heapifyDown(int reinsert, Object val) {
- if (size > TWO_HEAP_MAX_SIZE) {
- // Special case: 3-ary situation.
- final int best = (twoheap[1] >= twoheap[2]) ? 1 : 2;
- if (fourheap[0] > twoheap[best]) {
- twoheap[0] = fourheap[0];
- twovals[0] = fourvals[0];
- heapifyDown4(0, reinsert, val);
- } else {
- twoheap[0] = twoheap[best];
- twovals[0] = twovals[best];
- heapifyDown2(best, reinsert, val);
- }
- return;
- }
- heapifyDown2(0, reinsert, val);
- }
-
- /**
- * Heapify-Down for 2-ary heap.
- *
- * @param twopos Position in 2-ary heap.
- * @param cur Current object
+ * @param cur Object to insert.
* @param val Value to reinsert.
*/
- private void heapifyDown2(int twopos, int cur, Object val) {
- final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1;
+ private void heapifyDown(int cur, Object val) {
+ final int stop = size >>> 1;
+ int twopos = 0;
while (twopos < stop) {
int bestchild = (twopos << 1) + 1;
int best = twoheap[bestchild];
@@ -353,54 +197,6 @@ public class IntegerObjectMaxHeap<V> implements IntegerObjectHeap<V> {
twovals[twopos] = val;
}
- /**
- * Heapify-Down for 4-ary heap.
- *
- * @param fourpos Position in 4-ary heap.
- * @param cur Current object
- * @param val Value to reinsert.
- */
- private void heapifyDown4(int fourpos, int cur, Object val) {
- final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2;
- while (fourpos < stop) {
- final int child = (fourpos << 2) + 1;
- int best = fourheap[child];
- int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE;
- if (size > minsize) {
- int nextchild = fourheap[candidate];
- if (best < nextchild) {
- bestchild = candidate;
- best = nextchild;
- }
-
- minsize += 2;
- if (size >= minsize) {
- nextchild = fourheap[++candidate];
- if (best < nextchild) {
- bestchild = candidate;
- best = nextchild;
- }
-
- if (size > minsize) {
- nextchild = fourheap[++candidate];
- if (best < nextchild) {
- bestchild = candidate;
- best = nextchild;
- }
- }
- }
- }
- if (cur >= best) {
- break;
- }
- fourheap[fourpos] = best;
- fourvals[fourpos] = fourvals[bestchild];
- fourpos = bestchild;
- }
- fourheap[fourpos] = cur;
- fourvals[fourpos] = val;
- }
-
@Override
public int peekKey() {
return twoheap[0];
@@ -449,16 +245,8 @@ public class IntegerObjectMaxHeap<V> implements IntegerObjectHeap<V> {
*/
protected int pos = 0;
- /**
- * Modification counter we were initialized at.
- */
- protected final int myModCount = modCount;
-
@Override
public boolean valid() {
- if (modCount != myModCount) {
- throw new ConcurrentModificationException();
- }
return pos < size;
}
@@ -469,14 +257,14 @@ public class IntegerObjectMaxHeap<V> implements IntegerObjectHeap<V> {
@Override
public int getKey() {
- return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]);
+ return twoheap[pos];
}
@SuppressWarnings("unchecked")
@Override
public V getValue() {
- return (V)((pos < TWO_HEAP_MAX_SIZE) ? twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]);
+ return (V)twovals[pos];
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMinHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMinHeap.java
index e54c7d28..cc816a0e 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMinHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/IntegerObjectMinHeap.java
@@ -24,32 +24,11 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
*/
import java.util.Arrays;
-import java.util.ConcurrentModificationException;
import de.lmu.ifi.dbs.elki.math.MathUtil;
/**
- * Advanced priority queue class, based on a binary heap (for small sizes),
- * which will for larger heaps be accompanied by a 4-ary heap (attached below
- * the root of the two-ary heap, making the root actually 3-ary).
- *
- * This code was automatically instantiated for the types: Integer and Object
- *
- * This combination was found to work quite well in benchmarks, but YMMV.
- *
- * Some other observations from benchmarking:
- * <ul>
- * <li>Bulk loading did not improve things</li>
- * <li>Primitive heaps are substantially faster.</li>
- * <li>Since an array in Java has an overhead of 12 bytes, odd-sized object and
- * integer arrays are actually well aligned both for 2-ary and 4-ary heaps.</li>
- * <li>Workload makes a huge difference. A load-once, poll-until-empty priority
- * queue is something different than e.g. a top-k heap, which will see a lot of
- * top element replacements.</li>
- * <li>Random vs. increasing vs. decreasing vs. sawtooth insertion patterns for
- * top-k make a difference.</li>
- * <li>Different day, different benchmark results ...</li>
- * </ul>
+ * Binary heap for primitive types.
*
* @author Erich Schubert
*
@@ -68,49 +47,16 @@ public class IntegerObjectMinHeap<V> implements IntegerObjectHeap<V> {
protected Object[] twovals;
/**
- * Extension heap.
- */
- protected int[] fourheap;
-
- /**
- * Extension heapvalues.
- */
- protected Object[] fourvals;
-
- /**
* Current size of heap.
*/
protected int size;
/**
- * (Structural) modification counter. Used to invalidate iterators.
- */
- protected int modCount = 0;
-
- /**
- * Maximum size of the 2-ary heap. A complete 2-ary heap has (2^k-1) elements.
- */
- private final static int TWO_HEAP_MAX_SIZE = (1 << 9) - 1;
-
- /**
* Initial size of the 2-ary heap.
*/
private final static int TWO_HEAP_INITIAL_SIZE = (1 << 5) - 1;
/**
- * Initial size of 4-ary heap when initialized.
- *
- * 21 = 4-ary heap of height 2: 1 + 4 + 4*4
- *
- * 85 = 4-ary heap of height 3: 21 + 4*4*4
- *
- * 341 = 4-ary heap of height 4: 85 + 4*4*4*4
- *
- * Since we last grew by 255 (to 511), let's use 341.
- */
- private final static int FOUR_HEAP_INITIAL_SIZE = 341;
-
- /**
* Constructor, with default size.
*/
public IntegerObjectMinHeap() {
@@ -120,10 +66,6 @@ public class IntegerObjectMinHeap<V> implements IntegerObjectHeap<V> {
this.twoheap = twoheap;
this.twovals = twovals;
- this.fourheap = null;
- this.fourvals = null;
- this.size = 0;
- this.modCount = 0;
}
/**
@@ -133,35 +75,17 @@ public class IntegerObjectMinHeap<V> implements IntegerObjectHeap<V> {
*/
public IntegerObjectMinHeap(int minsize) {
super();
- if (minsize < TWO_HEAP_MAX_SIZE) {
- final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
- int[] twoheap = new int[size];
- Object[] twovals = new Object[size];
+ final int size = MathUtil.nextPow2Int(minsize + 1) - 1;
+ int[] twoheap = new int[size];
+ Object[] twovals = new Object[size];
- this.twoheap = twoheap;
- this.twovals = twovals;
- this.fourheap = null;
- this.fourvals = null;
- } else {
- int[] twoheap = new int[TWO_HEAP_INITIAL_SIZE];
- Object[] twovals = new Object[TWO_HEAP_INITIAL_SIZE];
- int[] fourheap = new int[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)];
- Object[] fourvals = new Object[Math.max(21, minsize - TWO_HEAP_MAX_SIZE)];
- this.twoheap = twoheap;
- this.twovals = twovals;
- this.fourheap = fourheap;
- this.fourvals = fourvals;
- }
- this.size = 0;
- this.modCount = 0;
+ this.twoheap = twoheap;
+ this.twovals = twovals;
}
@Override
public void clear() {
size = 0;
- ++modCount;
- fourheap = null;
- fourvals = null;
Arrays.fill(twoheap, 0);
Arrays.fill(twovals, null);
}
@@ -181,34 +105,16 @@ public class IntegerObjectMinHeap<V> implements IntegerObjectHeap<V> {
final int co = o;
final Object cv = (Object)v;
// System.err.println("Add: " + o);
- if (size < TWO_HEAP_MAX_SIZE) {
- if (size >= twoheap.length) {
- // Grow by one layer.
- twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
- twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1);
- }
- final int twopos = size;
- twoheap[twopos] = co;
- twovals[twopos] = cv;
- ++size;
- heapifyUp2(twopos, co, cv);
- ++modCount;
- } else {
- final int fourpos = size - TWO_HEAP_MAX_SIZE;
- if (fourheap == null) {
- fourheap = new int[FOUR_HEAP_INITIAL_SIZE];
- fourvals = new Object[FOUR_HEAP_INITIAL_SIZE];
- } else if (fourpos >= fourheap.length) {
- // Grow extension heap by half.
- fourheap = Arrays.copyOf(fourheap, fourheap.length + (fourheap.length >> 1));
- fourvals = Arrays.copyOf(fourvals, fourvals.length + (fourvals.length >> 1));
- }
- fourheap[fourpos] = co;
- fourvals[fourpos] = cv;
- ++size;
- heapifyUp4(fourpos, co, cv);
- ++modCount;
+ if (size >= twoheap.length) {
+ // Grow by one layer.
+ twoheap = Arrays.copyOf(twoheap, twoheap.length + twoheap.length + 1);
+ twovals = Arrays.copyOf(twovals, twovals.length + twovals.length + 1);
}
+ final int twopos = size;
+ twoheap[twopos] = co;
+ twovals[twopos] = cv;
+ ++size;
+ heapifyUp(twopos, co, cv);
}
@Override
@@ -223,7 +129,6 @@ public class IntegerObjectMinHeap<V> implements IntegerObjectHeap<V> {
@Override
public void replaceTopElement(int reinsert, V val) {
heapifyDown(reinsert, (Object)val);
- ++modCount;
}
/**
@@ -233,7 +138,7 @@ public class IntegerObjectMinHeap<V> implements IntegerObjectHeap<V> {
* @param cur Current object
* @param val Current value
*/
- private void heapifyUp2(int twopos, int cur, Object val) {
+ private void heapifyUp(int twopos, int cur, Object val) {
while (twopos > 0) {
final int parent = (twopos - 1) >>> 1;
int par = twoheap[parent];
@@ -248,47 +153,11 @@ public class IntegerObjectMinHeap<V> implements IntegerObjectHeap<V> {
twovals[twopos] = val;
}
- /**
- * Heapify-Up method for 4-ary heap.
- *
- * @param fourpos Position in 4-ary heap.
- * @param cur Current object
- * @param val Current value
- */
- private void heapifyUp4(int fourpos, int cur, Object val) {
- while (fourpos > 0) {
- final int parent = (fourpos - 1) >> 2;
- int par = fourheap[parent];
- if (cur >= par) {
- break;
- }
- fourheap[fourpos] = par;
- fourvals[fourpos] = fourvals[parent];
- fourpos = parent;
- }
- if (fourpos == 0 && twoheap[0] > cur) {
- fourheap[0] = twoheap[0];
- fourvals[0] = twovals[0];
- twoheap[0] = cur;
- twovals[0] = val;
- } else {
- fourheap[fourpos] = cur;
- fourvals[fourpos] = val;
- }
- }
-
@Override
public void poll() {
--size;
// Replacement object:
- if (size >= TWO_HEAP_MAX_SIZE) {
- final int last = size - TWO_HEAP_MAX_SIZE;
- final int reinsert = fourheap[last];
- final Object reinsertv = fourvals[last];
- fourheap[last] = 0;
- fourvals[last] = null;
- heapifyDown(reinsert, reinsertv);
- } else if (size > 0) {
+ if (size > 0) {
final int reinsert = twoheap[size];
final Object reinsertv = twovals[size];
twoheap[size] = 0;
@@ -298,42 +167,17 @@ public class IntegerObjectMinHeap<V> implements IntegerObjectHeap<V> {
twoheap[0] = 0;
twovals[0] = null;
}
- ++modCount;
}
/**
* Invoke heapify-down for the root object.
*
- * @param reinsert Object to insert.
- * @param val Value to reinsert.
- */
- private void heapifyDown(int reinsert, Object val) {
- if (size > TWO_HEAP_MAX_SIZE) {
- // Special case: 3-ary situation.
- final int best = (twoheap[1] <= twoheap[2]) ? 1 : 2;
- if (fourheap[0] < twoheap[best]) {
- twoheap[0] = fourheap[0];
- twovals[0] = fourvals[0];
- heapifyDown4(0, reinsert, val);
- } else {
- twoheap[0] = twoheap[best];
- twovals[0] = twovals[best];
- heapifyDown2(best, reinsert, val);
- }
- return;
- }
- heapifyDown2(0, reinsert, val);
- }
-
- /**
- * Heapify-Down for 2-ary heap.
- *
- * @param twopos Position in 2-ary heap.
- * @param cur Current object
+ * @param cur Object to insert.
* @param val Value to reinsert.
*/
- private void heapifyDown2(int twopos, int cur, Object val) {
- final int stop = Math.min(size, TWO_HEAP_MAX_SIZE) >>> 1;
+ private void heapifyDown(int cur, Object val) {
+ final int stop = size >>> 1;
+ int twopos = 0;
while (twopos < stop) {
int bestchild = (twopos << 1) + 1;
int best = twoheap[bestchild];
@@ -353,54 +197,6 @@ public class IntegerObjectMinHeap<V> implements IntegerObjectHeap<V> {
twovals[twopos] = val;
}
- /**
- * Heapify-Down for 4-ary heap.
- *
- * @param fourpos Position in 4-ary heap.
- * @param cur Current object
- * @param val Value to reinsert.
- */
- private void heapifyDown4(int fourpos, int cur, Object val) {
- final int stop = (size - TWO_HEAP_MAX_SIZE + 2) >>> 2;
- while (fourpos < stop) {
- final int child = (fourpos << 2) + 1;
- int best = fourheap[child];
- int bestchild = child, candidate = child + 1, minsize = candidate + TWO_HEAP_MAX_SIZE;
- if (size > minsize) {
- int nextchild = fourheap[candidate];
- if (best > nextchild) {
- bestchild = candidate;
- best = nextchild;
- }
-
- minsize += 2;
- if (size >= minsize) {
- nextchild = fourheap[++candidate];
- if (best > nextchild) {
- bestchild = candidate;
- best = nextchild;
- }
-
- if (size > minsize) {
- nextchild = fourheap[++candidate];
- if (best > nextchild) {
- bestchild = candidate;
- best = nextchild;
- }
- }
- }
- }
- if (cur <= best) {
- break;
- }
- fourheap[fourpos] = best;
- fourvals[fourpos] = fourvals[bestchild];
- fourpos = bestchild;
- }
- fourheap[fourpos] = cur;
- fourvals[fourpos] = val;
- }
-
@Override
public int peekKey() {
return twoheap[0];
@@ -449,16 +245,8 @@ public class IntegerObjectMinHeap<V> implements IntegerObjectHeap<V> {
*/
protected int pos = 0;
- /**
- * Modification counter we were initialized at.
- */
- protected final int myModCount = modCount;
-
@Override
public boolean valid() {
- if (modCount != myModCount) {
- throw new ConcurrentModificationException();
- }
return pos < size;
}
@@ -469,14 +257,14 @@ public class IntegerObjectMinHeap<V> implements IntegerObjectHeap<V> {
@Override
public int getKey() {
- return ((pos < TWO_HEAP_MAX_SIZE) ? twoheap[pos] : fourheap[pos - TWO_HEAP_MAX_SIZE]);
+ return twoheap[pos];
}
@SuppressWarnings("unchecked")
@Override
public V getValue() {
- return (V)((pos < TWO_HEAP_MAX_SIZE) ? twovals[pos] : fourvals[pos - TWO_HEAP_MAX_SIZE]);
+ return (V)twovals[pos];
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ObjectHeap.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ObjectHeap.java
index b5dbbb0e..2b03740e 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ObjectHeap.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/heap/ObjectHeap.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.datastructures.heap;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -31,7 +31,6 @@ import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter;
* @author Erich Schubert
*
* @apiviz.has UnsortedIter
- *
* @param <K> Key type
*/
public interface ObjectHeap<K> {
@@ -85,7 +84,7 @@ public interface ObjectHeap<K> {
* @return Size
*/
public int size();
-
+
/**
* Is the heap empty?
*
@@ -112,7 +111,6 @@ public interface ObjectHeap<K> {
* </pre>
*
* @author Erich Schubert
- *
* @param <K> Key type
*/
public static interface UnsortedIter<K> extends Iter {
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntStaticHistogram.java b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntStaticHistogram.java
index 7b1eed94..84b55cd1 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntStaticHistogram.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/datastructures/histogram/IntStaticHistogram.java
@@ -73,7 +73,7 @@ public class IntStaticHistogram extends AbstractStaticHistogram implements IntHi
} else {
// Shift in place and clear head
System.arraycopy(data, 0, data, -bin, size);
- Arrays.fill(data, 0, -bin, (int) 0);
+ Arrays.fill(data, 0, -bin, 0);
}
data[0] = val;
// Note that bin is negative, -bin is the shift offset!
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/documentation/Description.java b/src/de/lmu/ifi/dbs/elki/utilities/documentation/Description.java
index 10fe40f3..3ce24e4e 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/documentation/Description.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/documentation/Description.java
@@ -36,7 +36,7 @@ import java.lang.annotation.Target;
*/
@Documented
@Retention(RetentionPolicy.RUNTIME)
-@Target( { ElementType.TYPE })
+@Target( { ElementType.TYPE, ElementType.METHOD, ElementType.FIELD })
public @interface Description {
/**
* Description of the class.
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/documentation/Title.java b/src/de/lmu/ifi/dbs/elki/utilities/documentation/Title.java
index 9676e8e0..c77ca875 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/documentation/Title.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/documentation/Title.java
@@ -36,7 +36,7 @@ import java.lang.annotation.Target;
*/
@Documented
@Retention(RetentionPolicy.RUNTIME)
-@Target( { ElementType.TYPE })
+@Target( { ElementType.TYPE, ElementType.METHOD, ElementType.FIELD })
public @interface Title {
/**
* Title of the Algorithm
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/ensemble/EnsembleVotingMedian.java b/src/de/lmu/ifi/dbs/elki/utilities/ensemble/EnsembleVotingMedian.java
index de137b40..85e7f6c8 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/ensemble/EnsembleVotingMedian.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/ensemble/EnsembleVotingMedian.java
@@ -26,8 +26,7 @@ package de.lmu.ifi.dbs.elki.utilities.ensemble;
import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
@@ -84,10 +83,10 @@ public class EnsembleVotingMedian implements EnsembleVoting {
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- DoubleParameter quantileP = new DoubleParameter(QUANTILE_ID, 0.5);
- quantileP.addConstraint(new GreaterEqualConstraint(0.0));
- quantileP.addConstraint(new LessEqualConstraint(1.0));
- if (config.grab(quantileP)) {
+ DoubleParameter quantileP = new DoubleParameter(QUANTILE_ID, .5);
+ quantileP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
+ quantileP.addConstraint(CommonConstraints.LESS_EQUAL_ONE_DOUBLE);
+ if(config.grab(quantileP)) {
quantile = quantileP.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/AllOrNoneMustBeSetGlobalConstraint.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/AllOrNoneMustBeSetGlobalConstraint.java
index a06a06c1..a87af1a8 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/AllOrNoneMustBeSetGlobalConstraint.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/AllOrNoneMustBeSetGlobalConstraint.java
@@ -24,7 +24,6 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints;
*/
import java.util.ArrayList;
-import java.util.List;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionUtil;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException;
@@ -41,7 +40,7 @@ public class AllOrNoneMustBeSetGlobalConstraint implements GlobalParameterConstr
/**
* List of parameters to be checked
*/
- private List<Parameter<?>> parameterList;
+ private Parameter<?>[] parameterList;
/**
* Constructs a global parameter constraint for testing if either all elements
@@ -49,7 +48,7 @@ public class AllOrNoneMustBeSetGlobalConstraint implements GlobalParameterConstr
*
* @param parameters list of parameters to be checked
*/
- public AllOrNoneMustBeSetGlobalConstraint(List<Parameter<?>> parameters) {
+ public AllOrNoneMustBeSetGlobalConstraint(Parameter<?>... parameters) {
this.parameterList = parameters;
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/CommonConstraints.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/CommonConstraints.java
new file mode 100644
index 00000000..ea1caed9
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/CommonConstraints.java
@@ -0,0 +1,98 @@
+package de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.List;
+
+/**
+ * Class storing a number of very common constraints.
+ *
+ * @author Erich Schubert
+ */
+public final class CommonConstraints {
+ /**
+ * Integer constraint: >= -1
+ */
+ public static final ParameterConstraint<? super Integer> GREATER_EQUAL_MINUSONE_INT = new GreaterEqualConstraint(-1);
+
+ /**
+ * Not negative.
+ */
+ public static final ParameterConstraint<? super Integer> GREATER_EQUAL_ZERO_INT = new GreaterEqualConstraint(0);
+
+ /**
+ * Larger than zero.
+ */
+ public static final ParameterConstraint<? super Integer> GREATER_EQUAL_ONE_INT = new GreaterEqualConstraint(1);
+
+ /**
+ * Larger than one.
+ */
+ public static final ParameterConstraint<? super Integer> GREATER_THAN_ONE_INT = new GreaterConstraint(1);
+
+ /**
+ * Not negative.
+ */
+ public static final ParameterConstraint<? super Double> GREATER_EQUAL_ZERO_DOUBLE = new GreaterEqualConstraint(0.);
+
+ /**
+ * Larger than zero.
+ */
+ public static final ParameterConstraint<? super Double> GREATER_THAN_ZERO_DOUBLE = new GreaterConstraint(0.);
+
+ /**
+ * Constraint: less than .5
+ */
+ public static final ParameterConstraint<? super Double> LESS_THAN_HALF_DOUBLE = new LessConstraint(.5);
+
+ /**
+ * At least 1.
+ */
+ public static final ParameterConstraint<? super Double> GREATER_EQUAL_ONE_DOUBLE = new GreaterEqualConstraint(1.);
+
+ /**
+ * Larger than one.
+ */
+ public static final ParameterConstraint<? super Double> GREATER_THAN_ONE_DOUBLE = new GreaterConstraint(1.);
+
+ /**
+ * Less than one.
+ */
+ public static final ParameterConstraint<? super Double> LESS_THAN_ONE_DOUBLE = new LessConstraint(1.);
+
+ /**
+ * Less or equal than one.
+ */
+ public static final ParameterConstraint<? super Double> LESS_EQUAL_ONE_DOUBLE = new LessEqualConstraint(1.);
+
+ /**
+ * Constraint for the whole list.
+ */
+ public static final ParameterConstraint<? super List<Integer>> GREATER_EQUAL_ZERO_INT_LIST = new ListEachConstraint<>(GREATER_EQUAL_ZERO_INT);
+
+ /**
+ * List constraint: >= 1
+ */
+ public static final ParameterConstraint<? super List<Integer>> GREATER_EQUAL_ONE_INT_LIST = new ListEachConstraint<>(GREATER_EQUAL_ONE_INT);
+}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/EqualSizeGlobalConstraint.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/EqualSizeGlobalConstraint.java
index 2ee7be9c..586b4257 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/EqualSizeGlobalConstraint.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/EqualSizeGlobalConstraint.java
@@ -23,8 +23,6 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.List;
-
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionUtil;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.WrongParameterValueException;
@@ -41,7 +39,7 @@ public class EqualSizeGlobalConstraint implements GlobalParameterConstraint {
/**
* List parameters to be tested
*/
- private List<ListParameter<?>> parameters;
+ private ListParameter<?, ?>[] parameters;
/**
* Creates a global parameter constraint for testing if a number of list
@@ -49,7 +47,7 @@ public class EqualSizeGlobalConstraint implements GlobalParameterConstraint {
*
* @param params list parameters to be tested for equal list sizes
*/
- public EqualSizeGlobalConstraint(List<ListParameter<?>> params) {
+ public EqualSizeGlobalConstraint(ListParameter<?, ?>... params) {
this.parameters = params;
}
@@ -63,7 +61,7 @@ public class EqualSizeGlobalConstraint implements GlobalParameterConstraint {
boolean first = false;
int constraintSize = -1;
- for(ListParameter<?> listParam : parameters) {
+ for(ListParameter<?, ?> listParam : parameters) {
if(listParam.isDefined()) {
if(!first) {
constraintSize = listParam.getListSize();
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/GlobalListSizeConstraint.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/GlobalListSizeConstraint.java
index bcedd342..7c35045e 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/GlobalListSizeConstraint.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/GlobalListSizeConstraint.java
@@ -39,7 +39,7 @@ public class GlobalListSizeConstraint implements GlobalParameterConstraint {
/**
* List parameter to be tested.
*/
- private ListParameter<?> list;
+ private ListParameter<?, ?> list;
/**
* Integer parameter specifying the constraint list size.
@@ -55,7 +55,7 @@ public class GlobalListSizeConstraint implements GlobalParameterConstraint {
* @param v the list parameter to be tested.
* @param i integer parameter specifying the constraint list size.
*/
- public GlobalListSizeConstraint(ListParameter<?> v, IntParameter i) {
+ public GlobalListSizeConstraint(ListParameter<?, ?> v, IntParameter i) {
this.list = v;
this.length = i;
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/LessEqualGlobalConstraint.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/LessEqualGlobalConstraint.java
index 1216e03b..9fa0ee99 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/LessEqualGlobalConstraint.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/LessEqualGlobalConstraint.java
@@ -39,12 +39,12 @@ public class LessEqualGlobalConstraint<T extends Number> implements GlobalParame
/**
* First number parameter.
*/
- private NumberParameter<T> first;
+ private NumberParameter<?, T> first;
/**
* Second number parameter.
*/
- private NumberParameter<T> second;
+ private NumberParameter<?, T> second;
/**
* Creates a Less-Equal-Than global parameter constraint.
@@ -55,7 +55,7 @@ public class LessEqualGlobalConstraint<T extends Number> implements GlobalParame
* @param first first number parameter
* @param second second number parameter
*/
- public LessEqualGlobalConstraint(NumberParameter<T> first, NumberParameter<T> second) {
+ public LessEqualGlobalConstraint(NumberParameter<?, T> first, NumberParameter<?, T> second) {
this.first = first;
this.second = second;
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/LessGlobalConstraint.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/LessGlobalConstraint.java
index a722edab..989f6e29 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/LessGlobalConstraint.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/LessGlobalConstraint.java
@@ -39,12 +39,12 @@ public class LessGlobalConstraint<T extends Number> implements GlobalParameterCo
/**
* First number parameter.
*/
- private NumberParameter<T> first;
+ private NumberParameter<?, T> first;
/**
* Second number parameter.
*/
- private NumberParameter<T> second;
+ private NumberParameter<?, T> second;
/**
* Creates a Less-Than global parameter constraint. That is the value of the
@@ -54,7 +54,7 @@ public class LessGlobalConstraint<T extends Number> implements GlobalParameterCo
* @param first first number parameter
* @param second second number parameter
*/
- public LessGlobalConstraint(NumberParameter<T> first, NumberParameter<T> second) {
+ public LessGlobalConstraint(NumberParameter<?, T> first, NumberParameter<?, T> second) {
this.first = first;
this.second = second;
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/NoDuplicateValueGlobalConstraint.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/NoDuplicateValueGlobalConstraint.java
index 65b427a1..7036ecef 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/NoDuplicateValueGlobalConstraint.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/constraints/NoDuplicateValueGlobalConstraint.java
@@ -45,7 +45,7 @@ public class NoDuplicateValueGlobalConstraint implements GlobalParameterConstrai
/**
* List of number parameters to be checked.
*/
- private List<? extends AbstractParameter<?>> parameters;
+ private List<? extends AbstractParameter<?, ?>> parameters;
/**
* Constructs a Not-Equal-Value global parameter constraint. That is, the
@@ -54,7 +54,7 @@ public class NoDuplicateValueGlobalConstraint implements GlobalParameterConstrai
*
* @param parameters list of number parameters to be tested
*/
- public NoDuplicateValueGlobalConstraint(List<? extends AbstractParameter<?>> parameters) {
+ public NoDuplicateValueGlobalConstraint(List<? extends AbstractParameter<?, ?>> parameters) {
this.parameters = parameters;
}
@@ -65,7 +65,7 @@ public class NoDuplicateValueGlobalConstraint implements GlobalParameterConstrai
*
* @param parameters list of number parameters to be tested
*/
- public NoDuplicateValueGlobalConstraint(AbstractParameter<?>... parameters) {
+ public NoDuplicateValueGlobalConstraint(AbstractParameter<?, ?>... parameters) {
this.parameters = Arrays.asList(parameters);
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/package-info.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/package-info.java
index e4c5489b..d2106865 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/package-info.java
@@ -15,7 +15,7 @@
* "Distance function to determine the distance between database objects."
* );
* }</pre></blockquote>
- * (This example is from {@link de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm DistanceBasedAlgorithm}.)
+ * (This example is from {@link de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm AbstractDistanceBasedAlgorithm}.)
* </li>
*
* <li><b>Parameter Object</b>: To obtain a value, you <em>must</em> use a
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/AbstractParameter.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/AbstractParameter.java
index 8e1b48c3..cdad8583 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/AbstractParameter.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/AbstractParameter.java
@@ -25,7 +25,6 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters;
import java.security.InvalidParameterException;
import java.util.ArrayList;
-import java.util.Collection;
import java.util.List;
import de.lmu.ifi.dbs.elki.logging.LoggingUtil;
@@ -49,9 +48,10 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ParameterConstra
* @apiviz.composedOf OptionID
* @apiviz.uses ParameterConstraint
*
+ * @param <THIS> type self-reference
* @param <T> the type of a possible value (i.e., the type of the option)
*/
-public abstract class AbstractParameter<T> implements Parameter<T> {
+public abstract class AbstractParameter<THIS extends AbstractParameter<THIS, T>, T> implements Parameter<T> {
/**
* The default value of the parameter (may be null).
*/
@@ -152,20 +152,24 @@ public abstract class AbstractParameter<T> implements Parameter<T> {
@Override
public boolean tryDefaultValue() throws UnspecifiedParameterException {
// Assume default value instead.
- if (hasDefaultValue()) {
+ if(hasDefaultValue()) {
useDefaultValue();
return true;
- } else if (isOptional()) {
+ }
+ else if(isOptional()) {
// Optional is fine, but not successful
return false;
- } else {
+ }
+ else {
throw new UnspecifiedParameterException(this);
}
}
+ @SuppressWarnings("unchecked")
@Override
- public void setOptional(boolean opt) {
+ public THIS setOptional(boolean opt) {
this.optionalParameter = opt;
+ return (THIS) this;
}
@Override
@@ -204,31 +208,32 @@ public abstract class AbstractParameter<T> implements Parameter<T> {
// description.append(getParameterType()).append(" ");
description.append(shortDescription);
description.append(FormatUtil.NEWLINE);
- if (hasValuesDescription()) {
+ if(hasValuesDescription()) {
final String valuesDescription = getValuesDescription();
description.append(valuesDescription);
- if (!valuesDescription.endsWith(FormatUtil.NEWLINE)) {
+ if(!valuesDescription.endsWith(FormatUtil.NEWLINE)) {
description.append(FormatUtil.NEWLINE);
}
}
- if (hasDefaultValue()) {
+ if(hasDefaultValue()) {
description.append("Default: ");
description.append(getDefaultValueAsString());
description.append(FormatUtil.NEWLINE);
}
- if (constraints != null && !constraints.isEmpty()) {
- if (constraints.size() == 1) {
+ if(constraints != null && !constraints.isEmpty()) {
+ if(constraints.size() == 1) {
description.append("Constraint: ");
- } else if (constraints.size() > 1) {
+ }
+ else if(constraints.size() > 1) {
description.append("Constraints: ");
}
- for (int i = 0; i < constraints.size(); i++) {
+ for(int i = 0; i < constraints.size(); i++) {
ParameterConstraint<? super T> constraint = constraints.get(i);
- if (i > 0) {
+ if(i > 0) {
description.append(", ");
}
description.append(constraint.getDescription(getName()));
- if (i == constraints.size() - 1) {
+ if(i == constraints.size() - 1) {
description.append('.');
}
}
@@ -245,8 +250,8 @@ public abstract class AbstractParameter<T> implements Parameter<T> {
* @throws ParameterException when the object is not valid.
*/
protected boolean validate(T obj) throws ParameterException {
- if (constraints != null) {
- for (ParameterConstraint<? super T> cons : this.constraints) {
+ if(constraints != null) {
+ for(ParameterConstraint<? super T> cons : this.constraints) {
cons.test(obj);
}
}
@@ -276,9 +281,10 @@ public abstract class AbstractParameter<T> implements Parameter<T> {
@Override
public void setValue(Object obj) throws ParameterException {
T val = parseValue(obj);
- if (validate(val)) {
+ if(validate(val)) {
setValueInternal(val);
- } else {
+ }
+ else {
throw new InvalidParameterException("Value for option \"" + getName() + "\" did not validate: " + obj.toString());
}
}
@@ -294,7 +300,7 @@ public abstract class AbstractParameter<T> implements Parameter<T> {
@Override
public final T getValue() {
- if (this.value == null) {
+ if(this.value == null) {
LoggingUtil.warning("Programming error: Parameter#getValue() called for unset parameter \"" + this.optionid.getName() + "\"", new Throwable());
}
return this.value;
@@ -331,23 +337,13 @@ public abstract class AbstractParameter<T> implements Parameter<T> {
return getDefaultValue().toString();
}
+ @SuppressWarnings("unchecked")
@Override
- public void addConstraint(ParameterConstraint<? super T> constraint) {
- if (constraints == null) {
+ public THIS addConstraint(ParameterConstraint<? super T> constraint) {
+ if(constraints == null) {
this.constraints = new ArrayList<>(1);
}
constraints.add(constraint);
- }
-
- /**
- * Add a collection of constraints.
- *
- * @param cs Constraints to add
- */
- public void addConstraints(Collection<? extends ParameterConstraint<? super T>> cs) {
- if (constraints == null) {
- this.constraints = new ArrayList<>(cs.size());
- }
- constraints.addAll(cs);
+ return (THIS) this;
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/ClassListParameter.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/ClassListParameter.java
index 35cd0573..1cea7fac 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/ClassListParameter.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/ClassListParameter.java
@@ -46,7 +46,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz
* @param <C> Class type
*/
// TODO: Add missing constructors. (ObjectListParameter also!)
-public class ClassListParameter<C> extends ListParameter<Class<? extends C>> {
+public class ClassListParameter<C> extends ListParameter<ClassListParameter<C>, Class<? extends C>> {
/**
* The restriction class for the list of class names.
*/
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/ClassParameter.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/ClassParameter.java
index a0669fca..42288738 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/ClassParameter.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/ClassParameter.java
@@ -49,7 +49,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz
*/
// TODO: add additional constructors with parameter constraints.
// TODO: turn restrictionClass into a constraint?
-public class ClassParameter<C> extends AbstractParameter<Class<? extends C>> {
+public class ClassParameter<C> extends AbstractParameter<ClassParameter<C>, Class<? extends C>> {
/**
* The restriction class for this class parameter.
*/
@@ -73,7 +73,7 @@ public class ClassParameter<C> extends AbstractParameter<Class<? extends C>> {
// * ClassParameter<Foo<Bar>>(optionID, (Class<Foo<Bar>>) Foo.class) is an
// invalid cast.
this.restrictionClass = (Class<C>) restrictionClass;
- if (restrictionClass == null) {
+ if(restrictionClass == null) {
LoggingUtil.warning("Restriction class 'null' for parameter '" + optionID + "'", new Throwable());
}
}
@@ -96,7 +96,7 @@ public class ClassParameter<C> extends AbstractParameter<Class<? extends C>> {
// * ClassParameter<Foo<Bar>>(optionID, (Class<Foo<Bar>>) Foo.class) is an
// invalid cast.
this.restrictionClass = (Class<C>) restrictionClass;
- if (restrictionClass == null) {
+ if(restrictionClass == null) {
LoggingUtil.warning("Restriction class 'null' for parameter '" + optionID + "'", new Throwable());
}
}
@@ -115,15 +115,15 @@ public class ClassParameter<C> extends AbstractParameter<Class<? extends C>> {
@SuppressWarnings("unchecked")
@Override
protected Class<? extends C> parseValue(Object obj) throws ParameterException {
- if (obj == null) {
+ if(obj == null) {
throw new UnspecifiedParameterException(this);
}
- if (obj instanceof Class<?>) {
+ if(obj instanceof Class<?>) {
return (Class<? extends C>) obj;
}
- if (obj instanceof String) {
+ if(obj instanceof String) {
Class<? extends C> clz = InspectionUtil.findImplementation(restrictionClass, (String) obj);
- if (clz != null) {
+ if(clz != null) {
return clz;
}
}
@@ -136,13 +136,13 @@ public class ClassParameter<C> extends AbstractParameter<Class<? extends C>> {
*/
@Override
public boolean validate(Class<? extends C> obj) throws ParameterException {
- if (obj == null) {
+ if(obj == null) {
throw new UnspecifiedParameterException(this);
}
- if (!restrictionClass.isAssignableFrom(obj)) {
+ if(!restrictionClass.isAssignableFrom(obj)) {
throw new WrongParameterValueException(this, obj.getName(), "Given class not a subclass / implementation of " + restrictionClass.getName());
}
- if (!super.validate(obj)) {
+ if(!super.validate(obj)) {
return false;
}
return true;
@@ -175,7 +175,7 @@ public class ClassParameter<C> extends AbstractParameter<Class<? extends C>> {
*/
@Override
public String getValuesDescription() {
- if (restrictionClass != null && restrictionClass != Object.class) {
+ if(restrictionClass != null && restrictionClass != Object.class) {
return restrictionString();
}
return "";
@@ -199,22 +199,26 @@ public class ClassParameter<C> extends AbstractParameter<Class<? extends C>> {
*/
public C instantiateClass(Parameterization config) {
try {
- if (getValue() == null /* && !optionalParameter */) {
+ if(getValue() == null /* && !optionalParameter */) {
throw new UnusedParameterException("Value of parameter " + getName() + " has not been specified.");
}
C instance;
try {
config = config.descend(this);
instance = ClassGenericsUtil.tryInstantiate(restrictionClass, getValue(), config);
- } catch (InvocationTargetException e) {
+ }
+ catch(InvocationTargetException e) {
throw new WrongParameterValueException(this, getValue().getCanonicalName(), "Error instantiating class.", e);
- } catch (NoSuchMethodException e) {
+ }
+ catch(NoSuchMethodException e) {
throw new WrongParameterValueException(this, getValue().getCanonicalName(), "Error instantiating class - no usable public constructor.");
- } catch (Exception e) {
+ }
+ catch(Exception e) {
throw new WrongParameterValueException(this, getValue().getCanonicalName(), "Error instantiating class.", e);
}
return instance;
- } catch (ParameterException e) {
+ }
+ catch(ParameterException e) {
config.reportError(e);
return null;
}
@@ -247,19 +251,20 @@ public class ClassParameter<C> extends AbstractParameter<Class<? extends C>> {
*/
public String restrictionString() {
StringBuilder info = new StringBuilder();
- if (restrictionClass.isInterface()) {
+ if(restrictionClass.isInterface()) {
info.append("Implementing ");
- } else {
+ }
+ else {
info.append("Extending ");
}
info.append(restrictionClass.getName());
info.append(FormatUtil.NEWLINE);
List<Class<?>> known = getKnownImplementations();
- if (!known.isEmpty()) {
+ if(!known.isEmpty()) {
info.append("Known classes (default package " + restrictionClass.getPackage().getName() + "):");
info.append(FormatUtil.NEWLINE);
- for (Class<?> c : known) {
+ for(Class<?> c : known) {
info.append("->" + FormatUtil.NONBREAKING_SPACE);
info.append(canonicalClassName(c, getRestrictionClass()));
info.append(FormatUtil.NEWLINE);
@@ -279,13 +284,13 @@ public class ClassParameter<C> extends AbstractParameter<Class<? extends C>> {
*/
public static String canonicalClassName(Class<?> c, Package pkg, String postfix) {
String name = c.getName();
- if (pkg != null) {
+ if(pkg != null) {
String prefix = pkg.getName() + ".";
- if (name.startsWith(prefix)) {
+ if(name.startsWith(prefix)) {
name = name.substring(prefix.length());
}
}
- if (postfix != null && name.endsWith(postfix)) {
+ if(postfix != null && name.endsWith(postfix)) {
name = name.substring(0, name.length() - postfix.length());
}
return name;
@@ -299,7 +304,7 @@ public class ClassParameter<C> extends AbstractParameter<Class<? extends C>> {
* @return Simplified class name.
*/
public static String canonicalClassName(Class<?> c, Class<?> parent) {
- if (parent == null) {
+ if(parent == null) {
return canonicalClassName(c, null, InspectionUtil.FACTORY_POSTFIX);
}
return canonicalClassName(c, parent.getPackage(), InspectionUtil.FACTORY_POSTFIX);
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/DistanceParameter.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/DistanceParameter.java
index e97b6d0e..eda54082 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/DistanceParameter.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/DistanceParameter.java
@@ -36,7 +36,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.WrongParameterValueException
*
* @param <D> Distance type
*/
-public class DistanceParameter<D extends Distance<D>> extends AbstractParameter<D> {
+public class DistanceParameter<D extends Distance<D>> extends AbstractParameter<DistanceParameter<D>, D> {
/**
* Distance type
*/
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/DoubleListParameter.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/DoubleListParameter.java
index 89cfc345..84f97734 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/DoubleListParameter.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/DoubleListParameter.java
@@ -37,7 +37,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.WrongParameterValueException
* @author Steffi Wanka
* @author Erich Schubert
*/
-public class DoubleListParameter extends ListParameter<Double> {
+public class DoubleListParameter extends ListParameter<DoubleListParameter, Double> {
/**
* Constructs a list parameter with the given optionID and optional flag.
*
@@ -83,7 +83,7 @@ public class DoubleListParameter extends ListParameter<Double> {
String[] values = SPLIT.split((String) obj);
ArrayList<Double> doubleValue = new ArrayList<>(values.length);
for(String val : values) {
- doubleValue.add(Double.valueOf(val));
+ doubleValue.add(FormatUtil.parseDouble(val));
}
return doubleValue;
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/DoubleParameter.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/DoubleParameter.java
index 632e1f8c..efa64370 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/DoubleParameter.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/DoubleParameter.java
@@ -23,9 +23,9 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.WrongParameterValueException;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ParameterConstraint;
/**
* Parameter class for a parameter specifying a double value.
@@ -33,36 +33,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ParameterConstra
* @author Steffi Wanka
* @author Erich Schubert
*/
-public class DoubleParameter extends NumberParameter<Double> {
- /**
- * Constructs a double parameter with the given optionID, parameter
- * constraint, and default value.
- *
- * @param optionID the unique id of this parameter
- * @param defaultValue the default value for this parameter
- * @param constraint the constraint of this parameter
- * @deprecated Use {@link #addConstraint} instead.
- */
- @Deprecated
- public DoubleParameter(OptionID optionID, double defaultValue, ParameterConstraint<Number> constraint) {
- super(optionID, defaultValue);
- addConstraint(constraint);
- }
-
- /**
- * Constructs a double parameter with the given optionID, and parameter
- * constraint.
- *
- * @param optionID the unique id of this parameter
- * @param constraint the constraint of this parameter
- * @deprecated Use {@link #addConstraint} instead.
- */
- @Deprecated
- public DoubleParameter(OptionID optionID, ParameterConstraint<Number> constraint) {
- super(optionID);
- addConstraint(constraint);
- }
-
+public class DoubleParameter extends NumberParameter<DoubleParameter, Double> {
/**
* Constructs a double parameter with the given optionID and default value.
*
@@ -74,20 +45,6 @@ public class DoubleParameter extends NumberParameter<Double> {
}
/**
- * Constructs a double parameter with the given optionID and default value.
- *
- * @param optionID the unique optionID
- * @param optional Flag to indicate that the parameter is optional
- *
- * @deprecated Use {@link #setOptional} instead.
- */
- @Deprecated
- public DoubleParameter(OptionID optionID, boolean optional) {
- super(optionID);
- setOptional(optional);
- }
-
- /**
* Constructs a double parameter with the given optionID.
*
* @param optionID the unique id of this parameter
@@ -103,14 +60,16 @@ public class DoubleParameter extends NumberParameter<Double> {
@Override
protected Double parseValue(Object obj) throws WrongParameterValueException {
- if (obj instanceof Double) {
+ if(obj instanceof Double) {
return (Double) obj;
}
try {
- return Double.valueOf(obj.toString());
- } catch (NullPointerException e) {
+ return FormatUtil.parseDouble(obj.toString());
+ }
+ catch(NullPointerException e) {
throw new WrongParameterValueException("Wrong parameter format! Parameter \"" + getName() + "\" requires a double value, read: " + obj + "!\n");
- } catch (NumberFormatException e) {
+ }
+ catch(NumberFormatException e) {
throw new WrongParameterValueException("Wrong parameter format! Parameter \"" + getName() + "\" requires a double value, read: " + obj + "!\n");
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/EnumParameter.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/EnumParameter.java
index 4d05753c..22d7dd54 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/EnumParameter.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/EnumParameter.java
@@ -62,7 +62,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.WrongParameterValueException
*
* @param <E> Enum type
*/
-public class EnumParameter<E extends Enum<E>> extends AbstractParameter<E> {
+public class EnumParameter<E extends Enum<E>> extends AbstractParameter<EnumParameter<E>, E> {
/**
* Reference to the actual enum type, for T.valueOf().
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/FileListParameter.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/FileListParameter.java
index eb638298..9e115dc7 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/FileListParameter.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/FileListParameter.java
@@ -38,7 +38,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.WrongParameterValueException
* @author Steffi Wanka
* @author Erich Schubert
*/
-public class FileListParameter extends ListParameter<File> {
+public class FileListParameter extends ListParameter<FileListParameter, File> {
/**
* Available types of the files: {@link #INPUT_FILES} denotes input files,
* {@link #OUTPUT_FILES} denotes output files.
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/FileParameter.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/FileParameter.java
index ea3fa454..3e9fdc7d 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/FileParameter.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/FileParameter.java
@@ -38,7 +38,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.WrongParameterValueException
* @author Erich Schubert
*/
// TODO: turn FileType into a Constraint?
-public class FileParameter extends AbstractParameter<File> {
+public class FileParameter extends AbstractParameter<FileParameter, File> {
/**
* Available file types: {@link #INPUT_FILE} denotes an input file,
* {@link #OUTPUT_FILE} denotes an output file.
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/Flag.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/Flag.java
index 7587d2a5..f9e1a1f1 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/Flag.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/Flag.java
@@ -37,7 +37,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.WrongParameterValueException
* @author Steffi Wanka
* @author Erich Schubert
*/
-public class Flag extends AbstractParameter<Boolean> {
+public class Flag extends AbstractParameter<Flag, Boolean> {
/**
* Constant indicating that the flag is set.
*/
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/IntListParameter.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/IntListParameter.java
index 93012955..cc8327b4 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/IntListParameter.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/IntListParameter.java
@@ -37,7 +37,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.WrongParameterValueException
* @author Elke Achtert
* @author Erich Schubert
*/
-public class IntListParameter extends ListParameter<Integer> {
+public class IntListParameter extends ListParameter<IntListParameter, Integer> {
/**
* Constructs an integer list parameter
*
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/IntParameter.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/IntParameter.java
index 30457330..3d867770 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/IntParameter.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/IntParameter.java
@@ -23,10 +23,10 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.WrongParameterValueException;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ParameterConstraint;
/**
* Parameter class for a parameter specifying an integer value.
@@ -34,51 +34,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ParameterConstra
* @author Steffi Wanka
* @author Erich Schubert
*/
-public class IntParameter extends NumberParameter<Integer> {
- /**
- * Constructs an integer parameter with the given optionID, parameter
- * constraint, and default value.
- *
- * @param optionID optionID the unique id of the option
- * @param defaultValue the default value
- * @param constraint the constraint for this integer parameter
- * @deprecated Use {@link #addConstraint} instead.
- */
- @Deprecated
- public IntParameter(OptionID optionID, int defaultValue, ParameterConstraint<Number> constraint) {
- super(optionID, Integer.valueOf(defaultValue));
- addConstraint(constraint);
- }
-
- /**
- * Constructs an integer parameter with the given optionID, parameter
- * constraint, and optional flag.
- *
- * @param optionID optionID the unique id of the option
- * @param constraint the constraint for this integer parameter
- * @param optional specifies if this parameter is an optional parameter
- * @deprecated Use {@link #addConstraint} instead.
- */
- @Deprecated
- public IntParameter(OptionID optionID, ParameterConstraint<Number> constraint, boolean optional) {
- super(optionID, optional);
- addConstraint(constraint);
- }
-
- /**
- * Constructs an integer parameter with the given optionID, and parameter
- * constraint.
- *
- * @param optionID optionID the unique id of the option
- * @param constraint the constraint for this integer parameter
- * @deprecated Use {@link #addConstraint} instead.
- */
- @Deprecated
- public IntParameter(OptionID optionID, ParameterConstraint<Number> constraint) {
- super(optionID);
- addConstraint(constraint);
- }
-
+public class IntParameter extends NumberParameter<IntParameter, Integer> {
/**
* Constructs an integer parameter with the given optionID.
*
@@ -93,18 +49,6 @@ public class IntParameter extends NumberParameter<Integer> {
* Constructs an integer parameter with the given optionID.
*
* @param optionID optionID the unique id of the option
- * @param optional specifies if this parameter is an optional parameter
- * @deprecated Use {@link #setOptional} instead.
- */
- @Deprecated
- public IntParameter(OptionID optionID, boolean optional) {
- super(optionID, optional);
- }
-
- /**
- * Constructs an integer parameter with the given optionID.
- *
- * @param optionID optionID the unique id of the option
*/
public IntParameter(OptionID optionID) {
super(optionID);
@@ -117,14 +61,17 @@ public class IntParameter extends NumberParameter<Integer> {
@Override
protected Integer parseValue(Object obj) throws ParameterException {
- if (obj instanceof Integer) {
+ if(obj instanceof Integer) {
return (Integer) obj;
}
try {
- return Integer.valueOf(obj.toString());
- } catch (NullPointerException e) {
+ final String s = obj.toString();
+ return (int) FormatUtil.parseLongBase10(s, 0, s.length());
+ }
+ catch(NullPointerException e) {
throw new WrongParameterValueException("Wrong parameter format! Parameter \"" + getName() + "\" requires an integer value, read: " + obj + "!\n");
- } catch (NumberFormatException e) {
+ }
+ catch(NumberFormatException e) {
throw new WrongParameterValueException("Wrong parameter format! Parameter \"" + getName() + "\" requires an integer value, read: " + obj + "!\n");
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/ListParameter.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/ListParameter.java
index 119fb121..df520daa 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/ListParameter.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/ListParameter.java
@@ -34,9 +34,10 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
* @author Steffi Wanka
* @author Erich Schubert
*
+ * @param <THIS> Type self-reference
* @param <T> List type
*/
-public abstract class ListParameter<T> extends AbstractParameter<List<T>> {
+public abstract class ListParameter<THIS extends ListParameter<THIS, T>, T> extends AbstractParameter<THIS, List<T>> {
/**
* A pattern defining a &quot,&quot.
*/
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/LongParameter.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/LongParameter.java
index f5d441b5..5ab6b487 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/LongParameter.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/LongParameter.java
@@ -26,7 +26,6 @@ package de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.WrongParameterValueException;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ParameterConstraint;
/**
* Parameter class for a parameter specifying a long value.
@@ -34,22 +33,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ParameterConstra
* @author Steffi Wanka
* @author Erich Schubert
*/
-public class LongParameter extends NumberParameter<Long> {
- /**
- * Constructs a long parameter with the given optionID, parameter constraint
- * and default value.
- *
- * @param optionID the unique OptionID for this parameter
- * @param constraint the parameter constraint for this long parameter
- * @param defaultValue the default value
- * @deprecated Use {@link #addConstraint} instead!
- */
- @Deprecated
- public LongParameter(OptionID optionID, ParameterConstraint<Number> constraint, long defaultValue) {
- super(optionID, Long.valueOf(defaultValue));
- addConstraint(constraint);
- }
-
+public class LongParameter extends NumberParameter<LongParameter, Long> {
/**
* Constructs a long parameter with the given optionID and default value.
*
@@ -68,7 +52,7 @@ public class LongParameter extends NumberParameter<Long> {
public LongParameter(OptionID optionID) {
super(optionID);
}
-
+
@Override
public String getValueAsString() {
return getValue().toString();
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/NumberParameter.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/NumberParameter.java
index a4448d30..fabdce53 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/NumberParameter.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/NumberParameter.java
@@ -30,10 +30,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
*
* @author Steffi Wanka
* @author Erich Schubert
- *
+ *
+ * @param <THIS> type self-reference
* @param <T> the type of a possible value (i.e., the type of the option)
*/
-public abstract class NumberParameter<T extends Number> extends AbstractParameter<T> {
+public abstract class NumberParameter<THIS extends NumberParameter<THIS, T>, T extends Number> extends AbstractParameter<THIS, T> {
/**
* Constructs a number parameter with the given optionID and default Value.
*
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/Parameter.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/Parameter.java
index 110633b3..ffacb6d1 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/Parameter.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/Parameter.java
@@ -80,8 +80,9 @@ public interface Parameter<T> {
* Specifies if this parameter is an optional parameter.
*
* @param opt true if this parameter is optional, false otherwise
+ * @return the parameter itself, for chaining
*/
- public abstract void setOptional(boolean opt);
+ public abstract Parameter<T> setOptional(boolean opt);
/**
* Checks if this parameter is an optional parameter.
@@ -232,6 +233,7 @@ public interface Parameter<T> {
* Add an additional constraint.
*
* @param constraint Constraint to add.
+ * @return the parameter itself, for chaining
*/
- public abstract void addConstraint(ParameterConstraint<? super T> constraint);
+ public abstract Parameter<T> addConstraint(ParameterConstraint<? super T> constraint);
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/PatternParameter.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/PatternParameter.java
index b76edbbb..e3cb4bcf 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/PatternParameter.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/PatternParameter.java
@@ -37,7 +37,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.WrongParameterValueException
* @author Steffi Wanka
* @author Erich Schubert
*/
-public class PatternParameter extends AbstractParameter<Pattern> {
+public class PatternParameter extends AbstractParameter<PatternParameter, Pattern> {
/**
* Constructs a pattern parameter with the given optionID, and default value.
*
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/RandomParameter.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/RandomParameter.java
index 6c0668dd..bf5e0cb0 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/RandomParameter.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/RandomParameter.java
@@ -33,7 +33,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.WrongParameterValueException
*
* @author Erich Schubert
*/
-public class RandomParameter extends AbstractParameter<RandomFactory> {
+public class RandomParameter extends AbstractParameter<RandomParameter, RandomFactory> {
/**
* Seed value, if used
*/
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/StringParameter.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/StringParameter.java
index 3a9bbf11..dc2a2a32 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/StringParameter.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/StringParameter.java
@@ -27,7 +27,6 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.UnspecifiedParameterException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.WrongParameterValueException;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ParameterConstraint;
/**
* Parameter class for a parameter specifying a string.
@@ -35,38 +34,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ParameterConstra
* @author Steffi Wanka
* @author Erich Schubert
*/
-public class StringParameter extends AbstractParameter<String> {
- /**
- * Constructs a string parameter with the given optionID, constraints and
- * default value.
- *
- * @param optionID the unique id of the parameter
- * @param constraint parameter constraint
- * @param defaultValue the default value of the parameter
- *
- * @deprecated Use {@link #addConstraint} instead!
- */
- @Deprecated
- public StringParameter(OptionID optionID, ParameterConstraint<String> constraint, String defaultValue) {
- super(optionID, defaultValue);
- addConstraint(constraint);
- }
-
- /**
- * Constructs a string parameter with the given optionID, constraints and
- * default value.
- *
- * @param optionID the unique id of the parameter
- * @param constraint parameter constraint
- *
- * @deprecated Use {@link #addConstraint} instead!
- */
- @Deprecated
- public StringParameter(OptionID optionID, ParameterConstraint<String> constraint) {
- super(optionID);
- addConstraint(constraint);
- }
-
+public class StringParameter extends AbstractParameter<StringParameter, String> {
/**
* Constructs a string parameter with the given optionID, and default value.
*
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/VectorListParameter.java b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/VectorListParameter.java
index 906bfbd8..43fa0797 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/VectorListParameter.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/optionhandling/parameters/VectorListParameter.java
@@ -27,6 +27,7 @@ import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
+import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.WrongParameterValueException;
@@ -38,31 +39,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ParameterConstra
* @author Steffi Wanka
* @author Erich Schubert
*/
-public class VectorListParameter extends ListParameter<List<Double>> {
- /**
- * Constructs a vector list parameter with the given name and description.
- *
- * @param optionID Option ID
- * @param constraints Constraint
- * @param defaultValue Default value
- */
- public VectorListParameter(OptionID optionID, List<ParameterConstraint<List<List<Double>>>> constraints, List<List<Double>> defaultValue) {
- super(optionID, defaultValue);
- addConstraints(constraints);
- }
-
- /**
- * Constructs a vector list parameter with the given name and description.
- *
- * @param optionID Option ID
- * @param constraints Constraints
- * @param optional Optional flag
- */
- public VectorListParameter(OptionID optionID, List<ParameterConstraint<List<List<Double>>>> constraints, boolean optional) {
- super(optionID, optional);
- addConstraints(constraints);
- }
-
+public class VectorListParameter extends ListParameter<VectorListParameter, List<Double>> {
/**
* Constructs a vector list parameter with the given name and description.
*
@@ -139,12 +116,12 @@ public class VectorListParameter extends ListParameter<List<Double>> {
Iterator<Double> veciter = vec.iterator();
while(veciter.hasNext()) {
buf.append(veciter.next().toString());
- if (veciter.hasNext()) {
+ if(veciter.hasNext()) {
buf.append(LIST_SEP);
}
}
// Append separation character
- if (valiter.hasNext()) {
+ if(valiter.hasNext()) {
buf.append(VECTOR_SEP);
}
}
@@ -184,7 +161,7 @@ public class VectorListParameter extends ListParameter<List<Double>> {
ArrayList<Double> vectorCoord = new ArrayList<>();
for(String coordinate : coordinates) {
try {
- vectorCoord.add(Double.valueOf(coordinate));
+ vectorCoord.add(FormatUtil.parseDouble(coordinate));
}
catch(NumberFormatException e) {
throw new WrongParameterValueException("Wrong parameter format! Coordinates of vector \"" + vector + "\" are not valid!");
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/AxisBasedReferencePoints.java b/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/AxisBasedReferencePoints.java
index 24829d98..d8544cd4 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/AxisBasedReferencePoints.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/AxisBasedReferencePoints.java
@@ -32,7 +32,7 @@ import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
@@ -83,7 +83,7 @@ public class AxisBasedReferencePoints<V extends NumberVector<?>> implements Refe
// Compute mean and extend from minmax.
double[] mean = new double[dim];
double[] delta = new double[dim];
- for (int d = 0; d < dim; d++) {
+ for(int d = 0; d < dim; d++) {
mean[d] = (minmax.first.doubleValue(d) + minmax.second.doubleValue(d)) * .5;
delta[d] = spacescale * (minmax.second.doubleValue(d) - mean[d]);
}
@@ -92,21 +92,22 @@ public class AxisBasedReferencePoints<V extends NumberVector<?>> implements Refe
double[] vec = new double[dim];
// Use min and max
- for (int d = 0; d < dim; d++) {
+ for(int d = 0; d < dim; d++) {
vec[d] = mean[d] - delta[d];
}
result.add(factory.newNumberVector(vec));
- for (int d = 0; d < dim; d++) {
+ for(int d = 0; d < dim; d++) {
vec[d] = mean[d] + delta[d];
}
result.add(factory.newNumberVector(vec));
// Plus axis end points:
- for (int i = 0; i < dim; i++) {
- for (int d = 0; d < dim; d++) {
- if (d != i) {
+ for(int i = 0; i < dim; i++) {
+ for(int d = 0; d < dim; d++) {
+ if(d != i) {
vec[d] = mean[d] - delta[d];
- } else {
+ }
+ else {
vec[d] = mean[d] + delta[d];
}
}
@@ -133,8 +134,8 @@ public class AxisBasedReferencePoints<V extends NumberVector<?>> implements Refe
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
DoubleParameter spacescaleP = new DoubleParameter(SPACE_SCALE_ID, 1.0);
- spacescaleP.addConstraint(new GreaterEqualConstraint(0.0));
- if (config.grab(spacescaleP)) {
+ spacescaleP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
+ if(config.grab(spacescaleP)) {
spacescale = spacescaleP.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/GridBasedReferencePoints.java b/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/GridBasedReferencePoints.java
index b94564cf..007efe6f 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/GridBasedReferencePoints.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/GridBasedReferencePoints.java
@@ -29,10 +29,11 @@ import java.util.Collection;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
+import de.lmu.ifi.dbs.elki.math.MathUtil;
import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -97,23 +98,23 @@ public class GridBasedReferencePoints<V extends NumberVector<?>> implements Refe
// Compute mean from minmax.
double[] mean = new double[dim];
- for (int d = 0; d < dim; d++) {
+ for(int d = 0; d < dim; d++) {
mean[d] = (minmax.first.doubleValue(d) + minmax.second.doubleValue(d)) * .5;
}
- int gridpoints = Math.max(1, (int) Math.pow(gridres + 1, dim));
+ int gridpoints = Math.max(1, MathUtil.ipowi(gridres + 1, dim));
ArrayList<V> result = new ArrayList<>(gridpoints);
double[] delta = new double[dim];
- if (gridres > 0) {
+ if(gridres > 0) {
double halfgrid = gridres / 2.0;
- for (int d = 0; d < dim; d++) {
+ for(int d = 0; d < dim; d++) {
delta[d] = (minmax.second.doubleValue(d) - minmax.first.doubleValue(d)) / gridres;
}
double[] vec = new double[dim];
- for (int i = 0; i < gridpoints; i++) {
+ for(int i = 0; i < gridpoints; i++) {
int acc = i;
- for (int d = 0; d < dim; d++) {
+ for(int d = 0; d < dim; d++) {
int coord = acc % (gridres + 1);
acc = acc / (gridres + 1);
vec[d] = mean[d] + (coord - halfgrid) * delta[d] * gridscale;
@@ -122,7 +123,8 @@ public class GridBasedReferencePoints<V extends NumberVector<?>> implements Refe
// logger.debug("New reference point: " + FormatUtil.format(vec));
result.add(newp);
}
- } else {
+ }
+ else {
result.add(factory.newNumberVector(mean));
// logger.debug("New reference point: " + FormatUtil.format(mean));
}
@@ -152,14 +154,14 @@ public class GridBasedReferencePoints<V extends NumberVector<?>> implements Refe
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
IntParameter gridP = new IntParameter(GRID_ID, 1);
- gridP.addConstraint(new GreaterEqualConstraint(0));
- if (config.grab(gridP)) {
+ gridP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_INT);
+ if(config.grab(gridP)) {
gridres = gridP.getValue();
}
DoubleParameter gridscaleP = new DoubleParameter(GRID_SCALE_ID, 1.0);
- gridscaleP.addConstraint(new GreaterEqualConstraint(0.0));
- if (config.grab(gridscaleP)) {
+ gridscaleP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
+ if(config.grab(gridscaleP)) {
gridscale = gridscaleP.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/RandomGeneratedReferencePoints.java b/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/RandomGeneratedReferencePoints.java
index 0a59d410..9d866ecc 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/RandomGeneratedReferencePoints.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/RandomGeneratedReferencePoints.java
@@ -32,7 +32,7 @@ import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -97,15 +97,15 @@ public class RandomGeneratedReferencePoints<V extends NumberVector<?>> implement
// Compute mean from minmax.
double[] mean = new double[dim];
double[] delta = new double[dim];
- for (int d = 0; d < dim; d++) {
+ for(int d = 0; d < dim; d++) {
mean[d] = (minmax.first.doubleValue(d + 1) + minmax.second.doubleValue(d + 1)) * .5;
delta[d] = (minmax.second.doubleValue(d + 1) - minmax.first.doubleValue(d + 1));
}
ArrayList<V> result = new ArrayList<>(samplesize);
double[] vec = new double[dim];
- for (int i = 0; i < samplesize; i++) {
- for (int d = 0; d < dim; d++) {
+ for(int i = 0; i < samplesize; i++) {
+ for(int d = 0; d < dim; d++) {
vec[d] = mean[d] + (Math.random() - 0.5) * scale * delta[d];
}
V newp = factory.newNumberVector(vec);
@@ -139,14 +139,14 @@ public class RandomGeneratedReferencePoints<V extends NumberVector<?>> implement
super.makeOptions(config);
IntParameter samplesizeP = new IntParameter(N_ID);
- samplesizeP.addConstraint(new GreaterConstraint(0));
- if (config.grab(samplesizeP)) {
+ samplesizeP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(samplesizeP)) {
samplesize = samplesizeP.getValue();
}
DoubleParameter scaleP = new DoubleParameter(SCALE_ID, 1.0);
- scaleP.addConstraint(new GreaterConstraint(0.0));
- if (config.grab(scaleP)) {
+ scaleP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+ if(config.grab(scaleP)) {
scale = scaleP.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/RandomSampleReferencePoints.java b/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/RandomSampleReferencePoints.java
index a2a48b30..ea80d9d9 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/RandomSampleReferencePoints.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/RandomSampleReferencePoints.java
@@ -36,7 +36,7 @@ import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.logging.LoggingUtil;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -151,7 +151,7 @@ public class RandomSampleReferencePoints<V extends NumberVector<?>> implements R
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
IntParameter samplesizeP = new IntParameter(N_ID);
- samplesizeP.addConstraint(new GreaterConstraint(0));
+ samplesizeP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
if(config.grab(samplesizeP)) {
samplesize = samplesizeP.intValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/StarBasedReferencePoints.java b/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/StarBasedReferencePoints.java
index 611100b4..74cdf92b 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/StarBasedReferencePoints.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/StarBasedReferencePoints.java
@@ -33,7 +33,7 @@ import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
@@ -96,14 +96,14 @@ public class StarBasedReferencePoints<V extends NumberVector<?>> implements Refe
double[] centroid = new double[dim];
double[] min = new double[dim];
double[] max = new double[dim];
- for (int d = 0; d < dim; d++) {
+ for(int d = 0; d < dim; d++) {
centroid[d] = 0;
min[d] = Double.MAX_VALUE;
max[d] = -Double.MAX_VALUE;
}
- for (DBIDIter iditer = database.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = database.iterDBIDs(); iditer.valid(); iditer.advance()) {
V obj = database.get(iditer);
- for (int d = 0; d < dim; d++) {
+ for(int d = 0; d < dim; d++) {
double val = obj.doubleValue(d + 1);
centroid[d] += val;
min[d] = Math.min(min[d], val);
@@ -111,21 +111,21 @@ public class StarBasedReferencePoints<V extends NumberVector<?>> implements Refe
}
}
// finish centroid, scale min, max
- for (int d = 0; d < dim; d++) {
+ for(int d = 0; d < dim; d++) {
centroid[d] = centroid[d] / database.size();
min[d] = (min[d] - centroid[d]) * scale + centroid[d];
max[d] = (max[d] - centroid[d]) * scale + centroid[d];
}
ArrayList<V> result = new ArrayList<>(2 * dim + 1);
- if (!nocenter) {
+ if(!nocenter) {
result.add(factory.newNumberVector(centroid));
}
// Plus axis end points through centroid
double[] vec = new double[dim];
- for (int i = 0; i < dim; i++) {
- for (int d = 0; d < dim; d++) {
- if (d != i) {
+ for(int i = 0; i < dim; i++) {
+ for(int d = 0; d < dim; d++) {
+ if(d != i) {
vec[d] = centroid[d];
}
}
@@ -160,13 +160,13 @@ public class StarBasedReferencePoints<V extends NumberVector<?>> implements Refe
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
Flag nocenterF = new Flag(NOCENTER_ID);
- if (config.grab(nocenterF)) {
+ if(config.grab(nocenterF)) {
nocenter = nocenterF.getValue();
}
DoubleParameter scaleP = new DoubleParameter(SCALE_ID, 1.0);
- scaleP.addConstraint(new GreaterEqualConstraint(0.0));
- if (config.grab(scaleP)) {
+ scaleP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
+ if(config.grab(scaleP)) {
scale = scaleP.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/TopKOutlierScaling.java b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/TopKOutlierScaling.java
index 25103dbc..71a495a6 100644
--- a/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/TopKOutlierScaling.java
+++ b/src/de/lmu/ifi/dbs/elki/utilities/scaling/outlier/TopKOutlierScaling.java
@@ -31,7 +31,7 @@ import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayLikeUtil;
import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -97,36 +97,36 @@ public class TopKOutlierScaling implements OutlierScalingFunction {
@Override
public void prepare(OutlierResult or) {
- if (k <= 0) {
+ if(k <= 0) {
LoggingUtil.warning("No k configured for Top-k outlier scaling!");
}
DBIDIter order = or.getOrdering().iter(or.getOrdering().getDBIDs()).iter();
- for (int i = 0; i < k && order.valid(); i++, order.advance()) {
+ for(int i = 0; i < k && order.valid(); i++, order.advance()) {
cutoff = or.getScores().get(order);
}
max = or.getOutlierMeta().getActualMaximum();
ground = or.getOutlierMeta().getTheoreticalBaseline();
- if (Double.isInfinite(ground) || Double.isNaN(ground)) {
+ if(Double.isInfinite(ground) || Double.isNaN(ground)) {
ground = or.getOutlierMeta().getTheoreticalMinimum();
}
- if (Double.isInfinite(ground) || Double.isNaN(ground)) {
+ if(Double.isInfinite(ground) || Double.isNaN(ground)) {
ground = or.getOutlierMeta().getActualMinimum();
}
- if (Double.isInfinite(ground) || Double.isNaN(ground)) {
+ if(Double.isInfinite(ground) || Double.isNaN(ground)) {
ground = Math.min(0.0, cutoff);
}
}
@Override
public <A> void prepare(A array, NumberArrayAdapter<?, A> adapter) {
- if (k <= 0) {
+ if(k <= 0) {
LoggingUtil.warning("No k configured for Top-k outlier scaling!");
}
double[] scores = ArrayLikeUtil.toPrimitiveDoubleArray(array, adapter);
QuickSelect.quickSelect(scores, k);
cutoff = scores[k - 1];
max = Double.NEGATIVE_INFINITY;
- for (double v : scores) {
+ for(double v : scores) {
max = Math.max(max, v);
}
ground = Math.min(0.0, cutoff);
@@ -134,7 +134,7 @@ public class TopKOutlierScaling implements OutlierScalingFunction {
@Override
public double getMax() {
- if (binary) {
+ if(binary) {
return 1.0;
}
return max;
@@ -142,7 +142,7 @@ public class TopKOutlierScaling implements OutlierScalingFunction {
@Override
public double getMin() {
- if (binary) {
+ if(binary) {
return 0.0;
}
return ground;
@@ -150,16 +150,19 @@ public class TopKOutlierScaling implements OutlierScalingFunction {
@Override
public double getScaled(double value) {
- if (binary) {
- if (value >= cutoff) {
+ if(binary) {
+ if(value >= cutoff) {
return 1;
- } else {
+ }
+ else {
return 0;
}
- } else {
- if (value >= cutoff) {
+ }
+ else {
+ if(value >= cutoff) {
return (value - ground) / (max - ground);
- } else {
+ }
+ else {
return 0.0;
}
}
@@ -181,13 +184,13 @@ public class TopKOutlierScaling implements OutlierScalingFunction {
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
IntParameter kP = new IntParameter(K_ID);
- kP.addConstraint(new GreaterConstraint(1));
- if (config.grab(kP)) {
+ kP.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
+ if(config.grab(kP)) {
k = kP.intValue();
}
Flag binaryF = new Flag(BINARY_ID);
- if (config.grab(binaryF)) {
+ if(config.grab(binaryF)) {
binary = binaryF.isTrue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/visualization/ExportVisualizations.java b/src/de/lmu/ifi/dbs/elki/visualization/ExportVisualizations.java
index 4daf5246..9c19d15f 100644
--- a/src/de/lmu/ifi/dbs/elki/visualization/ExportVisualizations.java
+++ b/src/de/lmu/ifi/dbs/elki/visualization/ExportVisualizations.java
@@ -40,7 +40,7 @@ import de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy.Hierarchy;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.FileParameter;
@@ -130,44 +130,44 @@ public class ExportVisualizations implements ResultHandler {
@Override
public void processNewResult(HierarchicalResult baseResult, Result newResult) {
- if (output.isFile()) {
+ if(output.isFile()) {
throw new AbortException("Output folder cannot be an existing file.");
}
- if (!output.exists()) {
- if (!output.mkdirs()) {
+ if(!output.exists()) {
+ if(!output.mkdirs()) {
throw new AbortException("Could not create output directory.");
}
}
- if (this.baseResult != baseResult) {
+ if(this.baseResult != baseResult) {
this.baseResult = baseResult;
context = null;
counter = 0;
LOG.verbose("Note: Reusing visualization exporter for more than one result is untested.");
}
- if (context == null) {
+ if(context == null) {
context = manager.newContext(baseResult);
}
// Projected visualizations
ArrayList<Projector> projectors = ResultUtil.filterResults(baseResult, Projector.class);
- for (Projector proj : projectors) {
+ for(Projector proj : projectors) {
// TODO: allow selecting individual projections only.
Collection<PlotItem> items = proj.arrange();
- for (PlotItem item : items) {
+ for(PlotItem item : items) {
processItem(item);
}
}
ResultHierarchy hier = baseResult.getHierarchy();
ArrayList<VisualizationTask> tasks = ResultUtil.filterResults(baseResult, VisualizationTask.class);
- for (VisualizationTask task : tasks) {
+ for(VisualizationTask task : tasks) {
boolean isprojected = false;
- for (Hierarchy.Iter<Result> iter = hier.iterParents(task); iter.valid(); iter.advance()) {
- if (iter.get() instanceof Projector) {
+ for(Hierarchy.Iter<Result> iter = hier.iterParents(task); iter.valid(); iter.advance()) {
+ if(iter.get() instanceof Projector) {
isprojected = true;
break;
}
}
- if (isprojected) {
+ if(isprojected) {
continue;
}
PlotItem pi = new PlotItem(ratio, 1.0, null);
@@ -180,11 +180,11 @@ public class ExportVisualizations implements ResultHandler {
final double height = 1;
final double width = ratio * height;
// Descend into subitems
- for (Iterator<PlotItem> iter = item.subitems.iterator(); iter.hasNext();) {
+ for(Iterator<PlotItem> iter = item.subitems.iterator(); iter.hasNext();) {
PlotItem subitem = iter.next();
processItem(subitem);
}
- if (item.taskSize() <= 0) {
+ if(item.taskSize() <= 0) {
return;
}
item.sort();
@@ -195,29 +195,32 @@ public class ExportVisualizations implements ResultHandler {
svgp.getRoot().setAttribute(SVGConstants.SVG_VIEW_BOX_ATTRIBUTE, "0 0 " + width + " " + height);
ArrayList<Visualization> layers = new ArrayList<>();
- for (Iterator<VisualizationTask> iter = item.tasks.iterator(); iter.hasNext();) {
+ for(Iterator<VisualizationTask> iter = item.tasks.iterator(); iter.hasNext();) {
VisualizationTask task = iter.next();
- if (task.nodetail || task.noexport || !task.visible) {
+ if(task.nodetail || task.noexport || !task.visible) {
continue;
}
try {
Visualization v = task.getFactory().makeVisualization(task.clone(svgp, context, item.proj, width, height));
layers.add(v);
- } catch (Exception e) {
- if (Logging.getLogger(task.getFactory().getClass()).isDebugging()) {
+ }
+ catch(Exception e) {
+ if(Logging.getLogger(task.getFactory().getClass()).isDebugging()) {
LoggingUtil.exception("Visualization failed.", e);
- } else {
+ }
+ else {
LoggingUtil.warning("Visualizer " + task.getFactory().getClass().getName() + " failed - enable debugging to see details.");
}
}
}
- if (layers.size() <= 0) {
+ if(layers.size() <= 0) {
return;
}
- for (Visualization layer : layers) {
- if (layer.getLayer() != null) {
+ for(Visualization layer : layers) {
+ if(layer.getLayer() != null) {
svgp.getRoot().appendChild(layer.getLayer());
- } else {
+ }
+ else {
LoggingUtil.warning("NULL layer seen.");
}
}
@@ -227,10 +230,11 @@ public class ExportVisualizations implements ResultHandler {
File outname = new File(output, "plot-" + counter + ".svg");
try {
svgp.saveAsSVG(outname);
- } catch (Exception e) {
+ }
+ catch(Exception e) {
LOG.warning("Export of visualization failed.", e);
}
- for (Visualization layer : layers) {
+ for(Visualization layer : layers) {
layer.destroy();
}
counter++;
@@ -263,13 +267,13 @@ public class ExportVisualizations implements ResultHandler {
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
FileParameter outputP = new FileParameter(FOLDER_ID, FileType.OUTPUT_FILE);
- if (config.grab(outputP)) {
+ if(config.grab(outputP)) {
output = outputP.getValue();
}
DoubleParameter ratioP = new DoubleParameter(RATIO_ID, 1.33);
- ratioP.addConstraint(new GreaterConstraint(0.0));
- if (config.grab(ratioP)) {
+ ratioP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+ if(config.grab(ratioP)) {
ratio = ratioP.doubleValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/visualization/VisualizerParameterizer.java b/src/de/lmu/ifi/dbs/elki/visualization/VisualizerParameterizer.java
index f49ddde6..69976532 100644
--- a/src/de/lmu/ifi/dbs/elki/visualization/VisualizerParameterizer.java
+++ b/src/de/lmu/ifi/dbs/elki/visualization/VisualizerParameterizer.java
@@ -29,7 +29,7 @@ import java.util.Collection;
import java.util.List;
import java.util.regex.Pattern;
-import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.DistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
@@ -48,7 +48,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.WrongParameterValueException;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.MergedParameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -84,18 +84,18 @@ public class VisualizerParameterizer implements Parameterizable {
* <p>
* Key: -visualizer.stylesheet
*
- * Default: default properties file
- * <br>
+ * Default: default properties file <br>
* included stylesheets:
* <ul>
- * <li>classic</li>
- * <li>default</li>
- * <li>greyscale</li>
- * <li>neon</li>
- * <li>presentation</li>
- * <li>print</li>
+ * <li>classic</li>
+ * <li>default</li>
+ * <li>greyscale</li>
+ * <li>neon</li>
+ * <li>presentation</li>
+ * <li>print</li>
* </ul>
- * These are {@code *.properties} files in the package {@link de.lmu.ifi.dbs.elki.visualization.style}.
+ * These are {@code *.properties} files in the package
+ * {@link de.lmu.ifi.dbs.elki.visualization.style}.
* </p>
*
*
@@ -178,14 +178,14 @@ public class VisualizerParameterizer implements Parameterizable {
* @return New context
*/
public VisualizerContext newContext(HierarchicalResult result) {
- if (samplesize > 0) {
+ if(samplesize > 0) {
Collection<Relation<?>> rels = ResultUtil.filterResults(result, Relation.class);
- for (Relation<?> rel : rels) {
- if (!ResultUtil.filterResults(rel, SamplingResult.class).isEmpty()) {
+ for(Relation<?> rel : rels) {
+ if(!ResultUtil.filterResults(rel, SamplingResult.class).isEmpty()) {
continue;
}
int size = rel.size();
- if (size > samplesize) {
+ if(size > samplesize) {
SamplingResult sample = new SamplingResult(rel);
sample.setSample(DBIDUtil.randomSample(sample.getSample(), samplesize, rnd));
ResultUtil.addChildResult(rel, sample);
@@ -205,53 +205,53 @@ public class VisualizerParameterizer implements Parameterizable {
*/
public static String getTitle(Database db, Result result) {
List<Pair<Object, Parameter<?>>> settings = new ArrayList<>();
- for (SettingsResult sr : ResultUtil.getSettingsResults(result)) {
+ for(SettingsResult sr : ResultUtil.getSettingsResults(result)) {
settings.addAll(sr.getSettings());
}
String algorithm = null;
String distance = null;
String dataset = null;
- for (Pair<Object, Parameter<?>> setting : settings) {
- if (setting.second.equals(AlgorithmStep.Parameterizer.ALGORITHM_ID)) {
+ for(Pair<Object, Parameter<?>> setting : settings) {
+ if(setting.second.equals(AlgorithmStep.Parameterizer.ALGORITHM_ID)) {
algorithm = setting.second.getValue().toString();
}
- if (setting.second.equals(AbstractDistanceBasedAlgorithm.DISTANCE_FUNCTION_ID)) {
+ if(setting.second.equals(DistanceBasedAlgorithm.DISTANCE_FUNCTION_ID)) {
distance = setting.second.getValue().toString();
}
- if (setting.second.equals(FileBasedDatabaseConnection.INPUT_ID)) {
+ if(setting.second.equals(FileBasedDatabaseConnection.Parameterizer.INPUT_ID)) {
dataset = setting.second.getValue().toString();
}
}
StringBuilder buf = new StringBuilder();
- if (algorithm != null) {
+ if(algorithm != null) {
// shorten the algorithm
- if (algorithm.contains(".")) {
+ if(algorithm.contains(".")) {
algorithm = algorithm.substring(algorithm.lastIndexOf('.') + 1);
}
buf.append(algorithm);
}
- if (distance != null) {
+ if(distance != null) {
// shorten the distance
- if (distance.contains(".")) {
+ if(distance.contains(".")) {
distance = distance.substring(distance.lastIndexOf('.') + 1);
}
- if (buf.length() > 0) {
+ if(buf.length() > 0) {
buf.append(" using ");
}
buf.append(distance);
}
- if (dataset != null) {
+ if(dataset != null) {
// shorten the data set filename
- if (dataset.contains(File.separator)) {
+ if(dataset.contains(File.separator)) {
dataset = dataset.substring(dataset.lastIndexOf(File.separator) + 1);
}
- if (buf.length() > 0) {
+ if(buf.length() > 0) {
buf.append(" on ");
}
buf.append(dataset);
}
- if (buf.length() > 0) {
+ if(buf.length() > 0) {
return buf.toString();
}
return null;
@@ -279,22 +279,23 @@ public class VisualizerParameterizer implements Parameterizable {
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
IntParameter samplingP = new IntParameter(SAMPLING_ID, 10000);
- samplingP.addConstraint(new GreaterEqualConstraint(-1));
- if (config.grab(samplingP)) {
+ samplingP.addConstraint(CommonConstraints.GREATER_EQUAL_MINUSONE_INT);
+ if(config.grab(samplingP)) {
samplesize = samplingP.intValue();
}
StringParameter stylelibP = new StringParameter(STYLELIB_ID, PropertiesBasedStyleLibrary.DEFAULT_SCHEME_FILENAME);
- if (config.grab(stylelibP)) {
+ if(config.grab(stylelibP)) {
String filename = stylelibP.getValue();
try {
stylelib = new PropertiesBasedStyleLibrary(filename, "Command line style");
- } catch (AbortException e) {
+ }
+ catch(AbortException e) {
config.reportError(new WrongParameterValueException(stylelibP, filename, e));
}
}
PatternParameter enablevisP = new PatternParameter(ENABLEVIS_ID, DEFAULT_ENABLEVIS);
- if (config.grab(enablevisP)) {
- if (!"all".equals(enablevisP.getValueAsString())) {
+ if(config.grab(enablevisP)) {
+ if(!"all".equals(enablevisP.getValueAsString())) {
enableVisualizers = enablevisP.getValue();
}
}
@@ -312,18 +313,20 @@ public class VisualizerParameterizer implements Parameterizable {
*/
private static <O> Collection<ProjectorFactory> collectProjectorFactorys(MergedParameterization config, Pattern filter) {
ArrayList<ProjectorFactory> factories = new ArrayList<>();
- for (Class<?> c : InspectionUtil.cachedFindAllImplementations(ProjectorFactory.class)) {
- if (filter != null && !filter.matcher(c.getCanonicalName()).find()) {
+ for(Class<?> c : InspectionUtil.cachedFindAllImplementations(ProjectorFactory.class)) {
+ if(filter != null && !filter.matcher(c.getCanonicalName()).find()) {
continue;
}
try {
config.rewind();
ProjectorFactory a = ClassGenericsUtil.tryInstantiate(ProjectorFactory.class, c, config);
factories.add(a);
- } catch (Throwable e) {
- if (LOG.isDebugging()) {
+ }
+ catch(Throwable e) {
+ if(LOG.isDebugging()) {
LOG.exception("Error instantiating visualization factory " + c.getName(), e.getCause());
- } else {
+ }
+ else {
LOG.warning("Error instantiating visualization factory " + c.getName() + ": " + e.getMessage());
}
}
@@ -340,18 +343,20 @@ public class VisualizerParameterizer implements Parameterizable {
*/
private static <O> Collection<VisFactory> collectVisFactorys(MergedParameterization config, Pattern filter) {
ArrayList<VisFactory> factories = new ArrayList<>();
- for (Class<?> c : InspectionUtil.cachedFindAllImplementations(VisFactory.class)) {
- if (filter != null && !filter.matcher(c.getCanonicalName()).find()) {
+ for(Class<?> c : InspectionUtil.cachedFindAllImplementations(VisFactory.class)) {
+ if(filter != null && !filter.matcher(c.getCanonicalName()).find()) {
continue;
}
try {
config.rewind();
VisFactory a = ClassGenericsUtil.tryInstantiate(VisFactory.class, c, config);
factories.add(a);
- } catch (Throwable e) {
- if (LOG.isDebugging()) {
+ }
+ catch(Throwable e) {
+ if(LOG.isDebugging()) {
LOG.exception("Error instantiating visualization factory " + c.getName(), e.getCause());
- } else {
+ }
+ else {
LOG.warning("Error instantiating visualization factory " + c.getName() + ": " + e.getMessage());
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/visualization/gui/SelectionTableWindow.java b/src/de/lmu/ifi/dbs/elki/visualization/gui/SelectionTableWindow.java
index 6e1b4a66..70cd07a9 100644
--- a/src/de/lmu/ifi/dbs/elki/visualization/gui/SelectionTableWindow.java
+++ b/src/de/lmu/ifi/dbs/elki/visualization/gui/SelectionTableWindow.java
@@ -327,7 +327,7 @@ public class SelectionTableWindow extends JFrame implements DataStoreListener, R
double[] vals = new double[dimensionality];
for(int d = 0; d < dimensionality; d++) {
if(d == columnIndex - 3) {
- vals[d] = Double.parseDouble((String) aValue);
+ vals[d] = FormatUtil.parseDouble((String) aValue);
}
else {
vals[d] = obj.doubleValue(d + 1);
diff --git a/src/de/lmu/ifi/dbs/elki/visualization/projections/AffineProjection.java b/src/de/lmu/ifi/dbs/elki/visualization/projections/AffineProjection.java
index 7522b57c..f29c1d50 100644
--- a/src/de/lmu/ifi/dbs/elki/visualization/projections/AffineProjection.java
+++ b/src/de/lmu/ifi/dbs/elki/visualization/projections/AffineProjection.java
@@ -210,7 +210,8 @@ public class AffineProjection extends AbstractFullProjection implements Projecti
x += colx[vr.length];
y += coly[vr.length];
s += cols[vr.length];
- assert (s > 0.0 || s < 0.0);
+ // Note: we may have NaN values here.
+ // assert (s > 0.0 || s < 0.0);
return new double[] { x / s, y / s };
}
diff --git a/src/de/lmu/ifi/dbs/elki/visualization/projector/HistogramFactory.java b/src/de/lmu/ifi/dbs/elki/visualization/projector/HistogramFactory.java
index 61921296..894bd264 100644
--- a/src/de/lmu/ifi/dbs/elki/visualization/projector/HistogramFactory.java
+++ b/src/de/lmu/ifi/dbs/elki/visualization/projector/HistogramFactory.java
@@ -33,7 +33,7 @@ import de.lmu.ifi.dbs.elki.result.HierarchicalResult;
import de.lmu.ifi.dbs.elki.result.Result;
import de.lmu.ifi.dbs.elki.result.ResultUtil;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -91,7 +91,7 @@ public class HistogramFactory implements ProjectorFactory {
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
IntParameter maxdimP = new IntParameter(ScatterPlotFactory.Parameterizer.MAXDIM_ID, ScatterPlotFactory.MAX_DIMENSIONS_DEFAULT);
- maxdimP.addConstraint(new GreaterEqualConstraint(1));
+ maxdimP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_INT);
if(config.grab(maxdimP)) {
maxdim = maxdimP.intValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/visualization/projector/ScatterPlotFactory.java b/src/de/lmu/ifi/dbs/elki/visualization/projector/ScatterPlotFactory.java
index 101f52a6..f1d61698 100644
--- a/src/de/lmu/ifi/dbs/elki/visualization/projector/ScatterPlotFactory.java
+++ b/src/de/lmu/ifi/dbs/elki/visualization/projector/ScatterPlotFactory.java
@@ -34,7 +34,7 @@ import de.lmu.ifi.dbs.elki.result.Result;
import de.lmu.ifi.dbs.elki.result.ResultUtil;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -108,7 +108,7 @@ public class ScatterPlotFactory implements ProjectorFactory {
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
IntParameter maxdimP = new IntParameter(MAXDIM_ID, MAX_DIMENSIONS_DEFAULT);
- maxdimP.addConstraint(new GreaterEqualConstraint(1));
+ maxdimP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_INT);
if(config.grab(maxdimP)) {
maxdim = maxdimP.intValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/visualization/style/PropertiesBasedStyleLibrary.java b/src/de/lmu/ifi/dbs/elki/visualization/style/PropertiesBasedStyleLibrary.java
index ee9c9b3e..d936a9d5 100644
--- a/src/de/lmu/ifi/dbs/elki/visualization/style/PropertiesBasedStyleLibrary.java
+++ b/src/de/lmu/ifi/dbs/elki/visualization/style/PropertiesBasedStyleLibrary.java
@@ -32,6 +32,7 @@ import java.util.Properties;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.utilities.FileUtil;
+import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.visualization.colors.ColorLibrary;
import de.lmu.ifi.dbs.elki.visualization.colors.ListBasedColorLibrary;
@@ -249,7 +250,7 @@ public class PropertiesBasedStyleLibrary implements StyleLibrary {
Double lw = getCached(key, LINE_WIDTH, Double.class);
if (lw == null) {
try {
- lw = Double.valueOf(Double.parseDouble(getPropertyValue(key, LINE_WIDTH)) * SCALE);
+ lw = Double.valueOf(FormatUtil.parseDouble(getPropertyValue(key, LINE_WIDTH)) * SCALE);
} catch (NullPointerException e) {
throw new AbortException("Missing/invalid value in style library: " + key + '.' + LINE_WIDTH);
}
@@ -262,7 +263,7 @@ public class PropertiesBasedStyleLibrary implements StyleLibrary {
Double lw = getCached(key, TEXT_SIZE, Double.class);
if (lw == null) {
try {
- lw = Double.valueOf(Double.parseDouble(getPropertyValue(key, TEXT_SIZE)) * SCALE);
+ lw = Double.valueOf(FormatUtil.parseDouble(getPropertyValue(key, TEXT_SIZE)) * SCALE);
} catch (NullPointerException e) {
throw new AbortException("Missing/invalid value in style library: " + key + '.' + TEXT_SIZE);
}
@@ -280,7 +281,7 @@ public class PropertiesBasedStyleLibrary implements StyleLibrary {
Double lw = getCached(key, GENERIC_SIZE, Double.class);
if (lw == null) {
try {
- lw = Double.valueOf(Double.parseDouble(getPropertyValue(key, GENERIC_SIZE)) * SCALE);
+ lw = Double.valueOf(FormatUtil.parseDouble(getPropertyValue(key, GENERIC_SIZE)) * SCALE);
} catch (NullPointerException e) {
throw new AbortException("Missing/invalid value in style library: " + key + '.' + GENERIC_SIZE);
}
@@ -293,7 +294,7 @@ public class PropertiesBasedStyleLibrary implements StyleLibrary {
Double lw = getCached(key, OPACITY, Double.class);
if (lw == null) {
try {
- lw = Double.valueOf(Double.parseDouble(getPropertyValue(key, OPACITY)));
+ lw = Double.valueOf(FormatUtil.parseDouble(getPropertyValue(key, OPACITY)));
} catch (NullPointerException e) {
throw new AbortException("Missing/invalid value in style library: " + key + '.' + OPACITY);
}
diff --git a/src/de/lmu/ifi/dbs/elki/visualization/visualizers/histogram/ColoredHistogramVisualizer.java b/src/de/lmu/ifi/dbs/elki/visualization/visualizers/histogram/ColoredHistogramVisualizer.java
index fd7edbd4..eb737a6d 100644
--- a/src/de/lmu/ifi/dbs/elki/visualization/visualizers/histogram/ColoredHistogramVisualizer.java
+++ b/src/de/lmu/ifi/dbs/elki/visualization/visualizers/histogram/ColoredHistogramVisualizer.java
@@ -45,7 +45,7 @@ import de.lmu.ifi.dbs.elki.utilities.datastructures.histogram.DoubleArrayStaticH
import de.lmu.ifi.dbs.elki.utilities.exceptions.ObjectNotFoundException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -113,9 +113,9 @@ public class ColoredHistogramVisualizer extends AbstractVisFactory {
public void processNewResult(HierarchicalResult baseResult, Result result) {
// Find a style result to visualize:
Collection<StyleResult> styleres = ResultUtil.filterResults(result, StyleResult.class);
- for (StyleResult c : styleres) {
+ for(StyleResult c : styleres) {
Collection<HistogramProjector<?>> ps = ResultUtil.filterResults(baseResult, HistogramProjector.class);
- for (HistogramProjector<?> p : ps) {
+ for(HistogramProjector<?> p : ps) {
// register self
final VisualizationTask task = new VisualizationTask(CNAME, c, p.getRelation(), this);
task.level = VisualizationTask.LEVEL_DATA;
@@ -196,9 +196,10 @@ public class ColoredHistogramVisualizer extends AbstractVisFactory {
// Styling policy
final StylingPolicy spol = style.getStylingPolicy();
final ClassStylingPolicy cspol;
- if (spol instanceof ClassStylingPolicy) {
+ if(spol instanceof ClassStylingPolicy) {
cspol = (ClassStylingPolicy) spol;
- } else {
+ }
+ else {
cspol = null;
}
// TODO also use min style?
@@ -212,35 +213,37 @@ public class ColoredHistogramVisualizer extends AbstractVisFactory {
final int cols = numc + 1;
DoubleArrayStaticHistogram histogram = new DoubleArrayStaticHistogram(settings.bins, -.5, .5, cols);
- if (cspol != null) {
- for (int snum = 0; snum < numc; snum++) {
+ if(cspol != null) {
+ for(int snum = 0; snum < numc; snum++) {
double[] inc = new double[cols];
inc[0] = frac;
inc[snum + 1] = frac;
- for (DBIDIter iter = cspol.iterateClass(snum + off); iter.valid(); iter.advance()) {
- if (!sample.getSample().contains(iter)) {
+ for(DBIDIter iter = cspol.iterateClass(snum + off); iter.valid(); iter.advance()) {
+ if(!sample.getSample().contains(iter)) {
continue; // TODO: can we test more efficiently than this?
}
try {
double pos = proj.fastProjectDataToRenderSpace(relation.get(iter)) / Projection.SCALE;
histogram.increment(pos, inc);
- } catch (ObjectNotFoundException e) {
+ }
+ catch(ObjectNotFoundException e) {
// Ignore. The object was probably deleted from the database
}
}
}
- } else {
+ }
+ else {
// Actual data distribution.
double[] inc = new double[cols];
inc[0] = frac;
- for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double pos = proj.fastProjectDataToRenderSpace(relation.get(iditer)) / Projection.SCALE;
histogram.increment(pos, inc);
}
}
// for scaling, get the maximum occurring value in the bins:
- for (DoubleArrayStaticHistogram.Iter iter = histogram.iter(); iter.valid(); iter.advance()) {
- for (double val : iter.getValue()) {
+ for(DoubleArrayStaticHistogram.Iter iter = histogram.iter(); iter.valid(); iter.advance()) {
+ for(double val : iter.getValue()) {
minmax.put(val);
}
}
@@ -255,29 +258,30 @@ public class ColoredHistogramVisualizer extends AbstractVisFactory {
// draw axes that are non-trivial
final int dimensionality = RelationUtil.dimensionality(relation);
double orig = proj.fastProjectScaledToRender(new Vector(dimensionality));
- for (int d = 0; d < dimensionality; d++) {
+ for(int d = 0; d < dimensionality; d++) {
Vector v = new Vector(dimensionality);
v.set(d, 1);
// projected endpoint of axis
double ax = proj.fastProjectScaledToRender(v);
- if (ax < orig || ax > orig) {
+ if(ax < orig || ax > orig) {
final double left = (orig / Projection.SCALE + 0.5) * xsize;
final double right = (ax / Projection.SCALE + 0.5) * xsize;
SVGSimpleLinearAxis.drawAxis(svgp, layer, proj.getScale(d), left, ysize, right, ysize, SVGSimpleLinearAxis.LabelStyle.RIGHTHAND, style.getStyleLibrary());
}
}
- } catch (CSSNamingConflict e) {
+ }
+ catch(CSSNamingConflict e) {
LoggingUtil.exception("CSS class exception in axis class.", e);
}
// Visualizing
- if (!settings.curves) {
- for (DoubleArrayStaticHistogram.Iter iter = histogram.iter(); iter.valid(); iter.advance()) {
+ if(!settings.curves) {
+ for(DoubleArrayStaticHistogram.Iter iter = histogram.iter(); iter.valid(); iter.advance()) {
double lpos = xscale.getScaled(iter.getLeft());
double rpos = xscale.getScaled(iter.getRight());
double stack = 0.0;
final int start = numc > 0 ? 1 : 0;
- for (int key = start; key < cols; key++) {
+ for(int key = start; key < cols; key++) {
double val = yscale.getScaled(iter.getValue()[key]);
Element row = SVGUtil.svgRect(svgp.getDocument(), xsize * lpos, ysize * (1 - (val + stack)), xsize * (rpos - lpos), ysize * val);
stack = stack + val;
@@ -285,24 +289,25 @@ public class ColoredHistogramVisualizer extends AbstractVisFactory {
layer.appendChild(row);
}
}
- } else {
+ }
+ else {
double left = xscale.getScaled(histogram.getCoverMinimum());
double right = left;
SVGPath[] paths = new SVGPath[cols];
double[] lasty = new double[cols];
- for (int i = 0; i < cols; i++) {
+ for(int i = 0; i < cols; i++) {
paths[i] = new SVGPath(xsize * left, ysize * 1);
lasty[i] = 0;
}
// draw histogram lines
- for (DoubleArrayStaticHistogram.Iter iter = histogram.iter(); iter.valid(); iter.advance()) {
+ for(DoubleArrayStaticHistogram.Iter iter = histogram.iter(); iter.valid(); iter.advance()) {
left = xscale.getScaled(iter.getLeft());
right = xscale.getScaled(iter.getRight());
- for (int i = 0; i < cols; i++) {
+ for(int i = 0; i < cols; i++) {
double val = yscale.getScaled(iter.getValue()[i]);
- if (lasty[i] > val || lasty[i] < val) {
+ if(lasty[i] > val || lasty[i] < val) {
paths[i].lineTo(xsize * left, ysize * (1 - lasty[i]));
paths[i].lineTo(xsize * left, ysize * (1 - val));
paths[i].lineTo(xsize * right, ysize * (1 - val));
@@ -311,8 +316,8 @@ public class ColoredHistogramVisualizer extends AbstractVisFactory {
}
}
// close and insert all lines.
- for (int i = 0; i < cols; i++) {
- if (lasty[i] != 0) {
+ for(int i = 0; i < cols; i++) {
+ if(lasty[i] != 0) {
paths[i].lineTo(xsize * right, ysize * (1 - lasty[i]));
}
paths[i].lineTo(xsize * right, ysize * 1);
@@ -333,22 +338,24 @@ public class ColoredHistogramVisualizer extends AbstractVisFactory {
ColorLibrary colors = style.getStyleLibrary().getColorSet(StyleLibrary.PLOT);
CSSClass allInOne = new CSSClass(svgp, BIN + -1);
- if (!settings.curves) {
+ if(!settings.curves) {
allInOne.setStatement(SVGConstants.CSS_FILL_PROPERTY, SVGConstants.CSS_BLACK_VALUE);
allInOne.setStatement(SVGConstants.CSS_FILL_OPACITY_PROPERTY, 1.0);
- } else {
+ }
+ else {
allInOne.setStatement(SVGConstants.CSS_STROKE_PROPERTY, SVGConstants.CSS_BLACK_VALUE);
allInOne.setStatement(SVGConstants.CSS_STROKE_WIDTH_PROPERTY, style.getStyleLibrary().getLineWidth(StyleLibrary.PLOT));
allInOne.setStatement(SVGConstants.CSS_FILL_PROPERTY, SVGConstants.CSS_NONE_VALUE);
}
svgp.addCSSClassOrLogError(allInOne);
- for (int clusterID = 0; clusterID < numc; clusterID++) {
+ for(int clusterID = 0; clusterID < numc; clusterID++) {
CSSClass bin = new CSSClass(svgp, BIN + clusterID);
- if (!settings.curves) {
+ if(!settings.curves) {
bin.setStatement(SVGConstants.CSS_FILL_PROPERTY, colors.getColor(clusterID));
- } else {
+ }
+ else {
bin.setStatement(SVGConstants.CSS_STROKE_PROPERTY, colors.getColor(clusterID));
bin.setStatement(SVGConstants.CSS_STROKE_WIDTH_PROPERTY, style.getStyleLibrary().getLineWidth(StyleLibrary.PLOT));
bin.setStatement(SVGConstants.CSS_FILL_PROPERTY, SVGConstants.CSS_NONE_VALUE);
@@ -399,12 +406,12 @@ public class ColoredHistogramVisualizer extends AbstractVisFactory {
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
Flag curvesF = new Flag(STYLE_CURVES_ID);
- if (config.grab(curvesF)) {
+ if(config.grab(curvesF)) {
curves = curvesF.isTrue();
}
IntParameter binsP = new IntParameter(HISTOGRAM_BINS_ID, DEFAULT_BINS);
- binsP.addConstraint(new GreaterEqualConstraint(2));
- if (config.grab(binsP)) {
+ binsP.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
+ if(config.grab(binsP)) {
bins = binsP.intValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/visualization/visualizers/pairsegments/CircleSegmentsVisualizer.java b/src/de/lmu/ifi/dbs/elki/visualization/visualizers/pairsegments/CircleSegmentsVisualizer.java
index 9718925f..ff281a35 100644
--- a/src/de/lmu/ifi/dbs/elki/visualization/visualizers/pairsegments/CircleSegmentsVisualizer.java
+++ b/src/de/lmu/ifi/dbs/elki/visualization/visualizers/pairsegments/CircleSegmentsVisualizer.java
@@ -48,6 +48,7 @@ import de.lmu.ifi.dbs.elki.result.HierarchicalResult;
import de.lmu.ifi.dbs.elki.result.Result;
import de.lmu.ifi.dbs.elki.result.ResultListener;
import de.lmu.ifi.dbs.elki.result.ResultUtil;
+import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.visualization.VisualizationTask;
@@ -319,7 +320,7 @@ public class CircleSegmentsVisualizer extends AbstractVisFactory {
// Add ring:clustering info
Element clrInfo = drawClusteringInfo();
- Element c = checkbox.renderCheckBox(svgp, 1, 5 + Double.parseDouble(clrInfo.getAttribute(SVGConstants.SVG_HEIGHT_ATTRIBUTE)), 11);
+ Element c = checkbox.renderCheckBox(svgp, 1., 5. + FormatUtil.parseDouble(clrInfo.getAttribute(SVGConstants.SVG_HEIGHT_ATTRIBUTE)), 11);
ctrlLayer.appendChild(clrInfo);
ctrlLayer.appendChild(c);
diff --git a/src/de/lmu/ifi/dbs/elki/visualization/visualizers/parallel/LineVisualization.java b/src/de/lmu/ifi/dbs/elki/visualization/visualizers/parallel/LineVisualization.java
index e9726ba6..1290372f 100644
--- a/src/de/lmu/ifi/dbs/elki/visualization/visualizers/parallel/LineVisualization.java
+++ b/src/de/lmu/ifi/dbs/elki/visualization/visualizers/parallel/LineVisualization.java
@@ -31,6 +31,9 @@ import org.w3c.dom.Element;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreListener;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.result.HierarchicalResult;
import de.lmu.ifi.dbs.elki.result.Result;
import de.lmu.ifi.dbs.elki.result.ResultUtil;
@@ -64,8 +67,7 @@ public class LineVisualization extends AbstractVisFactory {
public static final String NAME = "Data lines";
/**
- * Constructor, adhering to
- * {@link de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable}
+ * Constructor.
*/
public LineVisualization() {
super();
@@ -137,37 +139,34 @@ public class LineVisualization extends AbstractVisFactory {
StylingPolicy sp = context.getStyleResult().getStylingPolicy();
addCSSClasses(svgp, sp);
- DBIDIter ids = sample.getSample().iter();
- if(ids == null || !ids.valid()) {
- ids = relation.iterDBIDs();
- }
if(sp instanceof ClassStylingPolicy) {
+ final DBIDs sam = DBIDUtil.ensureSet(sample.getSample());
ClassStylingPolicy csp = (ClassStylingPolicy) sp;
for(int c = csp.getMinStyle(); c < csp.getMaxStyle(); c++) {
String key = DATALINE + "_" + c;
for(DBIDIter iter = csp.iterateClass(c); iter.valid(); iter.advance()) {
- if(!sample.getSample().contains(iter)) {
+ if(!sam.contains(iter)) {
continue; // TODO: can we test more efficiently than this?
}
- SVGPath path = new SVGPath();
- double[] yPos = proj.fastProjectDataToRenderSpace(relation.get(iter));
- for(int i = 0; i < yPos.length; i++) {
- path.drawTo(getVisibleAxisX(i), yPos[i]);
+ Element line = drawLine(iter);
+ if(line == null) {
+ continue;
}
- Element line = path.makeElement(svgp);
SVGUtil.addCSSClass(line, key);
layer.appendChild(line);
}
}
}
else {
+ DBIDIter ids = sample.getSample().iter();
+ if(ids == null || !ids.valid()) {
+ ids = relation.iterDBIDs();
+ }
for(; ids.valid(); ids.advance()) {
- SVGPath path = new SVGPath();
- double[] yPos = proj.fastProjectDataToRenderSpace(relation.get(ids));
- for(int i = 0; i < yPos.length; i++) {
- path.drawTo(getVisibleAxisX(i), yPos[i]);
+ Element line = drawLine(ids);
+ if(line == null) {
+ continue;
}
- Element line = path.makeElement(svgp);
SVGUtil.addCSSClass(line, DATALINE);
// assign color
line.setAttribute(SVGConstants.SVG_STYLE_ATTRIBUTE, SVGConstants.CSS_STROKE_PROPERTY + ":" + SVGUtil.colorToString(sp.getColorForDBID(ids)));
@@ -177,6 +176,42 @@ public class LineVisualization extends AbstractVisFactory {
}
/**
+ * Draw a single line.
+ *
+ * @param iter Object reference
+ * @return Line element
+ */
+ private Element drawLine(DBIDRef iter) {
+ SVGPath path = new SVGPath();
+ double[] yPos = proj.fastProjectDataToRenderSpace(relation.get(iter));
+ boolean draw = false, drawprev = false, drawn = false;
+ for(int i = 0; i < yPos.length; i++) {
+ // NaN handling:
+ if(yPos[i] != yPos[i]) {
+ draw = false;
+ drawprev = false;
+ continue;
+ }
+ if(draw) {
+ if(drawprev) {
+ path.moveTo(getVisibleAxisX(i - 1), yPos[i - 1]);
+ drawprev = false;
+ }
+ path.lineTo(getVisibleAxisX(i), yPos[i]);
+ drawn = true;
+ }
+ else {
+ drawprev = true;
+ }
+ draw = true;
+ }
+ if(!drawn) {
+ return null; // Not enough data.
+ }
+ return path.makeElement(svgp);
+ }
+
+ /**
* Adds the required CSS-Classes
*
* @param svgp SVG-Plot
diff --git a/src/de/lmu/ifi/dbs/elki/visualization/visualizers/parallel/selection/SelectionLineVisualization.java b/src/de/lmu/ifi/dbs/elki/visualization/visualizers/parallel/selection/SelectionLineVisualization.java
index d7ccb072..92e5f64c 100644
--- a/src/de/lmu/ifi/dbs/elki/visualization/visualizers/parallel/selection/SelectionLineVisualization.java
+++ b/src/de/lmu/ifi/dbs/elki/visualization/visualizers/parallel/selection/SelectionLineVisualization.java
@@ -31,6 +31,7 @@ import org.w3c.dom.Element;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreListener;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.result.DBIDSelection;
import de.lmu.ifi.dbs.elki.result.HierarchicalResult;
@@ -130,13 +131,10 @@ public class SelectionLineVisualization extends AbstractVisFactory {
DBIDs selection = selContext.getSelectedIds();
for(DBIDIter iter = selection.iter(); iter.valid(); iter.advance()) {
- double[] yPos = proj.fastProjectDataToRenderSpace(relation.get(iter));
-
- SVGPath path = new SVGPath();
- for(int i = 0; i < proj.getVisibleDimensions(); i++) {
- path.drawTo(getVisibleAxisX(i), yPos[i]);
+ Element marker = drawLine(iter);
+ if(marker == null) {
+ continue;
}
- Element marker = path.makeElement(svgp);
SVGUtil.addCSSClass(marker, MARKER);
layer.appendChild(marker);
}
@@ -144,6 +142,42 @@ public class SelectionLineVisualization extends AbstractVisFactory {
}
/**
+ * Draw a single line.
+ *
+ * @param iter Object reference
+ * @return SVG Element
+ */
+ private Element drawLine(DBIDRef iter) {
+ SVGPath path = new SVGPath();
+ double[] yPos = proj.fastProjectDataToRenderSpace(relation.get(iter));
+ boolean draw = false, drawprev = false, drawn = false;
+ for(int i = 0; i < yPos.length; i++) {
+ // NaN handling:
+ if(yPos[i] != yPos[i]) {
+ draw = false;
+ drawprev = false;
+ continue;
+ }
+ if(draw) {
+ if(drawprev) {
+ path.moveTo(getVisibleAxisX(i - 1), yPos[i - 1]);
+ drawprev = false;
+ }
+ path.lineTo(getVisibleAxisX(i), yPos[i]);
+ drawn = true;
+ }
+ else {
+ drawprev = true;
+ }
+ draw = true;
+ }
+ if(!drawn) {
+ return null; // Not enough data.
+ }
+ return path.makeElement(svgp);
+ }
+
+ /**
* Adds the required CSS-Classes
*
* @param svgp SVG-Plot
diff --git a/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/AbstractTooltipVisualization.java b/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/AbstractTooltipVisualization.java
index 332f31e1..61bff7d4 100644
--- a/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/AbstractTooltipVisualization.java
+++ b/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/AbstractTooltipVisualization.java
@@ -95,6 +95,9 @@ public abstract class AbstractTooltipVisualization extends AbstractScatterplotVi
for(DBIDIter id = sample.getSample().iter(); id.valid(); id.advance()) {
double[] v = proj.fastProjectDataToRenderSpace(rel.get(id));
+ if(v[0] != v[0] || v[1] != v[1]) {
+ continue; // NaN!
+ }
Element tooltip = makeTooltip(id, v[0], v[1], dotsize);
SVGUtil.addCSSClass(tooltip, TOOLTIP_HIDDEN);
diff --git a/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/MarkerVisualization.java b/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/MarkerVisualization.java
index 964500ec..8a3d0849 100644
--- a/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/MarkerVisualization.java
+++ b/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/MarkerVisualization.java
@@ -147,6 +147,9 @@ public class MarkerVisualization extends AbstractVisFactory {
try {
final NumberVector<?> vec = rel.get(iter);
double[] v = proj.fastProjectDataToRenderSpace(vec);
+ if(v[0] != v[0] || v[1] != v[1]) {
+ continue; // NaN!
+ }
ml.useMarker(svgp, layer, v[0], v[1], cnum, marker_size);
}
catch(ObjectNotFoundException e) {
diff --git a/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/TooltipScoreVisualization.java b/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/TooltipScoreVisualization.java
index bcf0dc71..45351aec 100644
--- a/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/TooltipScoreVisualization.java
+++ b/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/TooltipScoreVisualization.java
@@ -40,7 +40,7 @@ import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy.Hierarchy;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.visualization.VisualizationTask;
@@ -237,7 +237,7 @@ public class TooltipScoreVisualization extends AbstractVisFactory {
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
IntParameter digitsP = new IntParameter(DIGITS_ID, 4);
- digitsP.addConstraint(new GreaterEqualConstraint(0));
+ digitsP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_INT);
if(config.grab(digitsP)) {
int digits = digitsP.intValue();
diff --git a/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/cluster/ClusterHullVisualization.java b/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/cluster/ClusterHullVisualization.java
index 123ed7d9..9436b25c 100644
--- a/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/cluster/ClusterHullVisualization.java
+++ b/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/cluster/ClusterHullVisualization.java
@@ -272,7 +272,11 @@ public class ClusterHullVisualization extends AbstractVisFactory {
GrahamScanConvexHull2D hull = new GrahamScanConvexHull2D();
GrahamScanConvexHull2D hull2 = coremodel ? new GrahamScanConvexHull2D() : null;
for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
- Vector projP = new Vector(proj.fastProjectDataToRenderSpace(rel.get(iter)));
+ final double[] projv = proj.fastProjectDataToRenderSpace(rel.get(iter));
+ if(projv[0] != projv[0] || projv[1] != projv[1]) {
+ continue; // NaN!
+ }
+ Vector projP = new Vector(projv);
hull.add(projP);
if (coremodel && cids.contains(iter)) {
hull2.add(projP);
@@ -327,6 +331,9 @@ public class ClusterHullVisualization extends AbstractVisFactory {
double weight = ids.size();
for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
double[] projP = proj.fastProjectDataToRenderSpace(rel.get(iter));
+ if(projP[0] != projP[0] || projP[1] != projP[1]) {
+ continue; // NaN!
+ }
hull.add(new Vector(projP));
}
for (Iter<Cluster<Model>> iter = hier.iterChildren(clus); iter.valid(); iter.advance()) {
diff --git a/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/cluster/ClusterOrderVisualization.java b/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/cluster/ClusterOrderVisualization.java
index bdc77006..339b7c41 100644
--- a/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/cluster/ClusterOrderVisualization.java
+++ b/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/cluster/ClusterOrderVisualization.java
@@ -136,10 +136,13 @@ public class ClusterOrderVisualization extends AbstractVisFactory {
double[] thisVec = proj.fastProjectDataToRenderSpace(rel.get(thisId));
double[] prevVec = proj.fastProjectDataToRenderSpace(rel.get(prevId));
- // FIXME: DO NOT COMMIT
- thisVec[0] = thisVec[0] * 0.95 + prevVec[0] * 0.05;
- thisVec[1] = thisVec[1] * 0.95 + prevVec[1] * 0.05;
-
+ if(thisVec[0] != thisVec[0] || thisVec[1] != thisVec[1]) {
+ continue; // NaN!
+ }
+ if(prevVec[0] != prevVec[0] || prevVec[1] != prevVec[1]) {
+ continue; // NaN!
+ }
+ // FIXME: add arrow decorations!
Element arrow = svgp.svgLine(prevVec[0], prevVec[1], thisVec[0], thisVec[1]);
SVGUtil.setCSSClass(arrow, cls.getName());
diff --git a/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/outlier/BubbleVisualization.java b/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/outlier/BubbleVisualization.java
index 5051d019..752cf333 100644
--- a/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/outlier/BubbleVisualization.java
+++ b/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/outlier/BubbleVisualization.java
@@ -101,7 +101,7 @@ public class BubbleVisualization extends AbstractVisFactory {
@Override
public Visualization makeVisualization(VisualizationTask task) {
- if (settings.scaling != null && settings.scaling instanceof OutlierScalingFunction) {
+ if(settings.scaling != null && settings.scaling instanceof OutlierScalingFunction) {
final OutlierResult outlierResult = task.getResult();
((OutlierScalingFunction) settings.scaling).prepare(outlierResult);
}
@@ -111,21 +111,21 @@ public class BubbleVisualization extends AbstractVisFactory {
@Override
public void processNewResult(HierarchicalResult baseResult, Result result) {
Collection<OutlierResult> ors = ResultUtil.filterResults(result, OutlierResult.class);
- for (OutlierResult o : ors) {
+ for(OutlierResult o : ors) {
Collection<ScatterPlotProjector<?>> ps = ResultUtil.filterResults(baseResult, ScatterPlotProjector.class);
boolean vis = true;
// Quick and dirty hack: hide if parent result is also an outlier result
// Since that probably is already visible and we're redundant.
- for (Hierarchy.Iter<Result> r = o.getHierarchy().iterParents(o); r.valid(); r.advance()) {
- if (r.get() instanceof OutlierResult) {
+ for(Hierarchy.Iter<Result> r = o.getHierarchy().iterParents(o); r.valid(); r.advance()) {
+ if(r.get() instanceof OutlierResult) {
vis = false;
break;
}
}
- for (ScatterPlotProjector<?> p : ps) {
+ for(ScatterPlotProjector<?> p : ps) {
final VisualizationTask task = new VisualizationTask(NAME, o, p.getRelation(), this);
task.level = VisualizationTask.LEVEL_DATA;
- if (!vis) {
+ if(!vis) {
task.initDefaultVisibility(false);
}
baseResult.getHierarchy().add(o, task);
@@ -173,37 +173,45 @@ public class BubbleVisualization extends AbstractVisFactory {
StylingPolicy stylepolicy = style.getStylingPolicy();
// bubble size
final double bubble_size = style.getStyleLibrary().getSize(StyleLibrary.BUBBLEPLOT);
- if (stylepolicy instanceof ClassStylingPolicy) {
+ if(stylepolicy instanceof ClassStylingPolicy) {
ClassStylingPolicy colors = (ClassStylingPolicy) stylepolicy;
setupCSS(svgp, colors);
// draw data
- for (DBIDIter objId = sample.getSample().iter(); objId.valid(); objId.advance()) {
+ for(DBIDIter objId = sample.getSample().iter(); objId.valid(); objId.advance()) {
final double radius = getScaledForId(objId);
- if (radius > 0.01 && !Double.isInfinite(radius)) {
+ if(radius > 0.01 && !Double.isInfinite(radius)) {
final NumberVector<?> vec = rel.get(objId);
- if (vec != null) {
+ if(vec != null) {
double[] v = proj.fastProjectDataToRenderSpace(vec);
+ if(v[0] != v[0] || v[1] != v[1]) {
+ continue; // NaN!
+ }
Element circle = svgp.svgCircle(v[0], v[1], radius * bubble_size);
SVGUtil.addCSSClass(circle, BUBBLE + colors.getStyleForDBID(objId));
layer.appendChild(circle);
}
}
}
- } else {
+ }
+ else {
// draw data
- for (DBIDIter objId = sample.getSample().iter(); objId.valid(); objId.advance()) {
+ for(DBIDIter objId = sample.getSample().iter(); objId.valid(); objId.advance()) {
final double radius = getScaledForId(objId);
- if (radius > 0.01 && !Double.isInfinite(radius)) {
+ if(radius > 0.01 && !Double.isInfinite(radius)) {
final NumberVector<?> vec = rel.get(objId);
- if (vec != null) {
+ if(vec != null) {
double[] v = proj.fastProjectDataToRenderSpace(vec);
+ if(v[0] != v[0] || v[1] != v[1]) {
+ continue; // NaN!
+ }
Element circle = svgp.svgCircle(v[0], v[1], radius * bubble_size);
int color = stylepolicy.getColorForDBID(objId);
final StringBuilder cssstyle = new StringBuilder();
- if (settings.fill) {
+ if(settings.fill) {
cssstyle.append(SVGConstants.CSS_FILL_PROPERTY).append(':').append(SVGUtil.colorToString(color));
cssstyle.append(SVGConstants.CSS_FILL_OPACITY_PROPERTY).append(":0.5");
- } else {
+ }
+ else {
cssstyle.append(SVGConstants.CSS_STROKE_VALUE).append(':').append(SVGUtil.colorToString(color));
cssstyle.append(SVGConstants.CSS_FILL_PROPERTY).append(':').append(SVGConstants.CSS_NONE_VALUE);
}
@@ -218,7 +226,7 @@ public class BubbleVisualization extends AbstractVisFactory {
@Override
public void resultChanged(Result current) {
super.resultChanged(current);
- if (sample == current || context.getStyleResult() == current) {
+ if(sample == current || context.getStyleResult() == current) {
synchronizedRedraw();
}
}
@@ -234,16 +242,17 @@ public class BubbleVisualization extends AbstractVisFactory {
ColorLibrary colors = style.getColorSet(StyleLibrary.PLOT);
// creating IDs manually because cluster often return a null-ID.
- for (int clusterID = policy.getMinStyle(); clusterID < policy.getMaxStyle(); clusterID++) {
+ for(int clusterID = policy.getMinStyle(); clusterID < policy.getMaxStyle(); clusterID++) {
CSSClass bubble = new CSSClass(svgp, BUBBLE + clusterID);
bubble.setStatement(SVGConstants.CSS_STROKE_WIDTH_PROPERTY, style.getLineWidth(StyleLibrary.PLOT));
String color = colors.getColor(clusterID);
- if (settings.fill) {
+ if(settings.fill) {
bubble.setStatement(SVGConstants.CSS_FILL_PROPERTY, color);
bubble.setStatement(SVGConstants.CSS_FILL_OPACITY_PROPERTY, 0.5);
- } else {
+ }
+ else {
// for diamond-shaped strokes, see bugs.sun.com, bug ID 6294396
bubble.setStatement(SVGConstants.CSS_STROKE_VALUE, color);
bubble.setStatement(SVGConstants.CSS_FILL_PROPERTY, SVGConstants.CSS_NONE_VALUE);
@@ -262,12 +271,13 @@ public class BubbleVisualization extends AbstractVisFactory {
*/
protected double getScaledForId(DBIDRef id) {
double d = result.getScores().get(id).doubleValue();
- if (Double.isNaN(d) || Double.isInfinite(d)) {
+ if(Double.isNaN(d) || Double.isInfinite(d)) {
return 0.0;
}
- if (settings.scaling == null) {
+ if(settings.scaling == null) {
return result.getOutlierMeta().normalizeScore(d);
- } else {
+ }
+ else {
return settings.scaling.getScaled(d);
}
}
@@ -313,12 +323,12 @@ public class BubbleVisualization extends AbstractVisFactory {
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
Flag fillF = new Flag(FILL_ID);
- if (config.grab(fillF)) {
+ if(config.grab(fillF)) {
fill = fillF.isTrue();
}
ObjectParameter<ScalingFunction> scalingP = new ObjectParameter<>(SCALING_ID, OutlierScalingFunction.class, true);
- if (config.grab(scalingP)) {
+ if(config.grab(scalingP)) {
scaling = scalingP.instantiateClass(config);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/outlier/COPVectorVisualization.java b/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/outlier/COPVectorVisualization.java
index c3346c02..e62f0c3d 100644
--- a/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/outlier/COPVectorVisualization.java
+++ b/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/outlier/COPVectorVisualization.java
@@ -157,6 +157,9 @@ public class COPVectorVisualization extends AbstractVisFactory {
continue;
}
double[] v = proj.fastProjectDataToRenderSpace(vec);
+ if (v[0] != v[0] || v[1] != v[1]) {
+ continue; // NaN!
+ }
Element arrow = svgp.svgLine(v[0], v[1], v[0] + ev[0], v[1] + ev[1]);
SVGUtil.addCSSClass(arrow, VEC);
layer.appendChild(arrow);
diff --git a/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/selection/DistanceFunctionVisualization.java b/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/selection/DistanceFunctionVisualization.java
index d9b860de..38dc973f 100644
--- a/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/selection/DistanceFunctionVisualization.java
+++ b/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/selection/DistanceFunctionVisualization.java
@@ -281,6 +281,9 @@ public class DistanceFunctionVisualization extends AbstractVisFactory {
for(DistanceDBIDListIter<D> iter = knn.iter(); iter.valid(); iter.advance()) {
try {
double[] v = proj.fastProjectDataToRenderSpace(rel.get(iter));
+ if (v[0] != v[0] || v[1] != v[1]) {
+ continue; // NaN!
+ }
Element dot = svgp.svgCircle(v[0], v[1], size);
SVGUtil.addCSSClass(dot, KNNMARKER);
layer.appendChild(dot);
diff --git a/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/selection/MoveObjectsToolVisualization.java b/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/selection/MoveObjectsToolVisualization.java
index 84659848..0f9f99a4 100644
--- a/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/selection/MoveObjectsToolVisualization.java
+++ b/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/selection/MoveObjectsToolVisualization.java
@@ -101,7 +101,7 @@ public class MoveObjectsToolVisualization extends AbstractVisFactory {
* @author Heidi Kolb
* @author Erich Schubert
*
- * @apiviz.has NumberVector oneway - - edits
+ * @apiviz.has de.lmu.ifi.dbs.elki.data.NumberVector oneway - - edits
*/
public class Instance extends AbstractScatterplotVisualization implements DragListener {
/**
@@ -160,8 +160,7 @@ public class MoveObjectsToolVisualization extends AbstractVisFactory {
private void updateDB(DBIDs dbids, Vector movingVector) {
throw new AbortException("FIXME: INCOMPLETE TRANSITION");
/*
- * NumberVector<?> nv = null;
- * database.accumulateDataStoreEvents();
+ * NumberVector<?> nv = null; database.accumulateDataStoreEvents();
* Representation<DatabaseObjectMetadata> mrep =
* database.getMetadataQuery(); for(DBID dbid : dbids) { NV obj =
* database.get(dbid); // Copy metadata to keep DatabaseObjectMetadata
diff --git a/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/selection/SelectionConvexHullVisualization.java b/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/selection/SelectionConvexHullVisualization.java
index f3d41e08..36155163 100644
--- a/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/selection/SelectionConvexHullVisualization.java
+++ b/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/selection/SelectionConvexHullVisualization.java
@@ -131,7 +131,11 @@ public class SelectionConvexHullVisualization extends AbstractVisFactory {
GrahamScanConvexHull2D hull = new GrahamScanConvexHull2D();
for(DBIDIter iter = selection.iter(); iter.valid(); iter.advance()) {
try {
- hull.add(new Vector(proj.fastProjectDataToRenderSpace(rel.get(iter))));
+ final double[] v = proj.fastProjectDataToRenderSpace(rel.get(iter));
+ if (v[0] != v[0] || v[1] != v[1]) {
+ continue; // NaN!
+ }
+ hull.add(new Vector(v));
}
catch(ObjectNotFoundException e) {
// ignore
diff --git a/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/selection/SelectionDotVisualization.java b/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/selection/SelectionDotVisualization.java
index b70755c4..ba6d0423 100644
--- a/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/selection/SelectionDotVisualization.java
+++ b/src/de/lmu/ifi/dbs/elki/visualization/visualizers/scatterplot/selection/SelectionDotVisualization.java
@@ -134,6 +134,9 @@ public class SelectionDotVisualization extends AbstractVisFactory {
for(DBIDIter iter = selection.iter(); iter.valid(); iter.advance()) {
try {
double[] v = proj.fastProjectDataToRenderSpace(rel.get(iter));
+ if (v[0] != v[0] || v[1] != v[1]) {
+ continue; // NaN!
+ }
Element dot = svgp.svgCircle(v[0], v[1], size);
SVGUtil.addCSSClass(dot, MARKER);
layer.appendChild(dot);
diff --git a/src/de/lmu/ifi/dbs/elki/workflow/AlgorithmStep.java b/src/de/lmu/ifi/dbs/elki/workflow/AlgorithmStep.java
index 9367eb30..ff8d1c47 100644
--- a/src/de/lmu/ifi/dbs/elki/workflow/AlgorithmStep.java
+++ b/src/de/lmu/ifi/dbs/elki/workflow/AlgorithmStep.java
@@ -100,7 +100,7 @@ public class AlgorithmStep implements WorkflowStep {
duration.end();
LOG.statistics(duration);
}
- if (LOG.isStatistics()) {
+ if (LOG.isStatistics() && database.getIndexes().size() > 0) {
LOG.statistics("Index statistics after running algorithms:");
for (Index idx : database.getIndexes()) {
idx.logStatistics();
diff --git a/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering1.java b/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering1.java
index ad620d0e..7477c064 100644
--- a/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering1.java
+++ b/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering1.java
@@ -50,7 +50,7 @@ import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.result.Result;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -227,7 +227,7 @@ public class NaiveAgglomerativeHierarchicalClustering1<O, D extends NumberDistan
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
IntParameter numclustersP = new IntParameter(ExtractFlatClusteringFromHierarchy.Parameterizer.MINCLUSTERS_ID);
- numclustersP.addConstraint(new GreaterEqualConstraint(1));
+ numclustersP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
if (config.grab(numclustersP)) {
numclusters = numclustersP.intValue();
}
diff --git a/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering2.java b/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering2.java
index 88dc4bab..d33ca768 100644
--- a/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering2.java
+++ b/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering2.java
@@ -51,7 +51,7 @@ import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.result.Result;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -264,7 +264,7 @@ public class NaiveAgglomerativeHierarchicalClustering2<O, D extends NumberDistan
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
IntParameter numclustersP = new IntParameter(ExtractFlatClusteringFromHierarchy.Parameterizer.MINCLUSTERS_ID);
- numclustersP.addConstraint(new GreaterEqualConstraint(1));
+ numclustersP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
if (config.grab(numclustersP)) {
numclusters = numclustersP.intValue();
}
diff --git a/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering3.java b/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering3.java
index ae571358..d3312115 100644
--- a/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering3.java
+++ b/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering3.java
@@ -54,7 +54,7 @@ import de.lmu.ifi.dbs.elki.result.Result;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.EnumParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -378,7 +378,7 @@ public class NaiveAgglomerativeHierarchicalClustering3<O, D extends NumberDistan
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
IntParameter numclustersP = new IntParameter(ExtractFlatClusteringFromHierarchy.Parameterizer.MINCLUSTERS_ID);
- numclustersP.addConstraint(new GreaterEqualConstraint(1));
+ numclustersP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
if (config.grab(numclustersP)) {
numclusters = numclustersP.intValue();
}
diff --git a/src/tutorial/clustering/SameSizeKMeansAlgorithm.java b/src/tutorial/clustering/SameSizeKMeansAlgorithm.java
index 574a02e9..91da44c5 100644
--- a/src/tutorial/clustering/SameSizeKMeansAlgorithm.java
+++ b/src/tutorial/clustering/SameSizeKMeansAlgorithm.java
@@ -57,8 +57,7 @@ import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.utilities.datastructures.arrays.IntegerArrayQuickSort;
import de.lmu.ifi.dbs.elki.utilities.datastructures.arrays.IntegerComparator;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
@@ -112,7 +111,7 @@ public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends Abstract
List<? extends NumberVector<?>> means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());
// Setup cluster assignment store
List<ModifiableDBIDs> clusters = new ArrayList<>();
- for (int i = 0; i < k; i++) {
+ for(int i = 0; i < k; i++) {
clusters.add(DBIDUtil.newHashSet(relation.size() / k + 2));
}
@@ -128,7 +127,7 @@ public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends Abstract
// Wrap result
Clustering<MeanModel<V>> result = new Clustering<>("k-Means Samesize Clustering", "kmeans-samesize-clustering");
final NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(relation);
- for (int i = 0; i < clusters.size(); i++) {
+ for(int i = 0; i < clusters.size(); i++) {
V mean = factory.newNumberVector(means.get(i).getColumnVector().getArrayRef());
result.addToplevelCluster(new Cluster<>(clusters.get(i), new MeanModel<>(mean)));
}
@@ -149,15 +148,16 @@ public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends Abstract
// The actual storage
final WritableDataStore<Meta> metas = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, Meta.class);
// Build the metadata, track the two nearest cluster centers.
- for (DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) {
+ for(DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) {
Meta c = new Meta(k);
V fv = relation.get(id);
- for (int i = 0; i < k; i++) {
+ for(int i = 0; i < k; i++) {
c.dists[i] = df.doubleDistance(fv, means.get(i));
- if (i > 0) {
- if (c.dists[i] < c.dists[c.primary]) {
+ if(i > 0) {
+ if(c.dists[i] < c.dists[c.primary]) {
c.primary = i;
- } else if (c.dists[i] > c.dists[c.secondary]) {
+ }
+ else if(c.dists[i] > c.dists[c.secondary]) {
c.secondary = i;
}
}
@@ -184,9 +184,9 @@ public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends Abstract
DBIDArrayIter id = tids.iter();
// Initialization phase:
- for (int start = 0; start < tids.size();) {
+ for(int start = 0; start < tids.size();) {
tids.sort(start, tids.size(), comp);
- for (id.seek(start); id.valid();) {
+ for(id.seek(start); id.valid();) {
Meta c = metas.get(id);
// Assigning to best cluster - which cannot be full yet!
ModifiableDBIDs cluster = clusters.get(c.primary);
@@ -194,18 +194,18 @@ public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends Abstract
start++;
id.advance();
// Now the cluster may have become completely filled:
- if (cluster.size() == maxsize) {
+ if(cluster.size() == maxsize) {
final int full = c.primary;
// Refresh the not yet assigned objects where necessary:
- for (; id.valid(); id.advance()) {
+ for(; id.valid(); id.advance()) {
Meta ca = metas.get(id);
- if (ca.primary == full) {
+ if(ca.primary == full) {
// Update the best index:
- for (int i = 0; i < k; i++) {
- if (i == full || clusters.get(i).size() >= maxsize) {
+ for(int i = 0; i < k; i++) {
+ if(i == full || clusters.get(i).size() >= maxsize) {
continue;
}
- if (ca.primary == full || ca.dists[i] < ca.dists[ca.primary]) {
+ if(ca.primary == full || ca.dists[i] < ca.dists[ca.primary]) {
ca.primary = i;
}
}
@@ -232,15 +232,15 @@ public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends Abstract
* @param df Distance function
*/
protected void updateDistances(Relation<V> relation, List<? extends NumberVector<?>> means, final WritableDataStore<Meta> metas, PrimitiveDoubleDistanceFunction<NumberVector<?>> df) {
- for (DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) {
+ for(DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) {
Meta c = metas.get(id);
V fv = relation.get(id);
// Update distances to means.
c.secondary = -1;
- for (int i = 0; i < k; i++) {
+ for(int i = 0; i < k; i++) {
c.dists[i] = df.doubleDistance(fv, means.get(i));
- if (c.primary != i) {
- if (c.secondary < 0 || c.dists[i] < c.dists[c.secondary]) {
+ if(c.primary != i) {
+ if(c.secondary < 0 || c.dists[i] < c.dists[c.secondary]) {
c.secondary = i;
}
}
@@ -277,7 +277,7 @@ public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends Abstract
};
// List for sorting cluster preferences
int[] preferences = new int[k];
- for (int i = 0; i < k; i++) {
+ for(int i = 0; i < k; i++) {
preferences[i] = i;
}
// Comparator for this list.
@@ -285,28 +285,28 @@ public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends Abstract
// Initialize transfer lists:
ArrayModifiableDBIDs[] transfers = new ArrayModifiableDBIDs[k];
- for (int i = 0; i < k; i++) {
+ for(int i = 0; i < k; i++) {
transfers[i] = DBIDUtil.newArray();
}
- for (int iter = 0; maxiter < 0 || iter < maxiter; iter++) {
+ for(int iter = 0; maxiter < 0 || iter < maxiter; iter++) {
updateDistances(relation, means, metas, df);
tids.sort(comp);
int active = 0; // Track if anything has changed
- for (DBIDIter id = tids.iter(); id.valid(); id.advance()) {
+ for(DBIDIter id = tids.iter(); id.valid(); id.advance()) {
Meta c = metas.get(id);
ModifiableDBIDs source = clusters.get(c.primary);
IntegerArrayQuickSort.sort(preferences, pcomp.select(c));
boolean transferred = false;
- for (int i : preferences) {
- if (i == c.primary) {
+ for(int i : preferences) {
+ if(i == c.primary) {
continue; // Cannot transfer to the same cluster!
}
ModifiableDBIDs dest = clusters.get(i);
// Can we pair this transfer?
- for (DBIDMIter other = transfers[i].iter(); other.valid(); other.advance()) {
+ for(DBIDMIter other = transfers[i].iter(); other.valid(); other.advance()) {
Meta c2 = metas.get(other);
- if (c.gain(i) + c2.gain(c.primary) > 0) {
+ if(c.gain(i) + c2.gain(c.primary) > 0) {
transfer(metas, c2, dest, source, other, c.primary);
transfer(metas, c, source, dest, id, i);
active += 2;
@@ -316,7 +316,7 @@ public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends Abstract
}
}
// If cluster sizes allow, move a single object.
- if (c.gain(i) > 0 && (dest.size() < maxsize && source.size() > minsize)) {
+ if(c.gain(i) > 0 && (dest.size() < maxsize && source.size() > minsize)) {
transfer(metas, c, source, dest, id, i);
active += 1;
transferred = true;
@@ -325,7 +325,7 @@ public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends Abstract
}
// If the object would prefer a different cluster, put in outgoing
// transfer list.
- if (!transferred && (c.dists[c.primary] > c.dists[c.secondary])) {
+ if(!transferred && (c.dists[c.primary] > c.dists[c.secondary])) {
transfers[c.primary].add(id);
}
}
@@ -333,14 +333,14 @@ public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends Abstract
// considering more than one object?
int pending = 0;
// Clear transfer lists for next iteration.
- for (int i = 0; i < k; i++) {
+ for(int i = 0; i < k; i++) {
pending += transfers[i].size();
transfers[i].clear();
}
- if (LOG.isDebuggingFine()) {
+ if(LOG.isDebuggingFine()) {
LOG.debugFine("Performed " + active + " transfers in iteration " + iter + " skipped " + pending);
}
- if (active <= 0) {
+ if(active <= 0) {
break;
}
// Recompute means after reassignment
@@ -486,27 +486,27 @@ public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends Abstract
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
ObjectParameter<PrimitiveDoubleDistanceFunction<? super NumberVector<?>>> distanceFunctionP = makeParameterDistanceFunction(SquaredEuclideanDistanceFunction.class, PrimitiveDoubleDistanceFunction.class);
- if (config.grab(distanceFunctionP)) {
+ if(config.grab(distanceFunctionP)) {
distanceFunction = distanceFunctionP.instantiateClass(config);
- if (!(distanceFunction instanceof EuclideanDistanceFunction) && !(distanceFunction instanceof SquaredEuclideanDistanceFunction)) {
+ if(!(distanceFunction instanceof EuclideanDistanceFunction) && !(distanceFunction instanceof SquaredEuclideanDistanceFunction)) {
LOG.warning("k-means optimizes the sum of squares - it should be used with squared euclidean distance and may stop converging otherwise!");
}
}
IntParameter kP = new IntParameter(K_ID);
- kP.addConstraint(new GreaterConstraint(1));
- if (config.grab(kP)) {
+ kP.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
+ if(config.grab(kP)) {
k = kP.getValue();
}
ObjectParameter<KMeansInitialization<V>> initialP = new ObjectParameter<>(INIT_ID, KMeansInitialization.class, KMeansPlusPlusInitialMeans.class);
- if (config.grab(initialP)) {
+ if(config.grab(initialP)) {
initializer = initialP.instantiateClass(config);
}
IntParameter maxiterP = new IntParameter(MAXITER_ID, -1);
- maxiterP.addConstraint(new GreaterEqualConstraint(-1));
- if (config.grab(maxiterP)) {
+ maxiterP.addConstraint(CommonConstraints.GREATER_EQUAL_MINUSONE_INT);
+ if(config.grab(maxiterP)) {
maxiter = maxiterP.intValue();
}
}
diff --git a/src/tutorial/outlier/DistanceStddevOutlier.java b/src/tutorial/outlier/DistanceStddevOutlier.java
index e0a5e244..61859f88 100644
--- a/src/tutorial/outlier/DistanceStddevOutlier.java
+++ b/src/tutorial/outlier/DistanceStddevOutlier.java
@@ -25,7 +25,7 @@ import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -134,7 +134,7 @@ public class DistanceStddevOutlier<O, D extends NumberDistance<D, ?>> extends Ab
// The super class has the distance function parameter!
super.makeOptions(config);
IntParameter kParam = new IntParameter(K_ID);
- kParam.addConstraint(new GreaterConstraint(1));
+ kParam.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
if(config.grab(kParam)) {
k = kParam.getValue();
}
diff --git a/src/tutorial/outlier/ODIN.java b/src/tutorial/outlier/ODIN.java
index a8996305..04f8ee90 100644
--- a/src/tutorial/outlier/ODIN.java
+++ b/src/tutorial/outlier/ODIN.java
@@ -46,7 +46,7 @@ import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -182,7 +182,7 @@ public class ODIN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorit
// Since in a database context, the 1 nearest neighbor
// will usually be the query object itself, we require
// this value to be at least 2.
- param.addConstraint(new GreaterConstraint(1));
+ param.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
if (config.grab(param)) {
k = param.intValue();
}
diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/AbstractSimpleAlgorithmTest.java b/test/de/lmu/ifi/dbs/elki/algorithm/AbstractSimpleAlgorithmTest.java
index 8b0507f0..3e260e8e 100644
--- a/test/de/lmu/ifi/dbs/elki/algorithm/AbstractSimpleAlgorithmTest.java
+++ b/test/de/lmu/ifi/dbs/elki/algorithm/AbstractSimpleAlgorithmTest.java
@@ -97,7 +97,7 @@ public abstract class AbstractSimpleAlgorithmTest {
*/
protected <T> Database makeSimpleDatabase(String filename, int expectedSize, ListParameterization params, Class<?>[] filters) {
org.junit.Assert.assertTrue("Test data set not found: " + filename, (new File(filename)).exists());
- params.addParameter(FileBasedDatabaseConnection.INPUT_ID, filename);
+ params.addParameter(FileBasedDatabaseConnection.Parameterizer.INPUT_ID, filename);
List<Class<?>> filterlist = new ArrayList<>();
filterlist.add(FixedDBIDsFilter.class);
@@ -106,8 +106,8 @@ public abstract class AbstractSimpleAlgorithmTest {
filterlist.add(filter);
}
}
- params.addParameter(FileBasedDatabaseConnection.FILTERS_ID, filterlist);
- params.addParameter(FixedDBIDsFilter.IDSTART_ID, 1);
+ params.addParameter(FileBasedDatabaseConnection.Parameterizer.FILTERS_ID, filterlist);
+ params.addParameter(FixedDBIDsFilter.Parameterizer.IDSTART_ID, 1);
Database db = ClassGenericsUtil.parameterizeOrAbort(StaticArrayDatabase.class, params);
testParameterizationOk(params);
diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/TestKNNJoin.java b/test/de/lmu/ifi/dbs/elki/algorithm/TestKNNJoin.java
index 3d8ef366..19509350 100644
--- a/test/de/lmu/ifi/dbs/elki/algorithm/TestKNNJoin.java
+++ b/test/de/lmu/ifi/dbs/elki/algorithm/TestKNNJoin.java
@@ -80,10 +80,10 @@ public class TestKNNJoin implements JUnit4Test {
@Test
public void testLinearScan() {
ListParameterization inputparams = new ListParameterization();
- inputparams.addParameter(FileBasedDatabaseConnection.INPUT_ID, dataset);
+ inputparams.addParameter(FileBasedDatabaseConnection.Parameterizer.INPUT_ID, dataset);
List<Class<?>> filters = Arrays.asList(new Class<?>[] { FixedDBIDsFilter.class });
- inputparams.addParameter(FileBasedDatabaseConnection.FILTERS_ID, filters);
- inputparams.addParameter(FixedDBIDsFilter.IDSTART_ID, 1);
+ inputparams.addParameter(FileBasedDatabaseConnection.Parameterizer.FILTERS_ID, filters);
+ inputparams.addParameter(FixedDBIDsFilter.Parameterizer.IDSTART_ID, 1);
// get database
Database db = ClassGenericsUtil.parameterizeOrAbort(StaticArrayDatabase.class, inputparams);
@@ -130,7 +130,7 @@ public class TestKNNJoin implements JUnit4Test {
@Test
public void testKNNJoinRtreeMini() {
ListParameterization spatparams = new ListParameterization();
- spatparams.addParameter(StaticArrayDatabase.INDEX_ID, RStarTreeFactory.class);
+ spatparams.addParameter(StaticArrayDatabase.Parameterizer.INDEX_ID, RStarTreeFactory.class);
spatparams.addParameter(AbstractPageFileFactory.Parameterizer.PAGE_SIZE_ID, 200);
doKNNJoin(spatparams);
@@ -144,7 +144,7 @@ public class TestKNNJoin implements JUnit4Test {
@Test
public void testKNNJoinRtreeMaxi() {
ListParameterization spatparams = new ListParameterization();
- spatparams.addParameter(StaticArrayDatabase.INDEX_ID, RStarTreeFactory.class);
+ spatparams.addParameter(StaticArrayDatabase.Parameterizer.INDEX_ID, RStarTreeFactory.class);
spatparams.addParameter(AbstractPageFileFactory.Parameterizer.PAGE_SIZE_ID, 2000);
doKNNJoin(spatparams);
@@ -158,7 +158,7 @@ public class TestKNNJoin implements JUnit4Test {
@Test
public void testKNNJoinDeLiCluTreeMini() {
ListParameterization spatparams = new ListParameterization();
- spatparams.addParameter(StaticArrayDatabase.INDEX_ID, DeLiCluTreeFactory.class);
+ spatparams.addParameter(StaticArrayDatabase.Parameterizer.INDEX_ID, DeLiCluTreeFactory.class);
spatparams.addParameter(AbstractPageFileFactory.Parameterizer.PAGE_SIZE_ID, 200);
doKNNJoin(spatparams);
@@ -171,10 +171,10 @@ public class TestKNNJoin implements JUnit4Test {
* @throws ParameterException
*/
void doKNNJoin(ListParameterization inputparams) {
- inputparams.addParameter(FileBasedDatabaseConnection.INPUT_ID, dataset);
+ inputparams.addParameter(FileBasedDatabaseConnection.Parameterizer.INPUT_ID, dataset);
List<Class<?>> filters = Arrays.asList(new Class<?>[] { FixedDBIDsFilter.class });
- inputparams.addParameter(FileBasedDatabaseConnection.FILTERS_ID, filters);
- inputparams.addParameter(FixedDBIDsFilter.IDSTART_ID, 1);
+ inputparams.addParameter(FileBasedDatabaseConnection.Parameterizer.FILTERS_ID, filters);
+ inputparams.addParameter(FixedDBIDsFilter.Parameterizer.IDSTART_ID, 1);
// get database
Database db = ClassGenericsUtil.parameterizeOrAbort(StaticArrayDatabase.class, inputparams);
diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestDBSCANResults.java b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestDBSCANResults.java
index 714f345c..34dd2e3d 100644
--- a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestDBSCANResults.java
+++ b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestDBSCANResults.java
@@ -60,8 +60,8 @@ public class TestDBSCANResults extends AbstractSimpleAlgorithmTest implements JU
// setup algorithm
ListParameterization params = new ListParameterization();
- params.addParameter(DBSCAN.EPSILON_ID, 0.04);
- params.addParameter(DBSCAN.MINPTS_ID, 20);
+ params.addParameter(DBSCAN.Parameterizer.EPSILON_ID, 0.04);
+ params.addParameter(DBSCAN.Parameterizer.MINPTS_ID, 20);
DBSCAN<DoubleVector, DoubleDistance> dbscan = ClassGenericsUtil.parameterizeOrAbort(DBSCAN.class, params);
testParameterizationOk(params);
@@ -84,8 +84,8 @@ public class TestDBSCANResults extends AbstractSimpleAlgorithmTest implements JU
// Setup algorithm
ListParameterization params = new ListParameterization();
- params.addParameter(DBSCAN.EPSILON_ID, 11.5);
- params.addParameter(DBSCAN.MINPTS_ID, 120);
+ params.addParameter(DBSCAN.Parameterizer.EPSILON_ID, 11.5);
+ params.addParameter(DBSCAN.Parameterizer.MINPTS_ID, 120);
DBSCAN<DoubleVector, DoubleDistance> dbscan = ClassGenericsUtil.parameterizeOrAbort(DBSCAN.class, params);
testParameterizationOk(params);
@@ -107,8 +107,8 @@ public class TestDBSCANResults extends AbstractSimpleAlgorithmTest implements JU
// setup algorithm
ListParameterization params = new ListParameterization();
- params.addParameter(DBSCAN.EPSILON_ID, 0.04);
- params.addParameter(DBSCAN.MINPTS_ID, 20);
+ params.addParameter(DBSCAN.Parameterizer.EPSILON_ID, 0.04);
+ params.addParameter(DBSCAN.Parameterizer.MINPTS_ID, 20);
GeneralizedDBSCAN dbscan = ClassGenericsUtil.parameterizeOrAbort(GeneralizedDBSCAN.class, params);
testParameterizationOk(params);
@@ -131,8 +131,8 @@ public class TestDBSCANResults extends AbstractSimpleAlgorithmTest implements JU
// Setup algorithm
ListParameterization params = new ListParameterization();
- params.addParameter(DBSCAN.EPSILON_ID, 11.5);
- params.addParameter(DBSCAN.MINPTS_ID, 120);
+ params.addParameter(DBSCAN.Parameterizer.EPSILON_ID, 11.5);
+ params.addParameter(DBSCAN.Parameterizer.MINPTS_ID, 120);
GeneralizedDBSCAN dbscan = ClassGenericsUtil.parameterizeOrAbort(GeneralizedDBSCAN.class, params);
testParameterizationOk(params);
diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestDeLiCluResults.java b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestDeLiCluResults.java
index 1323c5fa..b70c0f67 100644
--- a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestDeLiCluResults.java
+++ b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestDeLiCluResults.java
@@ -62,7 +62,7 @@ public class TestDeLiCluResults extends AbstractSimpleAlgorithmTest implements J
public void testDeLiCluResults() {
ListParameterization indexparams = new ListParameterization();
// We need a special index for this algorithm:
- indexparams.addParameter(StaticArrayDatabase.INDEX_ID, DeLiCluTreeFactory.class);
+ indexparams.addParameter(StaticArrayDatabase.Parameterizer.INDEX_ID, DeLiCluTreeFactory.class);
indexparams.addParameter(AbstractPageFileFactory.Parameterizer.PAGE_SIZE_ID, 1000);
Database db = makeSimpleDatabase(UNITTEST + "hierarchical-2d.ascii", 710, indexparams, null);
diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestEMResults.java b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestEMResults.java
index 421141a3..065307be 100644
--- a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestEMResults.java
+++ b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestEMResults.java
@@ -57,14 +57,14 @@ public class TestEMResults extends AbstractSimpleAlgorithmTest implements JUnit4
// Setup algorithm
ListParameterization params = new ListParameterization();
- params.addParameter(KMeans.SEED_ID, 1);
- params.addParameter(EM.K_ID, 5);
+ params.addParameter(KMeans.SEED_ID, 0);
+ params.addParameter(EM.Parameterizer.K_ID, 6);
EM<DoubleVector> em = ClassGenericsUtil.parameterizeOrAbort(EM.class, params);
testParameterizationOk(params);
// run EM on database
Clustering<EMModel<DoubleVector>> result = em.run(db);
- testFMeasure(db, result, 0.961587);
- testClusterSizes(result, new int[] { 5, 91, 98, 200, 316 });
+ testFMeasure(db, result, 0.7551098);
+ testClusterSizes(result, new int[] { 50, 99, 102, 120, 141, 198 });
}
} \ No newline at end of file
diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/TestCOPACResults.java b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/TestCOPACResults.java
index 8af036bf..94c59557 100644
--- a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/TestCOPACResults.java
+++ b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/TestCOPACResults.java
@@ -66,8 +66,8 @@ public class TestCOPACResults extends AbstractSimpleAlgorithmTest implements JUn
// these parameters are not picked too smartly - room for improvement.
ListParameterization params = new ListParameterization();
params.addParameter(COPAC.PARTITION_ALGORITHM_ID, DBSCAN.class);
- params.addParameter(DBSCAN.EPSILON_ID, 0.02);
- params.addParameter(DBSCAN.MINPTS_ID, 50);
+ params.addParameter(DBSCAN.Parameterizer.EPSILON_ID, 0.02);
+ params.addParameter(DBSCAN.Parameterizer.MINPTS_ID, 50);
params.addParameter(COPAC.PREPROCESSOR_ID, KNNQueryFilteredPCAIndex.Factory.class);
params.addParameter(KNNQueryFilteredPCAIndex.Factory.K_ID, 15);
@@ -94,8 +94,8 @@ public class TestCOPACResults extends AbstractSimpleAlgorithmTest implements JUn
// Setup algorithm
ListParameterization params = new ListParameterization();
params.addParameter(COPAC.PARTITION_ALGORITHM_ID, DBSCAN.class);
- params.addParameter(DBSCAN.EPSILON_ID, 0.5);
- params.addParameter(DBSCAN.MINPTS_ID, 20);
+ params.addParameter(DBSCAN.Parameterizer.EPSILON_ID, 0.5);
+ params.addParameter(DBSCAN.Parameterizer.MINPTS_ID, 20);
params.addParameter(COPAC.PREPROCESSOR_ID, KNNQueryFilteredPCAIndex.Factory.class);
params.addParameter(KNNQueryFilteredPCAIndex.Factory.K_ID, 45);
// PCA
diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/TestERiCResults.java b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/TestERiCResults.java
index 92a61e5e..76e8cca2 100644
--- a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/TestERiCResults.java
+++ b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/TestERiCResults.java
@@ -66,8 +66,8 @@ public class TestERiCResults extends AbstractSimpleAlgorithmTest implements JUni
// ERiC
ListParameterization params = new ListParameterization();
params.addParameter(COPAC.PARTITION_ALGORITHM_ID, DBSCAN.class);
- params.addParameter(DBSCAN.MINPTS_ID, 30);
- params.addParameter(DBSCAN.EPSILON_ID, 0);
+ params.addParameter(DBSCAN.Parameterizer.MINPTS_ID, 30);
+ params.addParameter(DBSCAN.Parameterizer.EPSILON_ID, 0);
// ERiC Distance function in DBSCAN:
params.addParameter(COPAC.PARTITION_DISTANCE_ID, ERiCDistanceFunction.class);
params.addParameter(ERiCDistanceFunction.DELTA_ID, 0.20);
@@ -104,8 +104,8 @@ public class TestERiCResults extends AbstractSimpleAlgorithmTest implements JUni
ListParameterization params = new ListParameterization();
// ERiC
params.addParameter(COPAC.PARTITION_ALGORITHM_ID, DBSCAN.class);
- params.addParameter(DBSCAN.MINPTS_ID, 15);
- params.addParameter(DBSCAN.EPSILON_ID, 0);
+ params.addParameter(DBSCAN.Parameterizer.MINPTS_ID, 15);
+ params.addParameter(DBSCAN.Parameterizer.EPSILON_ID, 0);
// ERiC Distance function in DBSCAN:
params.addParameter(COPAC.PARTITION_DISTANCE_ID, ERiCDistanceFunction.class);
params.addParameter(ERiCDistanceFunction.DELTA_ID, 1.0);
diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/TestKMeansResults.java b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/TestKMeansResults.java
index 3419352a..3601b977 100644
--- a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/TestKMeansResults.java
+++ b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/TestKMeansResults.java
@@ -61,7 +61,7 @@ public class TestKMeansResults extends AbstractSimpleAlgorithmTest implements JU
// Setup algorithm
ListParameterization params = new ListParameterization();
params.addParameter(KMeans.K_ID, 5);
- params.addParameter(KMeans.INIT_ID, FirstKInitialMeans.class);
+ params.addParameter(KMeans.SEED_ID, 2);
AbstractKMeans<DoubleVector, DoubleDistance, ?> kmeans = ClassGenericsUtil.parameterizeOrAbort(KMeansLloyd.class, params);
testParameterizationOk(params);
@@ -84,7 +84,7 @@ public class TestKMeansResults extends AbstractSimpleAlgorithmTest implements JU
// Setup algorithm
ListParameterization params = new ListParameterization();
params.addParameter(KMeans.K_ID, 5);
- params.addParameter(KMeans.INIT_ID, FirstKInitialMeans.class);
+ params.addParameter(KMeans.SEED_ID, 2);
AbstractKMeans<DoubleVector, DoubleDistance, ?> kmeans = ClassGenericsUtil.parameterizeOrAbort(KMeansMacQueen.class, params);
testParameterizationOk(params);
@@ -107,7 +107,7 @@ public class TestKMeansResults extends AbstractSimpleAlgorithmTest implements JU
// Setup algorithm
ListParameterization params = new ListParameterization();
params.addParameter(KMeans.K_ID, 5);
- params.addParameter(KMeans.INIT_ID, FirstKInitialMeans.class);
+ params.addParameter(KMeans.SEED_ID, 2);
AbstractKMeans<DoubleVector, DoubleDistance, ?> kmedians = ClassGenericsUtil.parameterizeOrAbort(KMediansLloyd.class, params);
testParameterizationOk(params);
diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/TestP3C.java b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/TestP3C.java
new file mode 100644
index 00000000..4d02351f
--- /dev/null
+++ b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/TestP3C.java
@@ -0,0 +1,84 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import org.junit.Test;
+
+import de.lmu.ifi.dbs.elki.JUnit4Test;
+import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.DoubleVector;
+import de.lmu.ifi.dbs.elki.data.model.SubspaceModel;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization;
+
+/**
+ * Test P3C on a simple test data set.
+ *
+ * Note: both data sets are really beneficial for P3C, and with reasonably
+ * chosen parameters, it works perfectly.
+ *
+ * @author Erich Schubert
+ */
+public class TestP3C extends AbstractSimpleAlgorithmTest implements JUnit4Test {
+ /**
+ * Run P3C with fixed parameters and compare the result to a golden standard.
+ */
+ @Test
+ public void testP3CSimple() {
+ Database db = makeSimpleDatabase(UNITTEST + "subspace-simple.csv", 600);
+
+ ListParameterization params = new ListParameterization();
+
+ // setup algorithm
+ P3C<DoubleVector> p3c = ClassGenericsUtil.parameterizeOrAbort(P3C.class, params);
+ testParameterizationOk(params);
+
+ // run P3C on database
+ Clustering<SubspaceModel<DoubleVector>> result = p3c.run(db);
+
+ testFMeasure(db, result, 1.0);
+ testClusterSizes(result, new int[] { 200, 400 });
+ }
+
+ /**
+ * Run P3C with fixed parameters and compare the result to a golden standard.
+ */
+ @Test
+ public void testP3COverlapping() {
+ Database db = makeSimpleDatabase(UNITTEST + "subspace-overlapping-3-4d.ascii", 850);
+
+ // Setup algorithm
+ ListParameterization params = new ListParameterization();
+ params.addParameter(P3C.Parameterizer.ALPHA_THRESHOLD_ID, 0.01);
+ P3C<DoubleVector> p3c = ClassGenericsUtil.parameterizeOrAbort(P3C.class, params);
+ testParameterizationOk(params);
+
+ // run P3C on database
+ Clustering<SubspaceModel<DoubleVector>> result = p3c.run(db);
+ testFMeasure(db, result, 1.0);
+ testClusterSizes(result, new int[] { 150, 300, 400 });
+ }
+} \ No newline at end of file
diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/TestPreDeConResults.java b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/TestPreDeConResults.java
index a07c71a1..d00a703f 100644
--- a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/TestPreDeConResults.java
+++ b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/TestPreDeConResults.java
@@ -58,7 +58,7 @@ public class TestPreDeConResults extends AbstractSimpleAlgorithmTest implements
public void testPreDeConResults() {
// Additional input parameters
ListParameterization inp = new ListParameterization();
- inp.addParameter(ClassLabelFilter.CLASS_LABEL_INDEX_ID, 1);
+ inp.addParameter(ClassLabelFilter.Parameterizer.CLASS_LABEL_INDEX_ID, 1);
Class<?>[] filters = new Class<?>[] { ClassLabelFilter.class };
Database db = makeSimpleDatabase(UNITTEST + "axis-parallel-subspace-clusters-6d.csv.gz", 2500, inp, filters);
diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestABOD.java b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestABOD.java
index 3490ce9a..e213b54e 100644
--- a/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestABOD.java
+++ b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestABOD.java
@@ -25,7 +25,6 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
import org.junit.Test;
-import de.lmu.ifi.dbs.elki.JUnit4Test;
import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest;
import de.lmu.ifi.dbs.elki.data.DoubleVector;
import de.lmu.ifi.dbs.elki.database.Database;
@@ -36,16 +35,17 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParamet
/**
* Tests the ABOD algorithm.
*
+ * Note: we don't implement JUnit4Test, as this test is slow.
+ *
* @author Lucia Cichella
*/
-public class TestABOD extends AbstractSimpleAlgorithmTest implements JUnit4Test {
+public class TestABOD extends AbstractSimpleAlgorithmTest {
@Test
public void testABOD() {
Database db = makeSimpleDatabase(UNITTEST + "outlier-3d-3clusters.ascii", 960);
// Parameterization
ListParameterization params = new ListParameterization();
- params.addParameter(ABOD.K_ID, 5);
// setup Algorithm
ABOD<DoubleVector> abod = ClassGenericsUtil.parameterizeOrAbort(ABOD.class, params);
@@ -54,7 +54,7 @@ public class TestABOD extends AbstractSimpleAlgorithmTest implements JUnit4Test
// run ABOD on database
OutlierResult result = abod.run(db);
- testSingleScore(result, 945, 3.7108897864090475E-4);
- testAUC(db, "Noise", result, 0.9638148148148148);
+ testAUC(db, "Noise", result, 0.94887037037037);
+ testSingleScore(result, 945, 1.88108120738508E-4);
}
} \ No newline at end of file
diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestFastABOD.java b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestFastABOD.java
new file mode 100644
index 00000000..11977af6
--- /dev/null
+++ b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestFastABOD.java
@@ -0,0 +1,60 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import org.junit.Test;
+
+import de.lmu.ifi.dbs.elki.JUnit4Test;
+import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest;
+import de.lmu.ifi.dbs.elki.data.DoubleVector;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization;
+
+/**
+ * Tests the ABOD algorithm.
+ *
+ * @author Lucia Cichella
+ */
+public class TestFastABOD extends AbstractSimpleAlgorithmTest implements JUnit4Test {
+ @Test
+ public void testFastABOD() {
+ Database db = makeSimpleDatabase(UNITTEST + "outlier-3d-3clusters.ascii", 960);
+
+ // Parameterization
+ ListParameterization params = new ListParameterization();
+ params.addParameter(FastABOD.Parameterizer.K_ID, 5);
+
+ // setup Algorithm
+ FastABOD<DoubleVector> abod = ClassGenericsUtil.parameterizeOrAbort(FastABOD.class, params);
+ testParameterizationOk(params);
+
+ // run ABOD on database
+ OutlierResult result = abod.run(db);
+
+ testAUC(db, "Noise", result, 0.963259259259);
+ testSingleScore(result, 945, 0.68723169783);
+ }
+} \ No newline at end of file
diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestLBABOD.java b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestLBABOD.java
new file mode 100644
index 00000000..5a1f56f2
--- /dev/null
+++ b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestLBABOD.java
@@ -0,0 +1,62 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import org.junit.Test;
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest;
+import de.lmu.ifi.dbs.elki.data.DoubleVector;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization;
+
+/**
+ * Tests the LB-ABOD algorithm.
+ *
+ * Note: we don't implement JUnit4Test, as this test is slow.
+ *
+ * @author Lucia Cichella
+ */
+public class TestLBABOD extends AbstractSimpleAlgorithmTest {
+ @Test
+ public void testLBABOD() {
+ Database db = makeSimpleDatabase(UNITTEST + "outlier-3d-3clusters.ascii", 960);
+
+ // Parameterization
+ ListParameterization params = new ListParameterization();
+ params.addParameter(FastABOD.Parameterizer.K_ID, 150);
+ params.addParameter(LBABOD.Parameterizer.L_ID, 10);
+
+ // setup Algorithm
+ LBABOD<DoubleVector> abod = ClassGenericsUtil.parameterizeOrAbort(LBABOD.class, params);
+ testParameterizationOk(params);
+
+ // run ABOD on database
+ OutlierResult result = abod.run(db);
+
+ testAUC(db, "Noise", result, 0.928999999999);
+ testSingleScore(result, 945, 1.88108120738508E-4);
+ }
+} \ No newline at end of file
diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/TestOnlineLOF.java b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/TestOnlineLOF.java
index cd60a58f..889cddce 100644
--- a/test/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/TestOnlineLOF.java
+++ b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/TestOnlineLOF.java
@@ -152,7 +152,7 @@ public class TestOnlineLOF implements JUnit4Test {
*/
private static UpdatableDatabase getDatabase() {
ListParameterization params = new ListParameterization();
- params.addParameter(FileBasedDatabaseConnection.INPUT_ID, dataset);
+ params.addParameter(FileBasedDatabaseConnection.Parameterizer.INPUT_ID, dataset);
UpdatableDatabase db = ClassGenericsUtil.parameterizeOrAbort(HashmapDatabase.class, params);
params.failOnErrors();
diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/TestSOD.java b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/TestSOD.java
index c43f2f35..f8f47886 100644
--- a/test/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/TestSOD.java
+++ b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/TestSOD.java
@@ -47,7 +47,7 @@ public class TestSOD extends AbstractSimpleAlgorithmTest implements JUnit4Test {
// Parameterization
ListParameterization params = new ListParameterization();
- params.addParameter(SOD.KNN_ID, 25);
+ params.addParameter(SOD.Parameterizer.KNN_ID, 25);
params.addParameter(SharedNearestNeighborPreprocessor.Factory.NUMBER_OF_NEIGHBORS_ID, 19);
// setup Algorithm
diff --git a/test/de/lmu/ifi/dbs/elki/database/TestRelationSorting.java b/test/de/lmu/ifi/dbs/elki/database/TestRelationSorting.java
index 01ec174c..2b77f2f5 100644
--- a/test/de/lmu/ifi/dbs/elki/database/TestRelationSorting.java
+++ b/test/de/lmu/ifi/dbs/elki/database/TestRelationSorting.java
@@ -56,7 +56,7 @@ public class TestRelationSorting implements JUnit4Test {
@Test
public void testSorting() {
ListParameterization params = new ListParameterization();
- params.addParameter(FileBasedDatabaseConnection.INPUT_ID, filename);
+ params.addParameter(FileBasedDatabaseConnection.Parameterizer.INPUT_ID, filename);
Database db = ClassGenericsUtil.parameterizeOrAbort(StaticArrayDatabase.class, params);
if (params.hasUnusedParameters()) {
fail("Unused parameters: " + params.getRemainingParameters());
diff --git a/test/de/lmu/ifi/dbs/elki/datasource/parser/TestTermFrequencyParser.java b/test/de/lmu/ifi/dbs/elki/datasource/parser/TestTermFrequencyParser.java
index bf5e2791..2bf288f7 100644
--- a/test/de/lmu/ifi/dbs/elki/datasource/parser/TestTermFrequencyParser.java
+++ b/test/de/lmu/ifi/dbs/elki/datasource/parser/TestTermFrequencyParser.java
@@ -62,14 +62,14 @@ public class TestTermFrequencyParser implements JUnit4Test {
@Test
public void testDBLPData() {
ListParameterization config = new ListParameterization();
- config.addParameter(AbstractDatabaseConnection.PARSER_ID, TermFrequencyParser.class);
- config.addParameter(FileBasedDatabaseConnection.INPUT_ID, DBLP_DATA);
+ config.addParameter(AbstractDatabaseConnection.Parameterizer.PARSER_ID, TermFrequencyParser.class);
+ config.addParameter(FileBasedDatabaseConnection.Parameterizer.INPUT_ID, DBLP_DATA);
ArrayList<Object> filters = new ArrayList<>();
filters.add(TFIDFNormalization.class);
- // Note: this filter is needed for the non-sparse euclidean distance below.
+ // Note: this filter is needed for the non-sparse Euclidean distance below.
filters.add(SparseVectorFieldFilter.class);
- config.addParameter(AbstractDatabaseConnection.FILTERS_ID, filters);
+ config.addParameter(AbstractDatabaseConnection.Parameterizer.FILTERS_ID, filters);
Database db = ClassGenericsUtil.parameterizeOrAbort(StaticArrayDatabase.class, config);
diff --git a/test/de/lmu/ifi/dbs/elki/datasource/parser/TestTokenizer.java b/test/de/lmu/ifi/dbs/elki/datasource/parser/TestTokenizer.java
new file mode 100644
index 00000000..34ccdc04
--- /dev/null
+++ b/test/de/lmu/ifi/dbs/elki/datasource/parser/TestTokenizer.java
@@ -0,0 +1,133 @@
+package de.lmu.ifi.dbs.elki.datasource.parser;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.util.regex.Pattern;
+
+import org.junit.Test;
+
+import de.lmu.ifi.dbs.elki.JUnit4Test;
+
+/**
+ * Simple unit test for testing the new tokenizer
+ *
+ * TODO: add more test cases, refactor into input, expected-output pattern.
+ *
+ * @author Erich Schubert
+ */
+public class TestTokenizer implements JUnit4Test {
+ Tokenizer t = new Tokenizer(Pattern.compile("\\s"), "\"'");
+
+ @Test
+ public void testSimple() {
+ final String input = "1 -234 3.1415 - banana";
+ final Object[] expect = { 1L, -234L, 3.1415, "-", "banana" };
+ t.initialize(input, 0, input.length());
+ tokenizerTest(expect);
+ }
+
+ @Test
+ public void testQuotes() {
+ final String input = "'this is' \"a test\" '123' '123 456' \"bana' na\"";
+ final Object[] expect = { "this is", "a test", 123L, "123 456", "bana' na" };
+ t.initialize(input, 0, input.length());
+ tokenizerTest(expect);
+ }
+
+ @Test
+ public void testSpecials() {
+ final String input = "nan inf -∞ NaN infinity NA";
+ final Object[] expect = { Double.NaN, Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY, Double.NaN, Double.POSITIVE_INFINITY, Double.NaN };
+ t.initialize(input, 0, input.length());
+ tokenizerTest(expect);
+ }
+
+ @Test
+ public void testEmpty() {
+ final String input = "";
+ final Object[] expect = {};
+ t.initialize(input, 0, input.length());
+ tokenizerTest(expect);
+ }
+
+ @Test
+ public void testLineEnd() {
+ final String input = "1 ";
+ final Object[] expect = { 1L };
+ t.initialize(input, 0, input.length());
+ tokenizerTest(expect);
+ }
+
+ @Test
+ public void testPartial() {
+ final String input = "abc1def";
+ final Object[] expect = { 1L };
+ t.initialize(input, 3, 4);
+ tokenizerTest(expect);
+ }
+
+ private void tokenizerTest(Object[] expect) {
+ for(int i = 0; i < expect.length; i++, t.advance()) {
+ assertTrue("Tokenizer stopped early.", t.valid());
+ Object e = expect[i];
+ // Negative tests first:
+ if(e instanceof String || e instanceof Double) {
+ try {
+ long val = t.getLongBase10();
+ fail("The value " + t.getSubstring() + " was expected to be not parseable as long integer, but returned: " + val);
+ }
+ catch(Exception ex) {
+ // pass. this is expected to fail.
+ }
+ }
+ if(e instanceof String) {
+ try {
+ double val = t.getDouble();
+ fail("The value " + t.getSubstring() + " was expected to be not parseable as double, but returned: " + val);
+ }
+ catch(Exception ex) {
+ // pass. this is expected to fail.
+ }
+ }
+ // Positive tests:
+ if(e instanceof Long) {
+ assertEquals("Long parsing failed.", (long) e, t.getLongBase10());
+ }
+ if(e instanceof Double) {
+ // Note: this also works for NaNs, they are treated special.
+ assertEquals("Double parsing failed.", (double) e, t.getDouble(), Double.MIN_VALUE);
+ }
+ if(e instanceof String) {
+ assertEquals("String parsing failed.", (String) e, t.getSubstring());
+ }
+ }
+ if(t.valid()) {
+ assertTrue("Spurous data after expected end: " + t.getSubstring(), !t.valid());
+ }
+ }
+}
diff --git a/test/de/lmu/ifi/dbs/elki/evaluation/paircounting/TestClusterContingencyTable.java b/test/de/lmu/ifi/dbs/elki/evaluation/paircounting/TestClusterContingencyTable.java
index 42188f56..bc83e534 100644
--- a/test/de/lmu/ifi/dbs/elki/evaluation/paircounting/TestClusterContingencyTable.java
+++ b/test/de/lmu/ifi/dbs/elki/evaluation/paircounting/TestClusterContingencyTable.java
@@ -67,7 +67,7 @@ public class TestClusterContingencyTable implements JUnit4Test {
public void testCompareDatabases() {
ListParameterization params = new ListParameterization();
// Input
- params.addParameter(FileBasedDatabaseConnection.INPUT_ID, dataset);
+ params.addParameter(FileBasedDatabaseConnection.Parameterizer.INPUT_ID, dataset);
// get database
Database db = ClassGenericsUtil.parameterizeOrAbort(StaticArrayDatabase.class, params);
diff --git a/test/de/lmu/ifi/dbs/elki/index/TestIndexStructures.java b/test/de/lmu/ifi/dbs/elki/index/TestIndexStructures.java
index 1623bcfa..b412e409 100644
--- a/test/de/lmu/ifi/dbs/elki/index/TestIndexStructures.java
+++ b/test/de/lmu/ifi/dbs/elki/index/TestIndexStructures.java
@@ -37,9 +37,9 @@ import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList;
import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.DoubleOptimizedDistanceKNNQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.LinearScanKNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.range.LinearScanRangeQuery;
+import de.lmu.ifi.dbs.elki.database.query.range.DoubleOptimizedDistanceRangeQuery;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.datasource.FileBasedDatabaseConnection;
@@ -98,7 +98,7 @@ public class TestIndexStructures implements JUnit4Test {
@Test
public void testExact() {
ListParameterization params = new ListParameterization();
- testFileBasedDatabaseConnection(params, LinearScanKNNQuery.class, LinearScanRangeQuery.class);
+ testFileBasedDatabaseConnection(params, DoubleOptimizedDistanceKNNQuery.class, DoubleOptimizedDistanceRangeQuery.class);
}
/**
@@ -107,7 +107,7 @@ public class TestIndexStructures implements JUnit4Test {
@Test
public void testMetrical() {
ListParameterization metparams = new ListParameterization();
- metparams.addParameter(StaticArrayDatabase.INDEX_ID, MTreeFactory.class);
+ metparams.addParameter(StaticArrayDatabase.Parameterizer.INDEX_ID, MTreeFactory.class);
metparams.addParameter(AbstractPageFileFactory.Parameterizer.PAGE_SIZE_ID, 300);
testFileBasedDatabaseConnection(metparams, DoubleDistanceMetricalIndexKNNQuery.class, DoubleDistanceMetricalIndexRangeQuery.class);
}
@@ -118,7 +118,7 @@ public class TestIndexStructures implements JUnit4Test {
@Test
public void testRStarTree() {
ListParameterization spatparams = new ListParameterization();
- spatparams.addParameter(StaticArrayDatabase.INDEX_ID, RStarTreeFactory.class);
+ spatparams.addParameter(StaticArrayDatabase.Parameterizer.INDEX_ID, RStarTreeFactory.class);
spatparams.addParameter(AbstractPageFileFactory.Parameterizer.PAGE_SIZE_ID, 300);
testFileBasedDatabaseConnection(spatparams, DoubleDistanceRStarTreeKNNQuery.class, DoubleDistanceRStarTreeRangeQuery.class);
}
@@ -129,7 +129,7 @@ public class TestIndexStructures implements JUnit4Test {
@Test
public void testVAFile() {
ListParameterization spatparams = new ListParameterization();
- spatparams.addParameter(StaticArrayDatabase.INDEX_ID, VAFile.Factory.class);
+ spatparams.addParameter(StaticArrayDatabase.Parameterizer.INDEX_ID, VAFile.Factory.class);
spatparams.addParameter(VAFile.Factory.PARTITIONS_ID, 4);
testFileBasedDatabaseConnection(spatparams, VAFile.VAFileKNNQuery.class, VAFile.VAFileRangeQuery.class);
}
@@ -140,7 +140,7 @@ public class TestIndexStructures implements JUnit4Test {
@Test
public void testPartialVAFile() {
ListParameterization spatparams = new ListParameterization();
- spatparams.addParameter(StaticArrayDatabase.INDEX_ID, PartialVAFile.Factory.class);
+ spatparams.addParameter(StaticArrayDatabase.Parameterizer.INDEX_ID, PartialVAFile.Factory.class);
spatparams.addParameter(PartialVAFile.Factory.PARTITIONS_ID, 4);
testFileBasedDatabaseConnection(spatparams, PartialVAFile.PartialVAFileKNNQuery.class, PartialVAFile.PartialVAFileRangeQuery.class);
}
@@ -153,7 +153,7 @@ public class TestIndexStructures implements JUnit4Test {
@Test
public void testRStarTreeFast() {
ListParameterization spatparams = new ListParameterization();
- spatparams.addParameter(StaticArrayDatabase.INDEX_ID, RStarTreeFactory.class);
+ spatparams.addParameter(StaticArrayDatabase.Parameterizer.INDEX_ID, RStarTreeFactory.class);
spatparams.addParameter(AbstractRStarTreeFactory.Parameterizer.INSERTION_STRATEGY_ID, ApproximativeLeastOverlapInsertionStrategy.class);
spatparams.addParameter(ApproximativeLeastOverlapInsertionStrategy.Parameterizer.INSERTION_CANDIDATES_ID, 1);
spatparams.addParameter(AbstractPageFileFactory.Parameterizer.PAGE_SIZE_ID, 300);
@@ -180,7 +180,7 @@ public class TestIndexStructures implements JUnit4Test {
* @param inputparams
*/
void testFileBasedDatabaseConnection(ListParameterization inputparams, Class<?> expectKNNQuery, Class<?> expectRangeQuery) {
- inputparams.addParameter(FileBasedDatabaseConnection.INPUT_ID, dataset);
+ inputparams.addParameter(FileBasedDatabaseConnection.Parameterizer.INPUT_ID, dataset);
// get database
Database db = ClassGenericsUtil.parameterizeOrAbort(StaticArrayDatabase.class, inputparams);
@@ -195,13 +195,13 @@ public class TestIndexStructures implements JUnit4Test {
// get the 10 next neighbors
DoubleVector dv = new DoubleVector(querypoint);
KNNQuery<DoubleVector, DoubleDistance> knnq = db.getKNNQuery(dist, k);
- assertTrue("Returned knn query is not of expected class.", expectKNNQuery.isAssignableFrom(knnq.getClass()));
+ assertTrue("Returned knn query is not of expected class: expected " + expectKNNQuery + " got " + knnq.getClass(), expectKNNQuery.isAssignableFrom(knnq.getClass()));
KNNList<DoubleDistance> ids = knnq.getKNNForObject(dv, k);
assertEquals("Result size does not match expectation!", shouldd.length, ids.size());
// verify that the neighbors match.
int i = 0;
- for (DistanceDBIDListIter<DoubleDistance> res = ids.iter(); res.valid(); res.advance(), i++) {
+ for(DistanceDBIDListIter<DoubleDistance> res = ids.iter(); res.valid(); res.advance(), i++) {
// Verify distance
assertEquals("Expected distance doesn't match.", shouldd[i], res.getDistance().doubleValue());
// verify vector
@@ -214,13 +214,13 @@ public class TestIndexStructures implements JUnit4Test {
// Do a range query
DoubleVector dv = new DoubleVector(querypoint);
RangeQuery<DoubleVector, DoubleDistance> rangeq = db.getRangeQuery(dist, eps);
- assertTrue("Returned range query is not of expected class.", expectRangeQuery.isAssignableFrom(rangeq.getClass()));
+ assertTrue("Returned range query is not of expected class: expected " + expectRangeQuery + " got " + rangeq.getClass(), expectRangeQuery.isAssignableFrom(rangeq.getClass()));
DistanceDBIDList<DoubleDistance> ids = rangeq.getRangeForObject(dv, eps);
assertEquals("Result size does not match expectation!", shouldd.length, ids.size());
// verify that the neighbors match.
int i = 0;
- for (DistanceDBIDListIter<DoubleDistance> res = ids.iter(); res.valid(); res.advance(), i++) {
+ for(DistanceDBIDListIter<DoubleDistance> res = ids.iter(); res.valid(); res.advance(), i++) {
// Verify distance
assertEquals("Expected distance doesn't match.", shouldd[i], res.getDistance().doubleValue());
// verify vector
diff --git a/test/de/lmu/ifi/dbs/elki/index/preprocessed/TestMaterializedKNNAndRKNNPreprocessor.java b/test/de/lmu/ifi/dbs/elki/index/preprocessed/TestMaterializedKNNAndRKNNPreprocessor.java
index 68202213..10bd8a1e 100644
--- a/test/de/lmu/ifi/dbs/elki/index/preprocessed/TestMaterializedKNNAndRKNNPreprocessor.java
+++ b/test/de/lmu/ifi/dbs/elki/index/preprocessed/TestMaterializedKNNAndRKNNPreprocessor.java
@@ -47,7 +47,7 @@ import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.LinearScanKNNQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.LinearScanDistanceKNNQuery;
import de.lmu.ifi.dbs.elki.database.query.rknn.LinearScanRKNNQuery;
import de.lmu.ifi.dbs.elki.database.query.rknn.RKNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
@@ -98,7 +98,7 @@ public class TestMaterializedKNNAndRKNNPreprocessor implements JUnit4Test {
@Test
public void testPreprocessor() throws ParameterException, UnableToComplyException {
ListParameterization params = new ListParameterization();
- params.addParameter(FileBasedDatabaseConnection.INPUT_ID, dataset);
+ params.addParameter(FileBasedDatabaseConnection.Parameterizer.INPUT_ID, dataset);
// get database
UpdatableDatabase db = ClassGenericsUtil.parameterizeOrAbort(HashmapDatabase.class, params);
@@ -110,7 +110,7 @@ public class TestMaterializedKNNAndRKNNPreprocessor implements JUnit4Test {
assertEquals("Data set size doesn't match parameters.", shoulds, rep.size());
// get linear queries
- LinearScanKNNQuery<DoubleVector, DoubleDistance> lin_knn_query = new LinearScanKNNQuery<>(distanceQuery);
+ LinearScanDistanceKNNQuery<DoubleVector, DoubleDistance> lin_knn_query = new LinearScanDistanceKNNQuery<>(distanceQuery);
LinearScanRKNNQuery<DoubleVector, DoubleDistance> lin_rknn_query = new LinearScanRKNNQuery<>(distanceQuery, lin_knn_query, k);
// get preprocessed queries
@@ -122,8 +122,8 @@ public class TestMaterializedKNNAndRKNNPreprocessor implements JUnit4Test {
RKNNQuery<DoubleVector, DoubleDistance> preproc_rknn_query = preproc.getRKNNQuery(distanceQuery);
// add as index
db.addIndex(preproc);
- assertTrue("Preprocessor knn query class incorrect.", !(preproc_knn_query instanceof LinearScanKNNQuery));
- assertTrue("Preprocessor rknn query class incorrect.", !(preproc_rknn_query instanceof LinearScanKNNQuery));
+ assertTrue("Preprocessor knn query class incorrect.", !(preproc_knn_query instanceof LinearScanDistanceKNNQuery));
+ assertTrue("Preprocessor rknn query class incorrect.", !(preproc_rknn_query instanceof LinearScanDistanceKNNQuery));
// test queries
testKNNQueries(rep, lin_knn_query, preproc_knn_query, k);
diff --git a/test/de/lmu/ifi/dbs/elki/math/TestKernelDensityFitting.java b/test/de/lmu/ifi/dbs/elki/math/TestKernelDensityFitting.java
index 795b7c94..57129241 100644
--- a/test/de/lmu/ifi/dbs/elki/math/TestKernelDensityFitting.java
+++ b/test/de/lmu/ifi/dbs/elki/math/TestKernelDensityFitting.java
@@ -75,7 +75,7 @@ public class TestKernelDensityFitting implements JUnit4Test {
public final void testFitDoubleArray() {
ListParameterization config = new ListParameterization();
// Input
- config.addParameter(FileBasedDatabaseConnection.INPUT_ID, dataset);
+ config.addParameter(FileBasedDatabaseConnection.Parameterizer.INPUT_ID, dataset);
// This data was generated with a mean of 0.0 and stddev 1.23,
// get database
diff --git a/test/de/lmu/ifi/dbs/elki/math/TestWeightFunctions.java b/test/de/lmu/ifi/dbs/elki/math/TestWeightFunctions.java
index c24bab2e..d2a71615 100644
--- a/test/de/lmu/ifi/dbs/elki/math/TestWeightFunctions.java
+++ b/test/de/lmu/ifi/dbs/elki/math/TestWeightFunctions.java
@@ -53,7 +53,7 @@ public class TestWeightFunctions implements JUnit4Test {
double[] at0 = { 1.0, 1.0, 1.0, 1.0, 0.3989422804014327, 1.0, 1.0 };
double[] at01 = { 1.0, 0.8693490686884612, 0.920344325445942, 0.9772372209558107, 0.3969525474770118, 0.91, 0.7943282347242815 };
double[] at09 = { 1.0, 0.13877499454059491, 0.36812025069351895, 0.15488166189124816, 0.2660852498987548, 0.18999999999999995, 0.12589254117941673 };
- double[] at10 = { 1.0, 0.10000000000000016, 0.317310507862914, 0.10000000000000002, 0.24197072451914337, 0.09999999999999998, 0.10000000000000002 };
+ double[] at10 = { 1.0, 0.10000000000000016, 0.31731050786291404, 0.10000000000000002, 0.24197072451914337, 0.09999999999999998, 0.10000000000000002 };
assert (wf.length == at0.length);
assert (wf.length == at01.length);
@@ -65,10 +65,10 @@ public class TestWeightFunctions implements JUnit4Test {
double val01 = wf[i].getWeight(0.1, 1, 1);
double val09 = wf[i].getWeight(0.9, 1, 1);
double val10 = wf[i].getWeight(1.0, 1, 1);
- assertEquals(wf[i].getClass().getSimpleName() + " at 0.0", at0[i], val0, Double.MIN_VALUE);
- assertEquals(wf[i].getClass().getSimpleName() + " at 0.1", at01[i], val01, Double.MIN_VALUE);
- assertEquals(wf[i].getClass().getSimpleName() + " at 0.9", at09[i], val09, Double.MIN_VALUE);
- assertEquals(wf[i].getClass().getSimpleName() + " at 1.0", at10[i], val10, Double.MIN_VALUE);
+ assertEquals(wf[i].getClass().getSimpleName() + " at 0.0", at0[i], val0, 1e-15);
+ assertEquals(wf[i].getClass().getSimpleName() + " at 0.1", at01[i], val01, 1e-15);
+ assertEquals(wf[i].getClass().getSimpleName() + " at 0.9", at09[i], val09, 1e-15);
+ assertEquals(wf[i].getClass().getSimpleName() + " at 1.0", at10[i], val10, 1e-15);
}
}
diff --git a/test/de/lmu/ifi/dbs/elki/math/statistics/distribution/AbstractDistributionTest.java b/test/de/lmu/ifi/dbs/elki/math/statistics/distribution/AbstractDistributionTest.java
index 3e613e4a..63a19072 100644
--- a/test/de/lmu/ifi/dbs/elki/math/statistics/distribution/AbstractDistributionTest.java
+++ b/test/de/lmu/ifi/dbs/elki/math/statistics/distribution/AbstractDistributionTest.java
@@ -10,7 +10,7 @@ import static org.junit.Assert.assertTrue;
*/
public class AbstractDistributionTest {
public void checkPDF(Distribution d, double[] x, double[] expected, double err) {
- int maxerrlev = Integer.MIN_VALUE;
+ int maxerrlev = -15;
for(int i = 0; i < x.length; i++) {
double val = d.pdf(x[i]);
if(val == expected[i]) {
@@ -25,14 +25,11 @@ public class AbstractDistributionTest {
assertEquals("Error magnitude: 1e" + errlev + " at " + x[i], expected[i], val, err);
}
int given = (int) Math.floor(Math.log10(err * 1.1));
- // if (given > maxerrlev) {
- // System.err.println("PDF Error for "+d+" magnitude is not tight: expected "+maxerrlev+" got "+given);
- // }
- assertTrue("Error magnitude is not tight: expected " + maxerrlev + " got " + given, given <= maxerrlev);
+ assertTrue("Error magnitude is not tight: measured " + maxerrlev + " specified " + given, given <= maxerrlev);
}
public void checkCDF(Distribution d, double[] x, double[] expected, double err) {
- int maxerrlev = Integer.MIN_VALUE;
+ int maxerrlev = -15;
for(int i = 0; i < x.length; i++) {
double val = d.cdf(x[i]);
if(val == expected[i]) {
@@ -47,16 +44,13 @@ public class AbstractDistributionTest {
assertEquals("Error magnitude: 1e" + errlev + " at " + x[i], expected[i], val, err);
}
int given = (int) Math.floor(Math.log10(err * 1.1));
- // if (given > maxerrlev) {
- // System.err.println("CDF Error for "+d+" magnitude is not tight: expected "+maxerrlev+" got "+given);
- // }
- assertTrue("Error magnitude is not tight: expected " + maxerrlev + " got " + given, given <= maxerrlev);
+ assertTrue("Error magnitude is not tight: measured " + maxerrlev + " specified " + given, given <= maxerrlev);
}
public void checkQuantile(Distribution d, double[] x, double[] expected, double err) {
- int maxerrlev = Integer.MIN_VALUE;
+ int maxerrlev = -15;
for(int i = 0; i < x.length; i++) {
- if (Double.isNaN(expected[i])) {
+ if(Double.isNaN(expected[i])) {
continue;
}
double val = d.quantile(x[i]);
@@ -72,9 +66,6 @@ public class AbstractDistributionTest {
assertEquals("Error magnitude: 1e" + errlev + " at " + x[i], expected[i], val, err);
}
int given = (int) Math.floor(Math.log10(err * 1.1));
- // if (given > maxerrlev) {
- // System.err.println("Probit Error for "+d+" magnitude is not tight: expected "+maxerrlev+" got "+given);
- // }
- assertTrue("Error magnitude is not tight: expected " + maxerrlev + " got " + given, given <= maxerrlev);
+ assertTrue("Error magnitude is not tight: measured " + maxerrlev + " specified " + given, given <= maxerrlev);
}
} \ No newline at end of file
diff --git a/test/de/lmu/ifi/dbs/elki/utilities/TestFormatUtil.java b/test/de/lmu/ifi/dbs/elki/utilities/TestFormatUtil.java
new file mode 100644
index 00000000..93a739ba
--- /dev/null
+++ b/test/de/lmu/ifi/dbs/elki/utilities/TestFormatUtil.java
@@ -0,0 +1,84 @@
+package de.lmu.ifi.dbs.elki.utilities;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.junit.Test;
+
+import de.lmu.ifi.dbs.elki.JUnit4Test;
+
+public class TestFormatUtil implements JUnit4Test {
+ @Test
+ public void testParseDouble() {
+ assertEquals(0., FormatUtil.parseDouble("0"), 0.);
+ assertEquals(1., FormatUtil.parseDouble("1"), 0.);
+ assertEquals(-1., FormatUtil.parseDouble("-1"), 0.);
+ assertEquals(0., FormatUtil.parseDouble("0.0"), 0.);
+ assertEquals(1., FormatUtil.parseDouble("1.0"), 0.);
+ assertEquals(-1., FormatUtil.parseDouble("-1.0"), 0.);
+ assertEquals(.2, FormatUtil.parseDouble("0.2"), 0.);
+ assertEquals(-.2, FormatUtil.parseDouble("-0.2"), 0.);
+ assertEquals(.2, FormatUtil.parseDouble(".2"), 0.);
+ assertEquals(-.2, FormatUtil.parseDouble("-.2"), 0.);
+ assertEquals(2000., FormatUtil.parseDouble("2.0e3"), 0.);
+ assertEquals(2000., FormatUtil.parseDouble("2.0E3"), 0.);
+ assertEquals(-2000., FormatUtil.parseDouble("-2.0e3"), 0.);
+ assertEquals(-2000., FormatUtil.parseDouble("-2.0E3"), 0.);
+ assertEquals(.002, FormatUtil.parseDouble("2.0e-3"), 0.);
+ assertEquals(.002, FormatUtil.parseDouble("2.0E-3"), 0.);
+ assertEquals(-.002, FormatUtil.parseDouble("-2.0e-3"), 0.);
+ assertEquals(-.002, FormatUtil.parseDouble("-2.0E-3"), 0.);
+
+ // Case where the JDK had a serious bug, in a few variations
+ assertEquals(2.2250738585072012e-308, FormatUtil.parseDouble("2.2250738585072012e-308"), 0.);
+ assertEquals(0.00022250738585072012e-304, FormatUtil.parseDouble("0.00022250738585072012e-304"), 0.);
+ assertEquals(00000000002.2250738585072012e-308, FormatUtil.parseDouble("00000000002.2250738585072012e-308"), 0.);
+ assertEquals(2.2250738585072012e-00308, FormatUtil.parseDouble("2.2250738585072012e-00308"), 0.);
+
+ assertTrue(Double.POSITIVE_INFINITY == FormatUtil.parseDouble("inf"));
+ assertTrue(Double.NEGATIVE_INFINITY == FormatUtil.parseDouble("-inf"));
+ assertTrue(Double.POSITIVE_INFINITY == FormatUtil.parseDouble("∞"));
+ assertTrue(Double.NEGATIVE_INFINITY == FormatUtil.parseDouble("-∞"));
+ assertTrue(Double.isNaN(FormatUtil.parseDouble("nan")));
+
+ assertEquals(1, FormatUtil.parseDouble("+1"), 0.);
+ }
+
+ @Test(expected = NumberFormatException.class)
+ public void textOnlyPlus() {
+ FormatUtil.parseDouble("+");
+ }
+
+ @Test(expected = NumberFormatException.class)
+ public void textExtraCharacer() {
+ FormatUtil.parseDouble("123Banana");
+ }
+
+ @Test(expected = NumberFormatException.class)
+ public void textTooManyDigits() {
+ FormatUtil.parseDouble("123456789012345678901234567890");
+ }
+}