summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/algorithm
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm')
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/APRIORI.java20
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/AbstractAlgorithm.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/AbstractDistanceBasedAlgorithm.java15
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/AbstractPrimitiveDistanceBasedAlgorithm.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/Algorithm.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/DependencyDerivator.java12
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/DistanceBasedAlgorithm.java49
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/DummyAlgorithm.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/KNNDistanceOrder.java12
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/KNNJoin.java30
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/MaterializeDistances.java10
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/NullAlgorithm.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/benchmark/KNNBenchmarkAlgorithm.java51
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/benchmark/RangeQueryBenchmarkAlgorithm.java28
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/benchmark/ValidateApproximativeKNNIndex.java430
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/benchmark/package-info.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedClustering.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedDBSCAN.java30
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/CanopyPreClustering.java236
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/ClusteringAlgorithm.java3
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/DBSCAN.java20
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/DeLiClu.java29
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/EM.java54
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/NaiveMeanShiftClustering.java28
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICS.java38
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSTypeAlgorithm.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSXi.java33
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/SLINK.java818
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/SNNClustering.java14
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/CASH.java57
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/COPAC.java22
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ERiC.java178
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/FourC.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/HiCO.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/LMCLUS.java12
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ORCLUS.java34
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHInterval.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHIntervalSplit.java10
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/ParameterizationFunction.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/package-info.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/package-info.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/CorePredicate.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/EpsilonNeighborPredicate.java20
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/GeneralizedDBSCAN.java173
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/MinPtsCorePredicate.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/NeighborPredicate.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/package-info.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/CentroidLinkageMethod.java84
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/CompleteLinkageMethod.java70
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/ExtractFlatClusteringFromHierarchy.java854
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/GroupAverageLinkageMethod.java82
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/HierarchicalClusteringAlgorithm.java51
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/LinkageMethod.java56
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/MedianLinkageMethod.java80
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/NaiveAgglomerativeHierarchicalClustering.java303
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/PointerHierarchyRepresentationResult.java97
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/SLINK.java368
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/SingleLinkageMethod.java80
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/WardLinkageMethod.java86
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/WeightedAverageLinkageMethod.java84
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeans.java182
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeansInitialization.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/BestOfMultipleKMeans.java219
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FarthestPointsInitialMeans.java186
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FirstKInitialMeans.java9
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeans.java46
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansBisecting.java231
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansInitialization.java12
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java86
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansMacQueen.java88
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansPlusPlusInitialMeans.java16
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMediansLloyd.java76
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsEM.java22
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsInitialization.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsPAM.java22
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/PAMInitialMeans.java12
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyChosenInitialMeans.java11
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyGeneratedInitialMeans.java9
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/SampleKMeansInitialization.java160
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/package-info.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/KMeansQualityMeasure.java54
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/WithinClusterMeanDistanceQualityMeasure.java89
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/WithinClusterVarianceQualityMeasure.java83
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/package-info.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/package-info.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java30
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java74
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/HiSC.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PROCLUS.java214
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PreDeCon.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java140
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SubspaceClusteringAlgorithm.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUESubspace.java12
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUEUnit.java12
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/package-info.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/package-info.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelClustering.java12
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelHierarchicalClustering.java65
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelOrAllInOneClustering.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByModelClustering.java10
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllInOne.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllNoise.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/package-info.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java67
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java10
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractDBOutlier.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java168
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuNaive.java14
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/COP.java70
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierDetection.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierScore.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/HilOut.java287
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNOutlier.java12
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNWeightOutlier.java18
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/ODIN.java192
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java18
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java30
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleCOP.java22
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/ALOCI.java (renamed from src/de/lmu/ifi/dbs/elki/algorithm/outlier/ALOCI.java)15
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/FlexibleLOF.java (renamed from src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOF.java)204
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/INFLO.java (renamed from src/de/lmu/ifi/dbs/elki/algorithm/outlier/INFLO.java)13
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDF.java (renamed from src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDF.java)72
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDOF.java (renamed from src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDOF.java)19
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOCI.java (renamed from src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOCI.java)29
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOF.java293
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LoOP.java (renamed from src/de/lmu/ifi/dbs/elki/algorithm/outlier/LoOP.java)37
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/OnlineLOF.java (renamed from src/de/lmu/ifi/dbs/elki/algorithm/outlier/OnlineLOF.java)242
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimpleKernelDensityLOF.java (renamed from src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleKernelDensityLOF.java)43
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimplifiedLOF.java (renamed from src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleLOF.java)57
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/package-info.java27
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java18
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java36
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java9
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/SimpleOutlierEnsemble.java11
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/package-info.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/package-info.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractDistanceBasedSpatialOutlier.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractNeighborhoodOutlier.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java28
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java9
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java8
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java19
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java10
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/package-info.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java8
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java8
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/package-info.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/package-info.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java42
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java12
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/package-info.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAverageCoordinateOutlier.java98
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java79
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/package-info.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/package-info.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/statistics/AddSingleScale.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/statistics/AveragePrecisionAtK.java96
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/statistics/DistanceStatisticsWithClasses.java18
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/statistics/EvaluateRankingQuality.java22
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/statistics/RankingQualityHistogram.java12
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/statistics/package-info.java2
182 files changed, 6852 insertions, 2831 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/APRIORI.java b/src/de/lmu/ifi/dbs/elki/algorithm/APRIORI.java
index 70706df8..07aaf3fc 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/APRIORI.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/APRIORI.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -51,7 +51,6 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.OnlyOneIsAllowed
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Parameter;
/**
* Provides the APRIORI algorithm for Mining Association Rules.
@@ -128,8 +127,8 @@ public class APRIORI extends AbstractAlgorithm<AprioriResult> {
* @return the AprioriResult learned by this APRIORI
*/
public AprioriResult run(Database database, Relation<BitVector> relation) {
- Map<BitSet, Integer> support = new HashMap<BitSet, Integer>();
- List<BitSet> solution = new ArrayList<BitSet>();
+ Map<BitSet, Integer> support = new HashMap<>();
+ List<BitSet> solution = new ArrayList<>();
final int size = relation.size();
if(size > 0) {
int dim;
@@ -176,7 +175,7 @@ public class APRIORI extends AbstractAlgorithm<AprioriResult> {
* frequent already
*/
protected BitSet[] prune(Map<BitSet, Integer> support, BitSet[] candidates, int size) {
- List<BitSet> candidateList = new ArrayList<BitSet>();
+ List<BitSet> candidateList = new ArrayList<>();
// MinFreq pruning
if(minfreq >= 0) {
for(BitSet bitSet : candidates) {
@@ -229,7 +228,7 @@ public class APRIORI extends AbstractAlgorithm<AprioriResult> {
* increasing the length by 1
*/
protected BitSet[] join(BitSet[] frequentItemsets) {
- List<BitSet> joined = new ArrayList<BitSet>();
+ List<BitSet> joined = new ArrayList<>();
for(int i = 0; i < frequentItemsets.length; i++) {
for(int j = i + 1; j < frequentItemsets.length; j++) {
BitSet b1 = (BitSet) frequentItemsets[i].clone();
@@ -272,7 +271,7 @@ public class APRIORI extends AbstractAlgorithm<AprioriResult> {
}
}
}
- List<BitSet> frequentItemsets = new ArrayList<BitSet>();
+ List<BitSet> frequentItemsets = new ArrayList<>();
if(minfreq >= 0.0) {
// TODO: work with integers?
double critsupp = minfreq * database.size();
@@ -340,11 +339,8 @@ public class APRIORI extends AbstractAlgorithm<AprioriResult> {
}
// global parameter constraints
- ArrayList<Parameter<?>> globalConstraints = new ArrayList<Parameter<?>>();
- globalConstraints.add(minfreqP);
- globalConstraints.add(minsuppP);
- config.checkConstraint(new OnlyOneIsAllowedToBeSetGlobalConstraint(globalConstraints));
- config.checkConstraint(new OneMustBeSetGlobalConstraint(globalConstraints));
+ config.checkConstraint(new OnlyOneIsAllowedToBeSetGlobalConstraint(minfreqP, minsuppP));
+ config.checkConstraint(new OneMustBeSetGlobalConstraint(minfreqP, minsuppP));
}
@Override
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/AbstractAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/AbstractAlgorithm.java
index b36df094..68ac9595 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/AbstractAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/AbstractAlgorithm.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -177,6 +177,6 @@ public abstract class AbstractAlgorithm<R extends Result> implements Algorithm {
* @return Parameter object
*/
public static <F extends DistanceFunction<?, ?>> ObjectParameter<F> makeParameterDistanceFunction(Class<?> defaultDistanceFunction, Class<?> restriction) {
- return new ObjectParameter<F>(AbstractDistanceBasedAlgorithm.DISTANCE_FUNCTION_ID, restriction, defaultDistanceFunction);
+ return new ObjectParameter<>(AbstractDistanceBasedAlgorithm.DISTANCE_FUNCTION_ID, restriction, defaultDistanceFunction);
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/AbstractDistanceBasedAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/AbstractDistanceBasedAlgorithm.java
index e9d638dc..3420e279 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/AbstractDistanceBasedAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/AbstractDistanceBasedAlgorithm.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -24,11 +24,10 @@ package de.lmu.ifi.dbs.elki.algorithm;
*/
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.result.Result;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
@@ -45,15 +44,10 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
* @param <D> the type of Distance used by this Algorithm
* @param <R> the type of result to retrieve from this Algorithm
*/
-public abstract class AbstractDistanceBasedAlgorithm<O, D extends Distance<D>, R extends Result> extends AbstractAlgorithm<R> {
- /**
- * OptionID for {@link #DISTANCE_FUNCTION_ID}.
- */
- public static final OptionID DISTANCE_FUNCTION_ID = new OptionID("algorithm.distancefunction", "Distance function to determine the distance between database objects.");
-
+public abstract class AbstractDistanceBasedAlgorithm<O, D extends Distance<D>, R extends Result> extends AbstractAlgorithm<R> implements DistanceBasedAlgorithm<O, D> {
/**
* Holds the instance of the distance function specified by
- * {@link #DISTANCE_FUNCTION_ID}.
+ * {@link DistanceBasedAlgorithm#DISTANCE_FUNCTION_ID}.
*/
private DistanceFunction<? super O, D> distanceFunction;
@@ -72,6 +66,7 @@ public abstract class AbstractDistanceBasedAlgorithm<O, D extends Distance<D>, R
*
* @return the distanceFunction
*/
+ @Override
public DistanceFunction<? super O, D> getDistanceFunction() {
return distanceFunction;
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/AbstractPrimitiveDistanceBasedAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/AbstractPrimitiveDistanceBasedAlgorithm.java
index 7bca1931..40fe67c3 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/AbstractPrimitiveDistanceBasedAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/AbstractPrimitiveDistanceBasedAlgorithm.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -23,8 +23,8 @@ package de.lmu.ifi.dbs.elki.algorithm;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.result.Result;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
@@ -50,7 +50,7 @@ public abstract class AbstractPrimitiveDistanceBasedAlgorithm<O, D extends Dista
* Holds the instance of the distance function specified by
* {@link AbstractDistanceBasedAlgorithm#DISTANCE_FUNCTION_ID}.
*/
- private PrimitiveDistanceFunction<? super O, D> distanceFunction;
+ protected PrimitiveDistanceFunction<? super O, D> distanceFunction;
/**
* Constructor.
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/Algorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/Algorithm.java
index e5a4cc07..9b367db4 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/Algorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/Algorithm.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/DependencyDerivator.java b/src/de/lmu/ifi/dbs/elki/algorithm/DependencyDerivator.java
index 2992ae4a..cc40d13b 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/DependencyDerivator.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/DependencyDerivator.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -33,10 +33,10 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid;
@@ -163,7 +163,7 @@ public class DependencyDerivator<V extends NumberVector<?>, D extends Distance<D
}
else {
DistanceQuery<V, D> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
- KNNResult<D> queryResults = database.getKNNQuery(distanceQuery, this.sampleSize).getKNNForObject(centroidDV, this.sampleSize);
+ KNNList<D> queryResults = database.getKNNQuery(distanceQuery, this.sampleSize).getKNNForObject(centroidDV, this.sampleSize);
ids = DBIDUtil.newHashSet(queryResults);
}
}
@@ -210,7 +210,7 @@ public class DependencyDerivator<V extends NumberVector<?>, D extends Distance<D
// TODO: what if we don't have any weak eigenvectors?
if(weakEigenvectors.getColumnDimensionality() == 0) {
- sol = new CorrelationAnalysisSolution<V>(null, db, strongEigenvectors, weakEigenvectors, pcares.similarityMatrix(), centroid);
+ sol = new CorrelationAnalysisSolution<>(null, db, strongEigenvectors, weakEigenvectors, pcares.similarityMatrix(), centroid);
}
else {
Matrix transposedWeakEigenvectors = weakEigenvectors.transpose();
@@ -249,7 +249,7 @@ public class DependencyDerivator<V extends NumberVector<?>, D extends Distance<D
LinearEquationSystem lq = new LinearEquationSystem(a, b.getArrayRef());
lq.solveByTotalPivotSearch();
- sol = new CorrelationAnalysisSolution<V>(lq, db, strongEigenvectors, pcares.getWeakEigenvectors(), pcares.similarityMatrix(), centroid);
+ sol = new CorrelationAnalysisSolution<>(lq, db, strongEigenvectors, pcares.getWeakEigenvectors(), pcares.similarityMatrix(), centroid);
if(LOG.isDebuggingFine()) {
StringBuilder log = new StringBuilder();
@@ -331,7 +331,7 @@ public class DependencyDerivator<V extends NumberVector<?>, D extends Distance<D
nf.setMaximumFractionDigits(outputAccuracy);
nf.setMinimumFractionDigits(outputAccuracy);
- return new DependencyDerivator<V, D>(distanceFunction, nf, pca, sampleSize, randomSample);
+ return new DependencyDerivator<>(distanceFunction, nf, pca, sampleSize, randomSample);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/DistanceBasedAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/DistanceBasedAlgorithm.java
new file mode 100644
index 00000000..048d2782
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/DistanceBasedAlgorithm.java
@@ -0,0 +1,49 @@
+package de.lmu.ifi.dbs.elki.algorithm;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+
+/**
+ * Very broad interface for distance based algorithms.
+ *
+ * @author Erich Schubert
+ *
+ * @param <O> Object type
+ * @param <D> Distance type
+ */
+public interface DistanceBasedAlgorithm<O, D extends Distance<?>> extends Algorithm {
+ /**
+ * OptionID for {@link #DISTANCE_FUNCTION_ID}.
+ */
+ public static final OptionID DISTANCE_FUNCTION_ID = new OptionID("algorithm.distancefunction", "Distance function to determine the distance between database objects.");
+
+ /**
+ * Returns the distanceFunction.
+ *
+ * @return the distanceFunction
+ */
+ DistanceFunction<? super O, D> getDistanceFunction();
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/DummyAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/DummyAlgorithm.java
index 0f871535..24de36ce 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/DummyAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/DummyAlgorithm.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -31,7 +31,7 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.result.Result;
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/KNNDistanceOrder.java b/src/de/lmu/ifi/dbs/elki/algorithm/KNNDistanceOrder.java
index 7e9ce77e..b696ed36 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/KNNDistanceOrder.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/KNNDistanceOrder.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -32,11 +32,11 @@ import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.result.KNNDistanceOrderResult;
@@ -114,15 +114,15 @@ public class KNNDistanceOrder<O, D extends Distance<D>> extends AbstractDistance
final KNNQuery<O, D> knnQuery = database.getKNNQuery(distanceQuery, k);
final Random random = new Random();
- List<D> knnDistances = new ArrayList<D>(relation.size());
+ List<D> knnDistances = new ArrayList<>(relation.size());
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
if(random.nextDouble() < percentage) {
- final KNNResult<D> neighbors = knnQuery.getKNNForDBID(iditer, k);
+ final KNNList<D> neighbors = knnQuery.getKNNForDBID(iditer, k);
knnDistances.add(neighbors.getKNNDistance());
}
}
Collections.sort(knnDistances, Collections.reverseOrder());
- return new KNNDistanceOrderResult<D>("kNN distance order", "knn-order", knnDistances);
+ return new KNNDistanceOrderResult<>("kNN distance order", "knn-order", knnDistances);
}
@Override
@@ -179,7 +179,7 @@ public class KNNDistanceOrder<O, D extends Distance<D>> extends AbstractDistance
@Override
protected KNNDistanceOrder<O, D> makeInstance() {
- return new KNNDistanceOrder<O, D>(distanceFunction, k, percentage);
+ return new KNNDistanceOrder<>(distanceFunction, k, percentage);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/KNNJoin.java b/src/de/lmu/ifi/dbs/elki/algorithm/KNNJoin.java
index 9e73d959..dddd8fdb 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/KNNJoin.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/KNNJoin.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -36,16 +36,16 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNHeap;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNHeap;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.DistanceUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDoubleDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNHeap;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNHeap;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNUtil;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.index.tree.LeafEntry;
import de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialEntry;
@@ -56,7 +56,7 @@ import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
import de.lmu.ifi.dbs.elki.result.ResultUtil;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparableMinHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
@@ -84,7 +84,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
*/
@Title("K-Nearest Neighbor Join")
@Description("Algorithm to find the k-nearest neighbors of each object in a spatial database")
-public class KNNJoin<V extends NumberVector<?>, D extends Distance<D>, N extends SpatialNode<N, E>, E extends SpatialEntry> extends AbstractDistanceBasedAlgorithm<V, D, DataStore<KNNResult<D>>> {
+public class KNNJoin<V extends NumberVector<?>, D extends Distance<D>, N extends SpatialNode<N, E>, E extends SpatialEntry> extends AbstractDistanceBasedAlgorithm<V, D, DataStore<KNNList<D>>> {
/**
* The logger for this class.
*/
@@ -120,7 +120,7 @@ public class KNNJoin<V extends NumberVector<?>, D extends Distance<D>, N extends
* @return result
*/
@SuppressWarnings("unchecked")
- public WritableDataStore<KNNResult<D>> run(Database database, Relation<V> relation) {
+ public WritableDataStore<KNNList<D>> run(Database database, Relation<V> relation) {
if (!(getDistanceFunction() instanceof SpatialPrimitiveDistanceFunction)) {
throw new IllegalStateException("Distance Function must be an instance of " + SpatialPrimitiveDistanceFunction.class.getName());
}
@@ -134,10 +134,10 @@ public class KNNJoin<V extends NumberVector<?>, D extends Distance<D>, N extends
DBIDs ids = relation.getDBIDs();
// data pages
- List<E> ps_candidates = new ArrayList<E>(index.getLeaves());
+ List<E> ps_candidates = new ArrayList<>(index.getLeaves());
// knn heaps
- List<List<KNNHeap<D>>> heaps = new ArrayList<List<KNNHeap<D>>>(ps_candidates.size());
- Heap<Task> pq = new Heap<Task>(ps_candidates.size() * ps_candidates.size() / 10);
+ List<List<KNNHeap<D>>> heaps = new ArrayList<>(ps_candidates.size());
+ ComparableMinHeap<Task> pq = new ComparableMinHeap<>(ps_candidates.size() * ps_candidates.size() / 10);
// Initialize with the page self-pairing
for (int i = 0; i < ps_candidates.size(); i++) {
@@ -217,7 +217,7 @@ public class KNNJoin<V extends NumberVector<?>, D extends Distance<D>, N extends
fprogress.setCompleted(LOG);
}
- WritableDataStore<KNNResult<D>> knnLists = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_STATIC, KNNResult.class);
+ WritableDataStore<KNNList<D>> knnLists = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_STATIC, KNNList.class);
// FiniteProgress progress = logger.isVerbose() ? new
// FiniteProgress(this.getClass().getName(), relation.size(), logger) :
// null;
@@ -259,10 +259,10 @@ public class KNNJoin<V extends NumberVector<?>, D extends Distance<D>, N extends
* @return List of heaps
*/
private List<KNNHeap<D>> initHeaps(SpatialPrimitiveDistanceFunction<V, D> distFunction, N pr) {
- List<KNNHeap<D>> pr_heaps = new ArrayList<KNNHeap<D>>(pr.getNumEntries());
+ List<KNNHeap<D>> pr_heaps = new ArrayList<>(pr.getNumEntries());
// Create for each data object a knn heap
for (int j = 0; j < pr.getNumEntries(); j++) {
- pr_heaps.add(KNNUtil.newHeap(distFunction, k));
+ pr_heaps.add(DBIDUtil.newHeap(distFunction.getDistanceFactory(), k));
}
// Self-join first, as this is expected to improve most and cannot be
// pruned.
@@ -429,7 +429,7 @@ public class KNNJoin<V extends NumberVector<?>, D extends Distance<D>, N extends
@Override
protected KNNJoin<V, D, N, E> makeInstance() {
- return new KNNJoin<V, D, N, E>(distanceFunction, k);
+ return new KNNJoin<>(distanceFunction, k);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/MaterializeDistances.java b/src/de/lmu/ifi/dbs/elki/algorithm/MaterializeDistances.java
index 95a2a2b9..b02e9fed 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/MaterializeDistances.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/MaterializeDistances.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -82,7 +82,7 @@ public class MaterializeDistances<O, D extends NumberDistance<D, ?>> extends Abs
DistanceQuery<O, D> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
final int size = relation.size();
- Collection<CTriple<DBID, DBID, Double>> r = new ArrayList<CTriple<DBID, DBID, Double>>(size * (size + 1) >> 1);
+ Collection<CTriple<DBID, DBID, Double>> r = new ArrayList<>(size * (size + 1) >> 1);
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
for(DBIDIter iditer2 = relation.iterDBIDs(); iditer2.valid(); iditer2.advance()) {
@@ -91,10 +91,10 @@ public class MaterializeDistances<O, D extends NumberDistance<D, ?>> extends Abs
continue;
}
double d = distFunc.distance(iditer, iditer2).doubleValue();
- r.add(new CTriple<DBID, DBID, Double>(DBIDUtil.deref(iditer), DBIDUtil.deref(iditer2), d));
+ r.add(new CTriple<>(DBIDUtil.deref(iditer), DBIDUtil.deref(iditer2), d));
}
}
- return new CollectionResult<CTriple<DBID, DBID, Double>>("Distance Matrix", "distance-matrix", r);
+ return new CollectionResult<>("Distance Matrix", "distance-matrix", r);
}
@Override
@@ -117,7 +117,7 @@ public class MaterializeDistances<O, D extends NumberDistance<D, ?>> extends Abs
public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
@Override
protected MaterializeDistances<O, D> makeInstance() {
- return new MaterializeDistances<O, D>(distanceFunction);
+ return new MaterializeDistances<>(distanceFunction);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/NullAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/NullAlgorithm.java
index abd4c963..bb3f7f0d 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/NullAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/NullAlgorithm.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/benchmark/KNNBenchmarkAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/benchmark/KNNBenchmarkAlgorithm.java
index b0ea8cc1..40726793 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/benchmark/KNNBenchmarkAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/benchmark/KNNBenchmarkAlgorithm.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.benchmark;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -31,14 +31,15 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRange;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.datasource.DatabaseConnection;
import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.MeanVariance;
@@ -117,7 +118,7 @@ public class KNNBenchmarkAlgorithm<O, D extends Distance<D>> extends AbstractDis
public Result run(Database database, Relation<O> relation) {
// Get a distance and kNN query instance.
DistanceQuery<O, D> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
- KNNQuery<O, D> knnQuery = database.getKNNQuery(distQuery, 10);
+ KNNQuery<O, D> knnQuery = database.getKNNQuery(distQuery, k);
// No query set - use original database.
if (queries == null) {
@@ -131,17 +132,21 @@ public class KNNBenchmarkAlgorithm<O, D extends Distance<D>> extends AbstractDis
int size = (int) Math.min(sampling, relation.size());
sample = DBIDUtil.randomSample(relation.getDBIDs(), size, random);
}
- FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
+ FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
int hash = 0;
- MeanVariance mv = new MeanVariance();
+ MeanVariance mv = new MeanVariance(), mvdist = new MeanVariance();
for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
- KNNResult<D> knns = knnQuery.getKNNForDBID(iditer, k);
+ KNNList<D> knns = knnQuery.getKNNForDBID(iditer, k);
int ichecksum = 0;
for (DBIDIter it = knns.iter(); it.valid(); it.advance()) {
ichecksum += it.internalGetIndex();
}
hash = Util.mixHashCodes(hash, ichecksum);
mv.put(knns.size());
+ D kdist = knns.getKNNDistance();
+ if (kdist instanceof NumberDistance) {
+ mvdist.put(((NumberDistance<?, ?>) kdist).doubleValue());
+ }
if (prog != null) {
prog.incrementProcessed(LOG);
}
@@ -149,9 +154,12 @@ public class KNNBenchmarkAlgorithm<O, D extends Distance<D>> extends AbstractDis
if (prog != null) {
prog.ensureCompleted(LOG);
}
- if (LOG.isVerbose()) {
- LOG.verbose("Result hashcode: " + hash);
- LOG.verbose("Mean number of results: "+mv.toString());
+ if (LOG.isStatistics()) {
+ LOG.statistics("Result hashcode: " + hash);
+ LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
+ if (mvdist.getCount() > 0) {
+ LOG.statistics("Mean k-distance: " + mvdist.getMean() + " +- " + mvdist.getNaiveStddev());
+ }
}
} else {
// Separate query set.
@@ -181,32 +189,39 @@ public class KNNBenchmarkAlgorithm<O, D extends Distance<D>> extends AbstractDis
int size = (int) Math.min(sampling, sids.size());
sample = DBIDUtil.randomSample(sids, size, random);
}
- FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
+ FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
int hash = 0;
- MeanVariance mv = new MeanVariance();
+ MeanVariance mv = new MeanVariance(), mvdist = new MeanVariance();
for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
int off = sids.binarySearch(iditer);
assert (off >= 0);
@SuppressWarnings("unchecked")
O o = (O) bundle.data(off, col);
- KNNResult<D> knns = knnQuery.getKNNForObject(o, k);
+ KNNList<D> knns = knnQuery.getKNNForObject(o, k);
int ichecksum = 0;
for (DBIDIter it = knns.iter(); it.valid(); it.advance()) {
ichecksum += it.internalGetIndex();
}
hash = Util.mixHashCodes(hash, ichecksum);
mv.put(knns.size());
+ D kdist = knns.getKNNDistance();
+ if (kdist instanceof NumberDistance) {
+ mvdist.put(((NumberDistance<?, ?>) kdist).doubleValue());
+ }
if (prog != null) {
prog.incrementProcessed(LOG);
}
- if (LOG.isVerbose()) {
- LOG.verbose("Result hashcode: " + hash);
- LOG.verbose("Mean number of results: "+mv.toString());
- }
}
if (prog != null) {
prog.ensureCompleted(LOG);
}
+ if (LOG.isStatistics()) {
+ LOG.statistics("Result hashcode: " + hash);
+ LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
+ if (mvdist.getCount() > 0) {
+ LOG.statistics("Mean k-distance: " + mvdist.getMean() + " +- " + mvdist.getNaiveStddev());
+ }
+ }
}
return null;
}
@@ -279,7 +294,7 @@ public class KNNBenchmarkAlgorithm<O, D extends Distance<D>> extends AbstractDis
if (config.grab(kP)) {
k = kP.intValue();
}
- ObjectParameter<DatabaseConnection> queryP = new ObjectParameter<DatabaseConnection>(QUERY_ID, DatabaseConnection.class);
+ ObjectParameter<DatabaseConnection> queryP = new ObjectParameter<>(QUERY_ID, DatabaseConnection.class);
queryP.setOptional(true);
if (config.grab(queryP)) {
queries = queryP.instantiateClass(config);
@@ -297,7 +312,7 @@ public class KNNBenchmarkAlgorithm<O, D extends Distance<D>> extends AbstractDis
@Override
protected KNNBenchmarkAlgorithm<O, D> makeInstance() {
- return new KNNBenchmarkAlgorithm<O, D>(distanceFunction, k, queries, sampling, random);
+ return new KNNBenchmarkAlgorithm<>(distanceFunction, k, queries, sampling, random);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/benchmark/RangeQueryBenchmarkAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/benchmark/RangeQueryBenchmarkAlgorithm.java
index f483321d..1b5e827b 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/benchmark/RangeQueryBenchmarkAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/benchmark/RangeQueryBenchmarkAlgorithm.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.benchmark;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -34,6 +34,7 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRange;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
@@ -41,7 +42,6 @@ import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.datasource.DatabaseConnection;
import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
@@ -160,12 +160,12 @@ public class RangeQueryBenchmarkAlgorithm<O extends NumberVector<?>, D extends N
int size = (int) Math.min(sampling, relation.size());
sample = DBIDUtil.randomSample(relation.getDBIDs(), size, random);
}
- FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
+ FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
int hash = 0;
MeanVariance mv = new MeanVariance();
for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
D r = dfactory.fromDouble(radrel.get(iditer).doubleValue(0));
- DistanceDBIDResult<D> rres = rangeQuery.getRangeForDBID(iditer, r);
+ DistanceDBIDList<D> rres = rangeQuery.getRangeForDBID(iditer, r);
int ichecksum = 0;
for (DBIDIter it = rres.iter(); it.valid(); it.advance()) {
ichecksum += it.internalGetIndex();
@@ -179,9 +179,9 @@ public class RangeQueryBenchmarkAlgorithm<O extends NumberVector<?>, D extends N
if (prog != null) {
prog.ensureCompleted(LOG);
}
- if (LOG.isVerbose()) {
- LOG.verbose("Result hashcode: " + hash);
- LOG.verbose("Mean number of results: "+mv.toString());
+ if (LOG.isStatistics()) {
+ LOG.statistics("Result hashcode: " + hash);
+ LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
}
return null;
}
@@ -241,7 +241,7 @@ public class RangeQueryBenchmarkAlgorithm<O extends NumberVector<?>, D extends N
int size = (int) Math.min(sampling, sids.size());
sample = DBIDUtil.randomSample(sids, size, random);
}
- FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
+ FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
int hash = 0;
MeanVariance mv = new MeanVariance();
double[] buf = new double[dim];
@@ -254,7 +254,7 @@ public class RangeQueryBenchmarkAlgorithm<O extends NumberVector<?>, D extends N
}
O v = ofactory.newNumberVector(buf);
D r = dfactory.fromDouble(o.doubleValue(dim));
- DistanceDBIDResult<D> rres = rangeQuery.getRangeForObject(v, r);
+ DistanceDBIDList<D> rres = rangeQuery.getRangeForObject(v, r);
int ichecksum = 0;
for (DBIDIter it = rres.iter(); it.valid(); it.advance()) {
ichecksum += it.internalGetIndex();
@@ -268,9 +268,9 @@ public class RangeQueryBenchmarkAlgorithm<O extends NumberVector<?>, D extends N
if (prog != null) {
prog.ensureCompleted(LOG);
}
- if (LOG.isVerbose()) {
- LOG.verbose("Result hashcode: " + hash);
- LOG.verbose("Mean number of results: "+mv.toString());
+ if (LOG.isStatistics()) {
+ LOG.statistics("Result hashcode: " + hash);
+ LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
}
return null;
}
@@ -333,7 +333,7 @@ public class RangeQueryBenchmarkAlgorithm<O extends NumberVector<?>, D extends N
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- ObjectParameter<DatabaseConnection> queryP = new ObjectParameter<DatabaseConnection>(QUERY_ID, DatabaseConnection.class);
+ ObjectParameter<DatabaseConnection> queryP = new ObjectParameter<>(QUERY_ID, DatabaseConnection.class);
queryP.setOptional(true);
if (config.grab(queryP)) {
queries = queryP.instantiateClass(config);
@@ -351,7 +351,7 @@ public class RangeQueryBenchmarkAlgorithm<O extends NumberVector<?>, D extends N
@Override
protected RangeQueryBenchmarkAlgorithm<O, D> makeInstance() {
- return new RangeQueryBenchmarkAlgorithm<O, D>(distanceFunction, queries, sampling, random);
+ return new RangeQueryBenchmarkAlgorithm<>(distanceFunction, queries, sampling, random);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/benchmark/ValidateApproximativeKNNIndex.java b/src/de/lmu/ifi/dbs/elki/algorithm/benchmark/ValidateApproximativeKNNIndex.java
new file mode 100644
index 00000000..3d0ea52a
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/benchmark/ValidateApproximativeKNNIndex.java
@@ -0,0 +1,430 @@
+package de.lmu.ifi.dbs.elki.algorithm.benchmark;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.regex.Pattern;
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.QueryUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRange;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
+import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.LinearScanKNNQuery;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.datasource.DatabaseConnection;
+import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.math.MeanVariance;
+import de.lmu.ifi.dbs.elki.result.Result;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.PatternParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
+
+/**
+ * Algorithm to validate the quality of an approximative kNN index, by
+ * performing a number of queries and comparing them to the results obtained by
+ * exact indexing (e.g. linear scanning).
+ *
+ * @author Erich Schubert
+ *
+ * @param <O> Object type
+ *
+ * @apiviz.uses KNNQuery
+ */
+public class ValidateApproximativeKNNIndex<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm<O, D, Result> {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(ValidateApproximativeKNNIndex.class);
+
+ /**
+ * Number of neighbors to retrieve.
+ */
+ protected int k = 10;
+
+ /**
+ * The alternate query point source. Optional.
+ */
+ protected DatabaseConnection queries = null;
+
+ /**
+ * Sampling size.
+ */
+ protected double sampling = -1;
+
+ /**
+ * Force linear scanning.
+ */
+ protected boolean forcelinear = false;
+
+ /**
+ * Random generator factory
+ */
+ protected RandomFactory random;
+
+ /**
+ * Filter pattern
+ */
+ protected Pattern pattern;
+
+ /**
+ * Constructor.
+ *
+ * @param distanceFunction Distance function to use
+ * @param k K parameter
+ * @param queries Query data set (may be null!)
+ * @param sampling Sampling rate
+   * @param forcelinear Force the use of linear scanning.
+   * @param random Random factory
+   * @param pattern Filter pattern to select query points by their label (may be null)
+ */
+ public ValidateApproximativeKNNIndex(DistanceFunction<? super O, D> distanceFunction, int k, DatabaseConnection queries, double sampling, boolean forcelinear, RandomFactory random, Pattern pattern) {
+ super(distanceFunction);
+ this.k = k;
+ this.queries = queries;
+ this.sampling = sampling;
+ this.forcelinear = forcelinear;
+ this.random = random;
+ this.pattern = pattern;
+ }
+
+ /**
+ * Run the algorithm.
+ *
+ * @param database Database
+ * @param relation Relation
+ * @return Null result
+ */
+ public Result run(Database database, Relation<O> relation) {
+ // Get a distance and kNN query instance.
+ DistanceQuery<O, D> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
+ // Approximate query:
+ KNNQuery<O, D> knnQuery = database.getKNNQuery(distQuery, k, DatabaseQuery.HINT_OPTIMIZED_ONLY);
+ if (knnQuery == null || knnQuery instanceof LinearScanKNNQuery) {
+ throw new AbortException("Expected an accelerated query, but got a linear scan -- index is not used.");
+ }
+ // Exact query:
+ KNNQuery<O, D> truekNNQuery;
+ if (forcelinear) {
+ truekNNQuery = QueryUtil.getLinearScanKNNQuery(distQuery);
+ } else {
+ truekNNQuery = database.getKNNQuery(distQuery, k, DatabaseQuery.HINT_EXACT);
+ }
+ if (knnQuery.getClass().equals(truekNNQuery.getClass())) {
+ LOG.warning("Query classes are the same. This experiment may be invalid!");
+ }
+
+ // No query set - use original database.
+ if (queries == null || pattern != null) {
+ // Relation to filter on
+ Relation<String> lrel = (pattern != null) ? DatabaseUtil.guessLabelRepresentation(database) : null;
+
+ final DBIDs sample;
+ if (sampling <= 0) {
+ sample = relation.getDBIDs();
+ } else if (sampling < 1.1) {
+ int size = (int) Math.min(sampling * relation.size(), relation.size());
+ sample = DBIDUtil.randomSample(relation.getDBIDs(), size, random);
+ } else {
+ int size = (int) Math.min(sampling, relation.size());
+ sample = DBIDUtil.randomSample(relation.getDBIDs(), size, random);
+ }
+ FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
+ MeanVariance mv = new MeanVariance(), mvrec = new MeanVariance();
+ MeanVariance mvdist = new MeanVariance(), mvdaerr = new MeanVariance(), mvdrerr = new MeanVariance();
+ int misses = 0;
+ for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
+ if (pattern == null || pattern.matcher(lrel.get(iditer)).find()) {
+ // Query index:
+ KNNList<D> knns = knnQuery.getKNNForDBID(iditer, k);
+ // Query reference:
+ KNNList<D> trueknns = truekNNQuery.getKNNForDBID(iditer, k);
+
+ // Put adjusted knn size:
+ mv.put(knns.size() * k / (double) trueknns.size());
+
+ // Put recall:
+          mvrec.put(DBIDUtil.intersectionSize(knns, trueknns) / (double) trueknns.size());
+
+ if (knns.size() >= k) {
+ D kdist = knns.getKNNDistance();
+ if (kdist instanceof NumberDistance) {
+ final double dist = ((NumberDistance<?, ?>) kdist).doubleValue();
+ final double tdist = ((NumberDistance<?, ?>) trueknns.getKNNDistance()).doubleValue();
+ if (tdist > 0.0) {
+ mvdist.put(dist);
+ mvdaerr.put(dist - tdist);
+ mvdrerr.put(dist / tdist);
+ }
+ }
+ } else {
+ // Less than k objects.
+ misses++;
+ }
+ }
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
+ }
+ }
+ if (prog != null) {
+ prog.ensureCompleted(LOG);
+ }
+ if (LOG.isStatistics()) {
+ LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
+ LOG.statistics("Recall of true results: " + mvrec.getMean() + " +- " + mvrec.getNaiveStddev());
+ if (mvdist.getCount() > 0) {
+ LOG.statistics("Mean k-distance: " + mvdist.getMean() + " +- " + mvdist.getNaiveStddev());
+ LOG.statistics("Mean absolute k-error: " + mvdaerr.getMean() + " +- " + mvdaerr.getNaiveStddev());
+ LOG.statistics("Mean relative k-error: " + mvdrerr.getMean() + " +- " + mvdrerr.getNaiveStddev());
+ }
+ if (misses > 0) {
+ LOG.statistics(String.format("Number of queries that returned less than k=%d objects: %d (%.2f%%)", k, misses, misses * 100. / mv.getCount()));
+ }
+ }
+ } else {
+ // Separate query set.
+ TypeInformation res = getDistanceFunction().getInputTypeRestriction();
+ MultipleObjectsBundle bundle = queries.loadData();
+ int col = -1;
+ for (int i = 0; i < bundle.metaLength(); i++) {
+ if (res.isAssignableFromType(bundle.meta(i))) {
+ col = i;
+ break;
+ }
+ }
+ if (col < 0) {
+ throw new AbortException("No compatible data type in query input was found. Expected: " + res.toString());
+ }
+ // Random sampling is a bit of hack, sorry.
+ // But currently, we don't (yet) have an "integer random sample" function.
+ DBIDRange sids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
+
+ final DBIDs sample;
+ if (sampling <= 0) {
+ sample = sids;
+ } else if (sampling < 1.1) {
+ int size = (int) Math.min(sampling * relation.size(), relation.size());
+ sample = DBIDUtil.randomSample(sids, size, random);
+ } else {
+ int size = (int) Math.min(sampling, sids.size());
+ sample = DBIDUtil.randomSample(sids, size, random);
+ }
+ FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
+ MeanVariance mv = new MeanVariance(), mvrec = new MeanVariance();
+ MeanVariance mvdist = new MeanVariance(), mvdaerr = new MeanVariance(), mvdrerr = new MeanVariance();
+ int misses = 0;
+ for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
+ int off = sids.binarySearch(iditer);
+ assert (off >= 0);
+ @SuppressWarnings("unchecked")
+ O o = (O) bundle.data(off, col);
+
+ // Query index:
+ KNNList<D> knns = knnQuery.getKNNForObject(o, k);
+ // Query reference:
+ KNNList<D> trueknns = truekNNQuery.getKNNForObject(o, k);
+
+ // Put adjusted knn size:
+ mv.put(knns.size() * k / (double) trueknns.size());
+
+ // Put recall:
+ mvrec.put(DBIDUtil.intersectionSize(knns, trueknns) / trueknns.size());
+
+ if (knns.size() >= k) {
+ D kdist = knns.getKNNDistance();
+ if (kdist instanceof NumberDistance) {
+ final double dist = ((NumberDistance<?, ?>) kdist).doubleValue();
+ final double tdist = ((NumberDistance<?, ?>) trueknns.getKNNDistance()).doubleValue();
+ if (tdist > 0.0) {
+ mvdist.put(dist);
+ mvdaerr.put(dist - tdist);
+ mvdrerr.put(dist / tdist);
+ }
+ }
+ } else {
+ // Less than k objects.
+ misses++;
+ }
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
+ }
+ }
+ if (prog != null) {
+ prog.ensureCompleted(LOG);
+ }
+ if (LOG.isStatistics()) {
+ LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
+ LOG.statistics("Recall of true results: " + mvrec.getMean() + " +- " + mvrec.getNaiveStddev());
+ if (mvdist.getCount() > 0) {
+ LOG.statistics("Mean absolute k-error: " + mvdaerr.getMean() + " +- " + mvdaerr.getNaiveStddev());
+ LOG.statistics("Mean relative k-error: " + mvdrerr.getMean() + " +- " + mvdrerr.getNaiveStddev());
+ }
+ if (misses > 0) {
+ LOG.statistics(String.format("Number of queries that returned less than k=%d objects: %d (%.2f%%)", k, misses, misses * 100. / mv.getCount()));
+ }
+ }
+ }
+ return null;
+ }
+
  /**
   * Input type restriction: whatever the configured distance function accepts,
   * wrapped as a one-element array as required by the Algorithm API.
   *
   * @return input type restriction of the distance function
   */
  @Override
  public TypeInformation[] getInputTypeRestriction() {
    return TypeUtil.array(getDistanceFunction().getInputTypeRestriction());
  }
+
  /**
   * @return the static class logger, for use by the abstract superclass
   */
  @Override
  protected Logging getLogger() {
    return LOG;
  }
+
+ /**
+ * Parameterization class
+ *
+ * @apiviz.exclude
+ *
+ * @author Erich Schubert
+ *
+ * @param <O> Object type
+ * @param <D> Distance type
+ */
+ public static class Parameterizer<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ /**
+ * Parameter for the number of neighbors.
+ */
+ public static final OptionID K_ID = new OptionID("validateknn.k", "Number of neighbors to retreive for kNN benchmarking.");
+
+ /**
+ * Parameter for the query dataset.
+ */
+ public static final OptionID QUERY_ID = new OptionID("validateknn.query", "Data source for the queries. If not set, the queries are taken from the database.");
+
+ /**
+ * Parameter for the sampling size.
+ */
+ public static final OptionID SAMPLING_ID = new OptionID("validateknn.sampling", "Sampling size parameter. If the value is less or equal 1, it is assumed to be the relative share. Larger values will be interpreted as integer sizes. By default, all data will be used.");
+
+ /**
+ * Force linear scanning.
+ */
+ public static final OptionID FORCE_ID = new OptionID("validateknn.force-linear", "Force the use of linear scanning as reference.");
+
+ /**
+ * Parameter for the random generator.
+ */
+ public static final OptionID RANDOM_ID = new OptionID("validateknn.random", "Random generator for sampling.");
+
+ /**
+ * Parameter to select query points.
+ */
+ public static final OptionID PATTERN_ID = new OptionID("validateknn.pattern", "Pattern to select query points.");
+
+ /**
+ * K parameter
+ */
+ protected int k = 10;
+
+ /**
+ * The alternate query point source. Optional.
+ */
+ protected DatabaseConnection queries = null;
+
+ /**
+ * Sampling size.
+ */
+ protected double sampling = -1;
+
+ /**
+ * Force linear scanning.
+ */
+ protected boolean forcelinear = false;
+
+ /**
+ * Random generator factory
+ */
+ protected RandomFactory random;
+
+ /**
+ * Filter pattern for query points.
+ */
+ protected Pattern pattern;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ IntParameter kP = new IntParameter(K_ID);
+ if (config.grab(kP)) {
+ k = kP.intValue();
+ }
+ PatternParameter patternP = new PatternParameter(PATTERN_ID);
+ patternP.setOptional(true);
+ if (config.grab(patternP)) {
+ pattern = patternP.getValue();
+ } else {
+ ObjectParameter<DatabaseConnection> queryP = new ObjectParameter<>(QUERY_ID, DatabaseConnection.class);
+ queryP.setOptional(true);
+ if (config.grab(queryP)) {
+ queries = queryP.instantiateClass(config);
+ }
+ }
+ DoubleParameter samplingP = new DoubleParameter(SAMPLING_ID);
+ samplingP.setOptional(true);
+ if (config.grab(samplingP)) {
+ sampling = samplingP.doubleValue();
+ }
+ Flag forceP = new Flag(FORCE_ID);
+ if (config.grab(forceP)) {
+ forcelinear = forceP.isTrue();
+ }
+ RandomParameter randomP = new RandomParameter(RANDOM_ID, RandomFactory.DEFAULT);
+ if (config.grab(randomP)) {
+ random = randomP.getValue();
+ }
+ }
+
+ @Override
+ protected ValidateApproximativeKNNIndex<O, D> makeInstance() {
+ return new ValidateApproximativeKNNIndex<>(distanceFunction, k, queries, sampling, forcelinear, random, pattern);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/benchmark/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/benchmark/package-info.java
index 6a98fa64..b10ef2ed 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/benchmark/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/benchmark/package-info.java
@@ -8,7 +8,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedClustering.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedClustering.java
index 05cc2b4f..0c4eb5fc 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedClustering.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedClustering.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -31,7 +31,7 @@ import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.QueryUtil;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedDBSCAN.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedDBSCAN.java
index f8b73f48..ee3b234c 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedDBSCAN.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedDBSCAN.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -41,14 +41,14 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDMIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.IndexBasedDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.LocallyWeightedDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
@@ -179,7 +179,7 @@ public abstract class AbstractProjectedDBSCAN<R extends Clustering<Model>, V ext
public Clustering<Model> run(Database database, Relation<V> relation) {
FiniteProgress objprog = getLogger().isVerbose() ? new FiniteProgress("Processing objects", relation.size(), getLogger()) : null;
IndefiniteProgress clusprog = getLogger().isVerbose() ? new IndefiniteProgress("Number of clusters", getLogger()) : null;
- resultList = new ArrayList<ModifiableDBIDs>();
+ resultList = new ArrayList<>();
noise = DBIDUtil.newHashSet();
processedIDs = DBIDUtil.newHashSet(relation.size());
@@ -215,14 +215,14 @@ public abstract class AbstractProjectedDBSCAN<R extends Clustering<Model>, V ext
clusprog.setProcessed(resultList.size(), getLogger());
}
- Clustering<Model> result = new Clustering<Model>(getLongResultName(), getShortResultName());
+ Clustering<Model> result = new Clustering<>(getLongResultName(), getShortResultName());
for(Iterator<ModifiableDBIDs> resultListIter = resultList.iterator(); resultListIter.hasNext();) {
Cluster<Model> c = new Cluster<Model>(resultListIter.next(), ClusterModel.CLUSTER);
- result.addCluster(c);
+ result.addToplevelCluster(c);
}
Cluster<Model> n = new Cluster<Model>(noise, true, ClusterModel.CLUSTER);
- result.addCluster(n);
+ result.addToplevelCluster(n);
if(objprog != null && clusprog != null) {
objprog.setProcessed(processedIDs.size(), getLogger());
@@ -279,7 +279,7 @@ public abstract class AbstractProjectedDBSCAN<R extends Clustering<Model>, V ext
}
// compute weighted epsilon neighborhood
- DistanceDBIDResult<DoubleDistance> neighbors = rangeQuery.getRangeForDBID(startObjectID, epsilon);
+ DistanceDBIDList<DoubleDistance> neighbors = rangeQuery.getRangeForDBID(startObjectID, epsilon);
// neighbors < minPts -> noise
if(neighbors.size() < minpts) {
noise.add(startObjectID);
@@ -294,7 +294,7 @@ public abstract class AbstractProjectedDBSCAN<R extends Clustering<Model>, V ext
// try to expand the cluster
ModifiableDBIDs currentCluster = DBIDUtil.newArray();
ModifiableDBIDs seeds = DBIDUtil.newHashSet();
- for (DistanceDBIDResultIter<DoubleDistance> seed = neighbors.iter(); seed.valid(); seed.advance()) {
+ for (DistanceDBIDListIter<DoubleDistance> seed = neighbors.iter(); seed.valid(); seed.advance()) {
int nextID_corrDim = distFunc.getIndex().getLocalProjection(seed).getCorrelationDimension();
// nextID is not reachable from start object
if(nextID_corrDim > lambda) {
@@ -320,11 +320,11 @@ public abstract class AbstractProjectedDBSCAN<R extends Clustering<Model>, V ext
continue;
}
- DistanceDBIDResult<DoubleDistance> reachables = rangeQuery.getRangeForDBID(iter, epsilon);
+ DistanceDBIDList<DoubleDistance> reachables = rangeQuery.getRangeForDBID(iter, epsilon);
iter.remove();
if(reachables.size() > minpts) {
- for (DistanceDBIDResultIter<DoubleDistance> r = reachables.iter(); r.valid(); r.advance()) {
+ for (DistanceDBIDListIter<DoubleDistance> r = reachables.iter(); r.valid(); r.advance()) {
int corrDim_r = distFunc.getIndex().getLocalProjection(r).getCorrelationDimension();
// r is not reachable from q
if(corrDim_r > lambda) {
@@ -395,7 +395,7 @@ public abstract class AbstractProjectedDBSCAN<R extends Clustering<Model>, V ext
protected Integer lambda;
protected void configInnerDistance(Parameterization config) {
- ObjectParameter<DistanceFunction<V, D>> innerdistP = new ObjectParameter<DistanceFunction<V, D>>(AbstractProjectedDBSCAN.INNER_DISTANCE_FUNCTION_ID, DistanceFunction.class, EuclideanDistanceFunction.class);
+ ObjectParameter<DistanceFunction<V, D>> innerdistP = new ObjectParameter<>(AbstractProjectedDBSCAN.INNER_DISTANCE_FUNCTION_ID, DistanceFunction.class, EuclideanDistanceFunction.class);
if(config.grab(innerdistP)) {
innerdist = innerdistP.instantiateClass(config);
}
@@ -403,7 +403,7 @@ public abstract class AbstractProjectedDBSCAN<R extends Clustering<Model>, V ext
protected void configEpsilon(Parameterization config, DistanceFunction<V, D> innerdist) {
D distanceParser = innerdist != null ? innerdist.getDistanceFactory() : null;
- DistanceParameter<D> epsilonP = new DistanceParameter<D>(EPSILON_ID, distanceParser);
+ DistanceParameter<D> epsilonP = new DistanceParameter<>(EPSILON_ID, distanceParser);
if(config.grab(epsilonP)) {
epsilon = epsilonP.getValue();
}
@@ -418,7 +418,7 @@ public abstract class AbstractProjectedDBSCAN<R extends Clustering<Model>, V ext
}
protected void configOuterDistance(Parameterization config, D epsilon, int minpts, Class<?> preprocessorClass, DistanceFunction<V, D> innerdist) {
- ObjectParameter<LocallyWeightedDistanceFunction<V>> outerdistP = new ObjectParameter<LocallyWeightedDistanceFunction<V>>(OUTER_DISTANCE_FUNCTION_ID, LocallyWeightedDistanceFunction.class, LocallyWeightedDistanceFunction.class);
+ ObjectParameter<LocallyWeightedDistanceFunction<V>> outerdistP = new ObjectParameter<>(OUTER_DISTANCE_FUNCTION_ID, LocallyWeightedDistanceFunction.class, LocallyWeightedDistanceFunction.class);
if(config.grab(outerdistP)) {
// parameters for the distance function
ListParameterization distanceFunctionParameters = new ListParameterization();
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/CanopyPreClustering.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/CanopyPreClustering.java
new file mode 100644
index 00000000..2dff7554
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/CanopyPreClustering.java
@@ -0,0 +1,236 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering;
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.model.ClusterModel;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDMIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDVar;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.WrongParameterValueException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DistanceParameter;
+
+/**
+ * Canopy pre-clustering is a simple preprocessing step for clustering.
+ *
+ * <p>
+ * Reference:<br>
+ * A. McCallum, K. Nigam, L.H. Ungar<br />
+ * Efficient Clustering of High Dimensional Data Sets with Application to
+ * Reference Matching<br />
+ * Proc. 6th ACM SIGKDD international conference on Knowledge discovery and data
+ * mining
+ * </p>
+ *
+ * @author Erich Schubert
+ *
+ * @param <O> Object type
+ * @param <D> Distance type
+ */
+@Reference(authors = "A. McCallum, K. Nigam, L.H. Ungar", title = "Efficient Clustering of High Dimensional Data Sets with Application to Reference Matching", booktitle = "Proc. 6th ACM SIGKDD international conference on Knowledge discovery and data mining", url = "http://dx.doi.org/10.1145%2F347090.347123")
+public class CanopyPreClustering<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm<O, D, Clustering<ClusterModel>> implements ClusteringAlgorithm<Clustering<ClusterModel>> {
+ /**
+ * Class logger.
+ */
+ private static final Logging LOG = Logging.getLogger(CanopyPreClustering.class);
+
+ /**
+ * Threshold for inclusion
+ */
+ private D t1;
+
+ /**
+ * Threshold for removal
+ */
+ private D t2;
+
+ /**
+ * Constructor.
+ *
+ * @param distanceFunction Distance function
+ * @param t1 Inclusion threshold
+ * @param t2 Exclusion threshold
+ */
+ public CanopyPreClustering(DistanceFunction<? super O, D> distanceFunction, D t1, D t2) {
+ super(distanceFunction);
+ this.t1 = t1;
+ this.t2 = t2;
+ }
+
+ /**
+ * Run the algorithm
+ *
+ * @param database Database
+ * @param relation Relation to process
+ */
+ public Clustering<ClusterModel> run(Database database, Relation<O> relation) {
+ DistanceQuery<O, D> dq = database.getDistanceQuery(relation, getDistanceFunction());
+ ModifiableDBIDs ids = DBIDUtil.newHashSet(relation.getDBIDs());
+ ArrayList<Cluster<ClusterModel>> clusters = new ArrayList<>();
+ final int size = relation.size();
+
+ if(t1.compareTo(t2) <= 0) {
+ LOG.warning(Parameterizer.T1_ID.getName() + " must be larger than " + Parameterizer.T2_ID.getName());
+ }
+
+ FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Canopy clustering", size, LOG) : null;
+
+ DBIDVar first = DBIDUtil.newVar();
+ while(!ids.isEmpty()) {
+ // Remove first element:
+ DBIDMIter iter = ids.iter();
+ first.set(iter);
+ iter.remove();
+ iter.advance();
+
+ // Start a new cluster:
+ ModifiableDBIDs cids = DBIDUtil.newArray();
+ cids.add(first);
+
+ // Compare to remaining objects:
+ for(; iter.valid(); iter.advance()) {
+ D dist = dq.distance(first, iter);
+ // Inclusion threshold:
+ if(t1.compareTo(dist) >= 0) {
+ cids.add(iter);
+ }
+ // Removal threshold:
+ if(t2.compareTo(dist) >= 0) {
+ iter.remove();
+ }
+ }
+ // TODO: remember the central object using a CanopyModel?
+ // Construct cluster:
+ clusters.add(new Cluster<>(cids, ClusterModel.CLUSTER));
+
+ if(prog != null) {
+ prog.setProcessed(size - ids.size(), LOG);
+ }
+ }
+ if(prog != null) {
+ prog.ensureCompleted(LOG);
+ }
+
+ return new Clustering<>("Canopy clustering", "canopy-clustering", clusters);
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(getDistanceFunction().getInputTypeRestriction());
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ *
+ * @param <O> Object type
+ * @param <D> Distance type
+ */
+ public static class Parameterizer<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ /**
+ * Parameter for the inclusion threshold of canopy clustering.
+ *
+ * Note: t1 > t2
+ *
+ * Syntax:
+ *
+ * <pre>
+ * -canopy.t1 &lt;value&gt;
+ * </pre>
+ */
+ public static final OptionID T1_ID = new OptionID("canopy.t1", "Inclusion threshold for canopy clustering. t1 > t2!");
+
+ /**
+ * Parameter for the removal threshold of canopy clustering.
+ *
+ * Note: t1 > t2
+ *
+ * Syntax:
+ *
+ * <pre>
+ * -canopy.t2 &lt;value&gt;
+ * </pre>
+ */
+ public static final OptionID T2_ID = new OptionID("canopy.t2", "Removal threshold for canopy clustering. t1 > t2!");
+
+ /**
+ * Threshold for inclusion
+ */
+ private D t1;
+
+ /**
+ * Threshold for removal
+ */
+ private D t2;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ DistanceParameter<D> t1P = new DistanceParameter<>(T1_ID, distanceFunction);
+ if(config.grab(t1P)) {
+ t1 = t1P.getValue();
+ }
+
+ DistanceParameter<D> t2P = new DistanceParameter<>(T2_ID, distanceFunction);
+ // TODO: add distance constraint t1 > t2
+ if(config.grab(t2P)) {
+ t2 = t2P.getValue();
+ if(t1.compareTo(t2) <= 0) {
+ config.reportError(new WrongParameterValueException(t2P, T1_ID.getName() + " must be larger than " + T2_ID.getName()));
+ }
+ }
+ }
+
+ @Override
+ protected CanopyPreClustering<O, D> makeInstance() {
+ return new CanopyPreClustering<>(distanceFunction, t1, t2);
+ }
+
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/ClusteringAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/ClusteringAlgorithm.java
index 8f637460..249dc313 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/ClusteringAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/ClusteringAlgorithm.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -42,6 +42,7 @@ import de.lmu.ifi.dbs.elki.database.Database;
*
* @apiviz.has Clustering
* @apiviz.has Model
+ * @apiviz.excludeSubtypes
*
* @param <C> Clustering type
*/
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DBSCAN.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DBSCAN.java
index fcf81faa..57dcb435 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DBSCAN.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DBSCAN.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -40,10 +40,10 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
@@ -140,7 +140,7 @@ public class DBSCAN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
FiniteProgress objprog = LOG.isVerbose() ? new FiniteProgress("Processing objects", size, LOG) : null;
IndefiniteProgress clusprog = LOG.isVerbose() ? new IndefiniteProgress("Number of clusters", LOG) : null;
- resultList = new ArrayList<ModifiableDBIDs>();
+ resultList = new ArrayList<>();
noise = DBIDUtil.newHashSet();
processedIDs = DBIDUtil.newHashSet(size);
if(size < minpts) {
@@ -170,14 +170,14 @@ public class DBSCAN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
clusprog.setCompleted(LOG);
}
- Clustering<Model> result = new Clustering<Model>("DBSCAN Clustering", "dbscan-clustering");
+ Clustering<Model> result = new Clustering<>("DBSCAN Clustering", "dbscan-clustering");
for(ModifiableDBIDs res : resultList) {
Cluster<Model> c = new Cluster<Model>(res, ClusterModel.CLUSTER);
- result.addCluster(c);
+ result.addToplevelCluster(c);
}
Cluster<Model> n = new Cluster<Model>(noise, true, ClusterModel.CLUSTER);
- result.addCluster(n);
+ result.addToplevelCluster(n);
return result;
}
@@ -193,7 +193,7 @@ public class DBSCAN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
* @param objprog the progress object for logging the current status
*/
protected void expandCluster(Relation<O> relation, RangeQuery<O, D> rangeQuery, DBIDRef startObjectID, FiniteProgress objprog, IndefiniteProgress clusprog) {
- DistanceDBIDResult<D> neighbors = rangeQuery.getRangeForDBID(startObjectID, epsilon);
+ DistanceDBIDList<D> neighbors = rangeQuery.getRangeForDBID(startObjectID, epsilon);
// startObject is no core-object
if(neighbors.size() < minpts) {
@@ -224,7 +224,7 @@ public class DBSCAN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
while(seeds.size() > 0) {
DBIDMIter o = seeds.iter();
- DistanceDBIDResult<D> neighborhood = rangeQuery.getRangeForDBID(o, epsilon);
+ DistanceDBIDList<D> neighborhood = rangeQuery.getRangeForDBID(o, epsilon);
o.remove();
if(neighborhood.size() >= minpts) {
@@ -289,7 +289,7 @@ public class DBSCAN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- DistanceParameter<D> epsilonP = new DistanceParameter<D>(EPSILON_ID, distanceFunction);
+ DistanceParameter<D> epsilonP = new DistanceParameter<>(EPSILON_ID, distanceFunction);
if(config.grab(epsilonP)) {
epsilon = epsilonP.getValue();
}
@@ -303,7 +303,7 @@ public class DBSCAN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
@Override
protected DBSCAN<O, D> makeInstance() {
- return new DBSCAN<O, D>(distanceFunction, epsilon, minpts);
+ return new DBSCAN<>(distanceFunction, epsilon, minpts);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DeLiClu.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DeLiClu.java
index 22875715..3c2e0278 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DeLiClu.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DeLiClu.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -23,9 +23,10 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import gnu.trove.set.TIntSet;
+
import java.util.Collection;
import java.util.List;
-import java.util.Set;
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.KNNJoin;
@@ -37,11 +38,11 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.DistanceUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.index.tree.LeafEntry;
import de.lmu.ifi.dbs.elki.index.tree.TreeIndexPathComponent;
@@ -119,14 +120,14 @@ public class DeLiClu<NV extends NumberVector<?>, D extends Distance<D>> extends
*/
public DeLiClu(DistanceFunction<? super NV, D> distanceFunction, int minpts) {
super(distanceFunction);
- this.knnJoin = new KNNJoin<NV, D, DeLiCluNode, DeLiCluEntry>(distanceFunction, minpts);
+ this.knnJoin = new KNNJoin<>(distanceFunction, minpts);
this.minpts = minpts;
}
public ClusterOrderResult<D> run(Database database, Relation<NV> relation) {
Collection<DeLiCluTreeIndex<NV>> indexes = ResultUtil.filterResults(database, DeLiCluTreeIndex.class);
if(indexes.size() != 1) {
- throw new AbortException("DeLiClu found " + indexes.size() + " DeLiCluTree indexes, expected exactly one.");
+ throw new AbortException("DeLiClu found " + indexes.size() + " DeLiCluTree indexes. DeLiClu needs a special index to operate, therefore you need to add this index to your database.");
}
DeLiCluTreeIndex<NV> index = indexes.iterator().next();
// FIXME: check that the index matches the relation!
@@ -141,13 +142,13 @@ public class DeLiClu<NV extends NumberVector<?>, D extends Distance<D>> extends
if(LOG.isVerbose()) {
LOG.verbose("knnJoin...");
}
- DataStore<KNNResult<D>> knns = knnJoin.run(database, relation);
+ DataStore<KNNList<D>> knns = knnJoin.run(database, relation);
FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("DeLiClu", relation.size(), LOG) : null;
final int size = relation.size();
- ClusterOrderResult<D> clusterOrder = new ClusterOrderResult<D>("DeLiClu Clustering", "deliclu-clustering");
- heap = new UpdatableHeap<SpatialObjectPair>();
+ ClusterOrderResult<D> clusterOrder = new ClusterOrderResult<>("DeLiClu Clustering", "deliclu-clustering");
+ heap = new UpdatableHeap<>();
// add start object to cluster order and (root, root) to priority queue
DBID startID = getStartObject(relation);
@@ -217,7 +218,7 @@ public class DeLiClu<NV extends NumberVector<?>, D extends Distance<D>> extends
* @param nodePair the pair of nodes to be expanded
* @param knns the knn list
*/
- private void expandNodes(DeLiCluTree index, SpatialPrimitiveDistanceFunction<NV, D> distFunction, SpatialObjectPair nodePair, DataStore<KNNResult<D>> knns) {
+ private void expandNodes(DeLiCluTree index, SpatialPrimitiveDistanceFunction<NV, D> distFunction, SpatialObjectPair nodePair, DataStore<KNNList<D>> knns) {
DeLiCluNode node1 = index.getNode(((SpatialDirectoryEntry) nodePair.entry1).getPageID());
DeLiCluNode node2 = index.getNode(((SpatialDirectoryEntry) nodePair.entry2).getPageID());
@@ -274,7 +275,7 @@ public class DeLiClu<NV extends NumberVector<?>, D extends Distance<D>> extends
* @param node2 the second node
* @param knns the knn list
*/
- private void expandLeafNodes(SpatialPrimitiveDistanceFunction<NV, D> distFunction, DeLiCluNode node1, DeLiCluNode node2, DataStore<KNNResult<D>> knns) {
+ private void expandLeafNodes(SpatialPrimitiveDistanceFunction<NV, D> distFunction, DeLiCluNode node1, DeLiCluNode node2, DataStore<KNNList<D>> knns) {
if(LOG.isDebuggingFinest()) {
LOG.debugFinest("ExpandLeafNodes: " + node1.getPageID() + " + " + node2.getPageID());
}
@@ -310,12 +311,12 @@ public class DeLiClu<NV extends NumberVector<?>, D extends Distance<D>> extends
* @param path the path of the object inserted last
* @param knns the knn list
*/
- private void reinsertExpanded(SpatialPrimitiveDistanceFunction<NV, D> distFunction, DeLiCluTree index, List<TreeIndexPathComponent<DeLiCluEntry>> path, DataStore<KNNResult<D>> knns) {
+ private void reinsertExpanded(SpatialPrimitiveDistanceFunction<NV, D> distFunction, DeLiCluTree index, List<TreeIndexPathComponent<DeLiCluEntry>> path, DataStore<KNNList<D>> knns) {
SpatialDirectoryEntry rootEntry = (SpatialDirectoryEntry) path.remove(0).getEntry();
reinsertExpanded(distFunction, index, path, 0, rootEntry, knns);
}
- private void reinsertExpanded(SpatialPrimitiveDistanceFunction<NV, D> distFunction, DeLiCluTree index, List<TreeIndexPathComponent<DeLiCluEntry>> path, int pos, SpatialDirectoryEntry parentEntry, DataStore<KNNResult<D>> knns) {
+ private void reinsertExpanded(SpatialPrimitiveDistanceFunction<NV, D> distFunction, DeLiCluTree index, List<TreeIndexPathComponent<DeLiCluEntry>> path, int pos, SpatialDirectoryEntry parentEntry, DataStore<KNNList<D>> knns) {
DeLiCluNode parentNode = index.getNode(parentEntry.getPageID());
SpatialEntry entry2 = path.get(pos).getEntry();
@@ -332,7 +333,7 @@ public class DeLiClu<NV extends NumberVector<?>, D extends Distance<D>> extends
}
}
else {
- Set<Integer> expanded = index.getExpanded(entry2);
+ TIntSet expanded = index.getExpanded(entry2);
for(int i = 0; i < parentNode.getNumEntries(); i++) {
SpatialDirectoryEntry entry1 = (SpatialDirectoryEntry) parentNode.getEntry(i);
@@ -503,7 +504,7 @@ public class DeLiClu<NV extends NumberVector<?>, D extends Distance<D>> extends
@Override
protected DeLiClu<NV, D> makeInstance() {
- return new DeLiClu<NV, D>(distanceFunction, minpts);
+ return new DeLiClu<>(distanceFunction, minpts);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/EM.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/EM.java
index 514e63bd..c66442a1 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/EM.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/EM.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -46,7 +46,7 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.MathUtil;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
@@ -179,13 +179,13 @@ public class EM<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<
if (LOG.isVerbose()) {
LOG.verbose("initializing " + k + " models");
}
- List<Vector> means = new ArrayList<Vector>();
- for (NumberVector<?> nv : initializer.chooseInitialMeans(relation, k, EuclideanDistanceFunction.STATIC)) {
+ List<Vector> means = new ArrayList<>();
+ for (NumberVector<?> nv : initializer.chooseInitialMeans(database, relation, k, EuclideanDistanceFunction.STATIC)) {
means.add(nv.getColumnVector());
}
- List<Matrix> covarianceMatrices = new ArrayList<Matrix>(k);
+ List<Matrix> covarianceMatrices = new ArrayList<>(k);
double[] normDistrFactor = new double[k];
- List<Matrix> invCovMatr = new ArrayList<Matrix>(k);
+ List<Matrix> invCovMatr = new ArrayList<>(k);
double[] clusterWeights = new double[k];
probClusterIGivenX = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_SORTED, double[].class);
@@ -193,7 +193,13 @@ public class EM<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<
for (int i = 0; i < k; i++) {
Matrix m = Matrix.identity(dimensionality, dimensionality);
covarianceMatrices.add(m);
- normDistrFactor[i] = 1.0 / Math.sqrt(Math.pow(MathUtil.TWOPI, dimensionality) * m.det());
+ final double det = m.det();
+ if (det > 0.) {
+ normDistrFactor[i] = 1.0 / Math.sqrt(Math.pow(MathUtil.TWOPI, dimensionality) * det);
+ } else {
+ LOG.warning("Encountered matrix with 0 determinant - degenerated.");
+ normDistrFactor[i] = 1.0; // Not really well defined
+ }
invCovMatr.add(m.inverse());
clusterWeights[i] = 1.0 / k;
if (LOG.isDebuggingFinest()) {
@@ -201,7 +207,7 @@ public class EM<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<
msg.append(" model ").append(i).append(":\n");
msg.append(" mean: ").append(means.get(i)).append('\n');
msg.append(" m:\n").append(FormatUtil.format(m, " ")).append('\n');
- msg.append(" m.det(): ").append(m.det()).append('\n');
+ msg.append(" m.det(): ").append(det).append('\n');
msg.append(" cluster weight: ").append(clusterWeights[i]).append('\n');
msg.append(" normDistFact: ").append(normDistrFactor[i]).append('\n');
LOG.debugFine(msg.toString());
@@ -222,7 +228,7 @@ public class EM<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<
em = emNew;
// recompute models
- List<Vector> meanSums = new ArrayList<Vector>(k);
+ List<Vector> meanSums = new ArrayList<>(k);
double[] sumOfClusterProbabilities = new double[k];
for (int i = 0; i < k; i++) {
@@ -260,7 +266,13 @@ public class EM<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<
covarianceMatrices.set(i, covarianceMatrices.get(i).times(1 / sumOfClusterProbabilities[i]).cheatToAvoidSingularity(SINGULARITY_CHEAT));
}
for (int i = 0; i < k; i++) {
- normDistrFactor[i] = 1.0 / Math.sqrt(Math.pow(MathUtil.TWOPI, dimensionality) * covarianceMatrices.get(i).det());
+ final double det = covarianceMatrices.get(i).det();
+ if (det > 0.) {
+ normDistrFactor[i] = 1.0 / Math.sqrt(Math.pow(MathUtil.TWOPI, dimensionality) * det);
+ } else {
+ LOG.warning("Encountered matrix with 0 determinant - degenerated.");
+ normDistrFactor[i] = 1.0; // Not really well defined
+ }
invCovMatr.set(i, covarianceMatrices.get(i).inverse());
}
// reassign probabilities
@@ -279,7 +291,7 @@ public class EM<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<
}
// fill result with clusters and models
- List<ModifiableDBIDs> hardClusters = new ArrayList<ModifiableDBIDs>(k);
+ List<ModifiableDBIDs> hardClusters = new ArrayList<>(k);
for (int i = 0; i < k; i++) {
hardClusters.add(DBIDUtil.newHashSet());
}
@@ -298,14 +310,14 @@ public class EM<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<
hardClusters.get(maxIndex).add(iditer);
}
final NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(relation);
- Clustering<EMModel<V>> result = new Clustering<EMModel<V>>("EM Clustering", "em-clustering");
+ Clustering<EMModel<V>> result = new Clustering<>("EM Clustering", "em-clustering");
// provide models within the result
for (int i = 0; i < k; i++) {
// TODO: re-do labeling.
// SimpleClassLabel label = new SimpleClassLabel();
// label.init(result.canonicalClusterLabel(i));
- Cluster<EMModel<V>> model = new Cluster<EMModel<V>>(hardClusters.get(i), new EMModel<V>(factory.newNumberVector(means.get(i).getArrayRef()), covarianceMatrices.get(i)));
- result.addCluster(model);
+ Cluster<EMModel<V>> model = new Cluster<>(hardClusters.get(i), new EMModel<>(factory.newNumberVector(means.get(i).getArrayRef()), covarianceMatrices.get(i)));
+ result.addToplevelCluster(model);
}
return result;
}
@@ -339,6 +351,9 @@ public class EM<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<
if (LOG.isDebuggingFinest()) {
LOG.debugFinest(" difference vector= ( " + difference.toString() + " )\n" + " difference:\n" + FormatUtil.format(difference, " ") + "\n" + " rowTimesCovTimesCol:\n" + rowTimesCovTimesCol + "\n" + " power= " + power + "\n" + " prob=" + prob + "\n" + " inv cov matrix: \n" + FormatUtil.format(invCovMatr.get(i), " "));
}
+ if (!(prob >= 0.)) {
+ LOG.warning("Invalid probability: " + prob + " power: " + power + " factor: " + normDistrFactor[i]);
+ }
probabilities[i] = prob;
}
double priorProbability = 0.0;
@@ -352,13 +367,12 @@ public class EM<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<
double[] clusterProbabilities = new double[k];
for (int i = 0; i < k; i++) {
- assert (priorProbability >= 0.0);
assert (clusterWeights[i] >= 0.0);
// do not divide by zero!
- if (priorProbability == 0.0) {
- clusterProbabilities[i] = 0.0;
- } else {
+ if (priorProbability > 0.0) {
clusterProbabilities[i] = probabilities[i] / priorProbability * clusterWeights[i];
+ } else {
+ clusterProbabilities[i] = 0.0;
}
}
probClusterIGivenX.put(iditer, clusterProbabilities);
@@ -412,7 +426,7 @@ public class EM<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<
k = kP.getValue();
}
- ObjectParameter<KMeansInitialization<V>> initialP = new ObjectParameter<KMeansInitialization<V>>(INIT_ID, KMeansInitialization.class, RandomlyGeneratedInitialMeans.class);
+ ObjectParameter<KMeansInitialization<V>> initialP = new ObjectParameter<>(INIT_ID, KMeansInitialization.class, RandomlyGeneratedInitialMeans.class);
if (config.grab(initialP)) {
initializer = initialP.instantiateClass(config);
}
@@ -433,7 +447,7 @@ public class EM<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<
@Override
protected EM<V> makeInstance() {
- return new EM<V>(k, delta, initializer, maxiter);
+ return new EM<>(k, delta, initializer, maxiter);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/NaiveMeanShiftClustering.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/NaiveMeanShiftClustering.java
index 8429d8ac..a4d6e307 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/NaiveMeanShiftClustering.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/NaiveMeanShiftClustering.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -36,19 +36,19 @@ import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid;
-import de.lmu.ifi.dbs.elki.math.statistics.EpanechnikovKernelDensityFunction;
-import de.lmu.ifi.dbs.elki.math.statistics.KernelDensityFunction;
+import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.EpanechnikovKernelDensityFunction;
+import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.KernelDensityFunction;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
@@ -135,7 +135,7 @@ public class NaiveMeanShiftClustering<V extends NumberVector<?>, D extends Numbe
final double threshold = bandwidth * 1E-10;
// Result store:
- ArrayList<Pair<V, ModifiableDBIDs>> clusters = new ArrayList<Pair<V, ModifiableDBIDs>>();
+ ArrayList<Pair<V, ModifiableDBIDs>> clusters = new ArrayList<>();
ModifiableDBIDs noise = DBIDUtil.newArray();
@@ -148,11 +148,11 @@ public class NaiveMeanShiftClustering<V extends NumberVector<?>, D extends Numbe
// Compute new position:
V newvec = null;
{
- DistanceDBIDResult<D> neigh = rangeq.getRangeForObject(position, range);
+ DistanceDBIDList<D> neigh = rangeq.getRangeForObject(position, range);
boolean okay = (neigh.size() > 1) || (neigh.size() >= 1 && j > 1);
if (okay) {
Centroid newpos = new Centroid(dim);
- for (DistanceDBIDResultIter<D> niter = neigh.iter(); niter.valid(); niter.advance()) {
+ for (DistanceDBIDListIter<D> niter = neigh.iter(); niter.valid(); niter.advance()) {
final double weight = kernel.density(niter.getDistance().doubleValue() / bandwidth);
newpos.put(relation.get(niter), weight);
}
@@ -206,14 +206,14 @@ public class NaiveMeanShiftClustering<V extends NumberVector<?>, D extends Numbe
prog.ensureCompleted(LOG);
}
- ArrayList<Cluster<MeanModel<V>>> cs = new ArrayList<Cluster<MeanModel<V>>>(clusters.size());
+ ArrayList<Cluster<MeanModel<V>>> cs = new ArrayList<>(clusters.size());
for (Pair<V, ModifiableDBIDs> pair : clusters) {
- cs.add(new Cluster<MeanModel<V>>(pair.second, new MeanModel<V>(pair.first)));
+ cs.add(new Cluster<>(pair.second, new MeanModel<>(pair.first)));
}
if (noise.size() > 0) {
cs.add(new Cluster<MeanModel<V>>(noise, true));
}
- Clustering<MeanModel<V>> c = new Clustering<MeanModel<V>>("Mean-shift Clustering", "mean-shift-clustering", cs);
+ Clustering<MeanModel<V>> c = new Clustering<>("Mean-shift Clustering", "mean-shift-clustering", cs);
return c;
}
@@ -261,11 +261,11 @@ public class NaiveMeanShiftClustering<V extends NumberVector<?>, D extends Numbe
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- ObjectParameter<KernelDensityFunction> kernelP = new ObjectParameter<KernelDensityFunction>(KERNEL_ID, KernelDensityFunction.class, EpanechnikovKernelDensityFunction.class);
+ ObjectParameter<KernelDensityFunction> kernelP = new ObjectParameter<>(KERNEL_ID, KernelDensityFunction.class, EpanechnikovKernelDensityFunction.class);
if (config.grab(kernelP)) {
kernel = kernelP.instantiateClass(config);
}
- DistanceParameter<D> rangeP = new DistanceParameter<D>(RANGE_ID, distanceFunction);
+ DistanceParameter<D> rangeP = new DistanceParameter<>(RANGE_ID, distanceFunction);
if (config.grab(rangeP)) {
range = rangeP.getValue();
}
@@ -273,7 +273,7 @@ public class NaiveMeanShiftClustering<V extends NumberVector<?>, D extends Numbe
@Override
protected NaiveMeanShiftClustering<V, D> makeInstance() {
- return new NaiveMeanShiftClustering<V, D>(distanceFunction, kernel, range);
+ return new NaiveMeanShiftClustering<>(distanceFunction, kernel, range);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICS.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICS.java
index 2c098dc0..e928d041 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICS.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICS.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -31,17 +31,17 @@ import de.lmu.ifi.dbs.elki.database.QueryUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
-import de.lmu.ifi.dbs.elki.database.ids.DistanceDBIDPair;
-import de.lmu.ifi.dbs.elki.database.ids.DoubleDistanceDBIDPair;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDPair;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDPair;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.DistanceUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDResultIter;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -139,7 +139,7 @@ public class OPTICS<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
final FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("OPTICS", size, LOG) : null;
processedIDs = DBIDUtil.newHashSet(size);
- ClusterOrderResult<D> clusterOrder = new ClusterOrderResult<D>("OPTICS Clusterorder", "optics-clusterorder");
+ ClusterOrderResult<D> clusterOrder = new ClusterOrderResult<>("OPTICS Clusterorder", "optics-clusterorder");
if(getDistanceFunction() instanceof PrimitiveDoubleDistanceFunction && DoubleDistance.class.isInstance(epsilon)) {
// Optimized codepath for double-based distances. Avoids Java
@@ -182,25 +182,25 @@ public class OPTICS<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
* the algorithm
*/
protected void expandClusterOrder(ClusterOrderResult<D> clusterOrder, Database database, RangeQuery<O, D> rangeQuery, DBID objectID, D epsilon, FiniteProgress progress) {
- UpdatableHeap<ClusterOrderEntry<D>> heap = new UpdatableHeap<ClusterOrderEntry<D>>();
- heap.add(new GenericClusterOrderEntry<D>(objectID, null, getDistanceFunction().getDistanceFactory().infiniteDistance()));
+ UpdatableHeap<ClusterOrderEntry<D>> heap = new UpdatableHeap<>();
+ heap.add(new GenericClusterOrderEntry<>(objectID, null, getDistanceFunction().getDistanceFactory().infiniteDistance()));
while(!heap.isEmpty()) {
final ClusterOrderEntry<D> current = heap.poll();
clusterOrder.add(current);
processedIDs.add(current.getID());
- DistanceDBIDResult<D> neighbors = rangeQuery.getRangeForDBID(current.getID(), epsilon);
+ DistanceDBIDList<D> neighbors = rangeQuery.getRangeForDBID(current.getID(), epsilon);
if(neighbors.size() >= minpts) {
final DistanceDBIDPair<D> last = neighbors.get(minpts - 1);
D coreDistance = last.getDistance();
- for(DistanceDBIDResultIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ for(DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
if(processedIDs.contains(neighbor)) {
continue;
}
D reachability = DistanceUtil.max(neighbor.getDistance(), coreDistance);
- heap.add(new GenericClusterOrderEntry<D>(DBIDUtil.deref(neighbor), current.getID(), reachability));
+ heap.add(new GenericClusterOrderEntry<>(DBIDUtil.deref(neighbor), current.getID(), reachability));
}
}
if(progress != null) {
@@ -221,7 +221,7 @@ public class OPTICS<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
* the algorithm
*/
protected void expandClusterOrderDouble(ClusterOrderResult<DoubleDistance> clusterOrder, Database database, RangeQuery<O, DoubleDistance> rangeQuery, DBID objectID, DoubleDistance epsilon, FiniteProgress progress) {
- UpdatableHeap<DoubleDistanceClusterOrderEntry> heap = new UpdatableHeap<DoubleDistanceClusterOrderEntry>();
+ UpdatableHeap<DoubleDistanceClusterOrderEntry> heap = new UpdatableHeap<>();
heap.add(new DoubleDistanceClusterOrderEntry(objectID, null, Double.POSITIVE_INFINITY));
while(!heap.isEmpty()) {
@@ -229,17 +229,17 @@ public class OPTICS<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
clusterOrder.add(current);
processedIDs.add(current.getID());
- DistanceDBIDResult<DoubleDistance> neighbors = rangeQuery.getRangeForDBID(current.getID(), epsilon);
+ DistanceDBIDList<DoubleDistance> neighbors = rangeQuery.getRangeForDBID(current.getID(), epsilon);
if(neighbors.size() >= minpts) {
final DistanceDBIDPair<DoubleDistance> last = neighbors.get(minpts - 1);
if(last instanceof DoubleDistanceDBIDPair) {
double coreDistance = ((DoubleDistanceDBIDPair) last).doubleDistance();
- for(DistanceDBIDResultIter<DoubleDistance> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ for(DistanceDBIDListIter<DoubleDistance> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
if(processedIDs.contains(neighbor)) {
continue;
}
- double reachability = Math.max(((DoubleDistanceDBIDResultIter) neighbor).doubleDistance(), coreDistance);
+ double reachability = Math.max(((DoubleDistanceDBIDListIter) neighbor).doubleDistance(), coreDistance);
heap.add(new DoubleDistanceClusterOrderEntry(DBIDUtil.deref(neighbor), current.getID(), reachability));
}
}
@@ -248,7 +248,7 @@ public class OPTICS<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
// Only if we got an optimized result before.
double coreDistance = last.getDistance().doubleValue();
- for(DistanceDBIDResultIter<DoubleDistance> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ for(DistanceDBIDListIter<DoubleDistance> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
if(processedIDs.contains(neighbor)) {
continue;
}
@@ -298,7 +298,7 @@ public class OPTICS<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- DistanceParameter<D> epsilonP = new DistanceParameter<D>(EPSILON_ID, distanceFunction, true);
+ DistanceParameter<D> epsilonP = new DistanceParameter<>(EPSILON_ID, distanceFunction, true);
if(config.grab(epsilonP)) {
epsilon = epsilonP.getValue();
}
@@ -312,7 +312,7 @@ public class OPTICS<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
@Override
protected OPTICS<O, D> makeInstance() {
- return new OPTICS<O, D>(distanceFunction, epsilon, minpts);
+ return new OPTICS<>(distanceFunction, epsilon, minpts);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSTypeAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSTypeAlgorithm.java
index 3ead6f3e..82d7ec88 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSTypeAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSTypeAlgorithm.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSXi.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSXi.java
index 39a0ebd6..583d402b 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSXi.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSXi.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -46,8 +46,6 @@ import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.result.IterableResult;
import de.lmu.ifi.dbs.elki.result.optics.ClusterOrderEntry;
import de.lmu.ifi.dbs.elki.result.optics.ClusterOrderResult;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy.HierarchyHashmapList;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy.ModifiableHierarchy;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
@@ -135,13 +133,13 @@ public class OPTICSXi<N extends NumberDistance<N, ?>> extends AbstractAlgorithm<
double mib = 0.0;
// TODO: make it configurable to keep this list; this is mostly useful for
// visualization
- List<SteepArea> salist = new ArrayList<SteepArea>();
- List<SteepDownArea> sdaset = new ArrayList<SteepDownArea>();
- ModifiableHierarchy<Cluster<OPTICSModel>> hier = new HierarchyHashmapList<Cluster<OPTICSModel>>();
- HashSet<Cluster<OPTICSModel>> curclusters = new HashSet<Cluster<OPTICSModel>>();
+ List<SteepArea> salist = new ArrayList<>();
+ List<SteepDownArea> sdaset = new ArrayList<>();
+ final Clustering<OPTICSModel> clustering = new Clustering<>("OPTICS Xi-Clusters", "optics");
+ HashSet<Cluster<OPTICSModel>> curclusters = new HashSet<>();
HashSetModifiableDBIDs unclaimedids = DBIDUtil.newHashSet(relation.getDBIDs());
- SteepScanPosition<N> scan = new SteepScanPosition<N>(clusterOrder);
+ SteepScanPosition<N> scan = new SteepScanPosition<>(clusterOrder);
while(scan.hasNext()) {
final int curpos = scan.index;
// Update maximum-inbetween
@@ -285,7 +283,7 @@ public class OPTICSXi<N extends NumberDistance<N, ?>> extends AbstractAlgorithm<
LOG.debugFine("Found cluster with " + dbids.size() + " new objects, length " + (cstart - cend + 1));
}
OPTICSModel model = new OPTICSModel(cstart, cend);
- Cluster<OPTICSModel> cluster = new Cluster<OPTICSModel>("Cluster_" + cstart + "_" + cend, dbids, model, hier);
+ Cluster<OPTICSModel> cluster = new Cluster<>("Cluster_" + cstart + "_" + cend, dbids, model);
// Build the hierarchy
{
Iterator<Cluster<OPTICSModel>> iter = curclusters.iterator();
@@ -293,7 +291,7 @@ public class OPTICSXi<N extends NumberDistance<N, ?>> extends AbstractAlgorithm<
Cluster<OPTICSModel> clus = iter.next();
OPTICSModel omodel = clus.getModel();
if(model.getStartIndex() <= omodel.getStartIndex() && omodel.getEndIndex() <= model.getEndIndex()) {
- hier.add(cluster, clus);
+ clustering.addChildCluster(cluster, clus);
iter.remove();
}
}
@@ -308,23 +306,22 @@ public class OPTICSXi<N extends NumberDistance<N, ?>> extends AbstractAlgorithm<
}
}
if(curclusters.size() > 0 || unclaimedids.size() > 0) {
- final Clustering<OPTICSModel> clustering = new Clustering<OPTICSModel>("OPTICS Xi-Clusters", "optics");
if(unclaimedids.size() > 0) {
final Cluster<OPTICSModel> allcluster;
if(clusterOrder.get(clusterOrder.size() - 1).getReachability().isInfiniteDistance()) {
- allcluster = new Cluster<OPTICSModel>("Noise", unclaimedids, true, new OPTICSModel(0, clusterOrder.size() - 1), hier);
+ allcluster = new Cluster<>("Noise", unclaimedids, true, new OPTICSModel(0, clusterOrder.size() - 1));
}
else {
- allcluster = new Cluster<OPTICSModel>("Cluster", unclaimedids, new OPTICSModel(0, clusterOrder.size() - 1), hier);
+ allcluster = new Cluster<>("Cluster", unclaimedids, new OPTICSModel(0, clusterOrder.size() - 1));
}
for(Cluster<OPTICSModel> cluster : curclusters) {
- hier.add(allcluster, cluster);
+ clustering.addChildCluster(allcluster, cluster);
}
- clustering.addCluster(allcluster);
+ clustering.addToplevelCluster(allcluster);
}
else {
for(Cluster<OPTICSModel> cluster : curclusters) {
- clustering.addCluster(cluster);
+ clustering.addToplevelCluster(cluster);
}
}
clustering.addChildResult(clusterOrderResult);
@@ -663,7 +660,7 @@ public class OPTICSXi<N extends NumberDistance<N, ?>> extends AbstractAlgorithm<
xi = xiP.doubleValue();
}
- ClassParameter<OPTICSTypeAlgorithm<D>> opticsP = new ClassParameter<OPTICSTypeAlgorithm<D>>(XIALG_ID, OPTICSTypeAlgorithm.class, OPTICS.class);
+ ClassParameter<OPTICSTypeAlgorithm<D>> opticsP = new ClassParameter<>(XIALG_ID, OPTICSTypeAlgorithm.class, OPTICS.class);
if(config.grab(opticsP)) {
optics = opticsP.instantiateClass(config);
}
@@ -671,7 +668,7 @@ public class OPTICSXi<N extends NumberDistance<N, ?>> extends AbstractAlgorithm<
@Override
protected OPTICSXi<D> makeInstance() {
- return new OPTICSXi<D>(optics, xi);
+ return new OPTICSXi<>(optics, xi);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SLINK.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SLINK.java
deleted file mode 100644
index 3e1f0650..00000000
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SLINK.java
+++ /dev/null
@@ -1,818 +0,0 @@
-package de.lmu.ifi.dbs.elki.algorithm.clustering;
-
-/*
- This file is part of ELKI:
- Environment for Developing KDD-Applications Supported by Index-Structures
-
- Copyright (C) 2012
- Ludwig-Maximilians-Universität München
- Lehr- und Forschungseinheit für Datenbanksysteme
- ELKI Development Team
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-import gnu.trove.list.array.TDoubleArrayList;
-
-import java.util.ArrayList;
-import java.util.Comparator;
-
-import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
-import de.lmu.ifi.dbs.elki.data.Cluster;
-import de.lmu.ifi.dbs.elki.data.Clustering;
-import de.lmu.ifi.dbs.elki.data.model.DendrogramModel;
-import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
-import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
-import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
-import de.lmu.ifi.dbs.elki.database.Database;
-import de.lmu.ifi.dbs.elki.database.datastore.DBIDDataStore;
-import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
-import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
-import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
-import de.lmu.ifi.dbs.elki.database.datastore.DoubleDistanceDataStore;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDBIDDataStore;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDistanceDataStore;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDVar;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
-import de.lmu.ifi.dbs.elki.database.relation.Relation;
-import de.lmu.ifi.dbs.elki.distance.DistanceUtil;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
-import de.lmu.ifi.dbs.elki.logging.Logging;
-import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
-import de.lmu.ifi.dbs.elki.result.BasicResult;
-import de.lmu.ifi.dbs.elki.result.OrderingFromDataStore;
-import de.lmu.ifi.dbs.elki.result.Result;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy.HierarchyHashmapList;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy.ModifiableHierarchy;
-import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
-import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
-import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-
-/**
- * Implementation of the efficient Single-Link Algorithm SLINK of R. Sibson.
- * <p>
- * Reference: R. Sibson: SLINK: An optimally efficient algorithm for the
- * single-link cluster method. <br>
- * In: The Computer Journal 16 (1973), No. 1, p. 30-34.
- * </p>
- *
- * @author Elke Achtert
- * @param <O> the type of DatabaseObject the algorithm is applied on
- * @param <D> the type of Distance used
- */
-@Title("SLINK: Single Link Clustering")
-@Description("Hierarchical clustering algorithm based on single-link connectivity.")
-@Reference(authors = "R. Sibson", title = "SLINK: An optimally efficient algorithm for the single-link cluster method", booktitle = "The Computer Journal 16 (1973), No. 1, p. 30-34.", url = "http://dx.doi.org/10.1093/comjnl/16.1.30")
-public class SLINK<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm<O, D, Result> {
- /**
- * The logger for this class.
- */
- private static final Logging LOG = Logging.getLogger(SLINK.class);
-
- /**
- * Minimum number of clusters to extract
- */
- private int minclusters = -1;
-
- /**
- * Constructor.
- *
- * @param distanceFunction Distance function
- * @param minclusters Minimum clusters to extract. Can be {@code -1}.
- */
- public SLINK(DistanceFunction<? super O, D> distanceFunction, int minclusters) {
- super(distanceFunction);
- this.minclusters = minclusters;
- }
-
- /**
- * Performs the SLINK algorithm on the given database.
- */
- public Result run(Database database, Relation<O> relation) {
- DistanceQuery<O, D> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
- @SuppressWarnings("unchecked")
- Class<D> distCls = (Class<D>) getDistanceFunction().getDistanceFactory().getClass();
- WritableDBIDDataStore pi = DataStoreUtil.makeDBIDStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
- WritableDataStore<D> lambda = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, distCls);
- // Temporary storage for m.
- WritableDataStore<D> m = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, distCls);
-
- FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Running SLINK", relation.size(), LOG) : null;
- // has to be an array for monotonicity reasons!
- ModifiableDBIDs processedIDs = DBIDUtil.newArray(relation.size());
-
- // Optimized code path for double distances
- if (getDistanceFunction() instanceof PrimitiveDoubleDistanceFunction && lambda instanceof WritableDoubleDistanceDataStore && m instanceof WritableDoubleDistanceDataStore) {
- @SuppressWarnings("unchecked")
- PrimitiveDoubleDistanceFunction<? super O> dist = (PrimitiveDoubleDistanceFunction<? super O>) getDistanceFunction();
- WritableDoubleDistanceDataStore lambdad = (WritableDoubleDistanceDataStore) lambda;
- WritableDoubleDistanceDataStore md = (WritableDoubleDistanceDataStore) m;
- // apply the algorithm
- for (DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) {
- step1double(id, pi, lambdad);
- step2double(id, processedIDs, distQuery.getRelation(), dist, md);
- step3double(id, pi, lambdad, processedIDs, md);
- step4double(id, pi, lambdad, processedIDs);
-
- processedIDs.add(id);
-
- if (progress != null) {
- progress.incrementProcessed(LOG);
- }
- }
- } else {
- // apply the algorithm
- for (DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) {
- step1(id, pi, lambda);
- step2(id, processedIDs, distQuery, m);
- step3(id, pi, lambda, processedIDs, m);
- step4(id, pi, lambda, processedIDs);
-
- processedIDs.add(id);
-
- if (progress != null) {
- progress.incrementProcessed(LOG);
- }
- }
- }
-
- if (progress != null) {
- progress.ensureCompleted(LOG);
- }
- // We don't need m anymore.
- m.destroy();
- m = null;
-
- // Build dendrogam clusters identified by their target object
- if (LOG.isVerbose()) {
- LOG.verbose("Extracting clusters.");
- }
- final BasicResult result;
- if (lambda instanceof DoubleDistanceDataStore) {
- result = extractClustersDouble(relation.getDBIDs(), pi, (DoubleDistanceDataStore) lambda, minclusters);
- } else {
- result = extractClusters(relation.getDBIDs(), pi, lambda, minclusters);
- }
-
- result.addChildResult(new MaterializedRelation<DBID>("SLINK pi", "slink-order", TypeUtil.DBID, pi, processedIDs));
- result.addChildResult(new MaterializedRelation<D>("SLINK lambda", "slink-order", new SimpleTypeInformation<D>(distCls), lambda, processedIDs));
- result.addChildResult(new OrderingFromDataStore<D>("SLINK order", "slink-order", processedIDs, lambda));
- return result;
- }
-
- /**
- * First step: Initialize P(id) = id, L(id) = infinity.
- *
- * @param id the id of the object to be inserted into the pointer
- * representation
- * @param pi Pi data store
- * @param lambda Lambda data store
- */
- private void step1(DBIDRef id, WritableDBIDDataStore pi, WritableDataStore<D> lambda) {
- // P(n+1) = n+1:
- pi.put(id, id);
- // L(n+1) = infinity
- lambda.put(id, getDistanceFunction().getDistanceFactory().infiniteDistance());
- }
-
- /**
- * Second step: Determine the pairwise distances from all objects in the
- * pointer representation to the new object with the specified id.
- *
- * @param id the id of the object to be inserted into the pointer
- * representation
- * @param processedIDs the already processed ids
- * @param m Data store
- * @param distFunc Distance function to use
- */
- private void step2(DBIDRef id, DBIDs processedIDs, DistanceQuery<O, D> distFunc, WritableDataStore<D> m) {
- O newObj = distFunc.getRelation().get(id);
- for (DBIDIter it = processedIDs.iter(); it.valid(); it.advance()) {
- // M(i) = dist(i, n+1)
- m.put(it, distFunc.distance(it, newObj));
- }
- }
-
- /**
- * Third step: Determine the values for P and L
- *
- * @param id the id of the object to be inserted into the pointer
- * representation
- * @param pi Pi data store
- * @param lambda Lambda data store
- * @param processedIDs the already processed ids
- * @param m Data store
- */
- private void step3(DBIDRef id, WritableDBIDDataStore pi, WritableDataStore<D> lambda, DBIDs processedIDs, WritableDataStore<D> m) {
- DBIDVar p_i = DBIDUtil.newVar();
- // for i = 1..n
- for (DBIDIter it = processedIDs.iter(); it.valid(); it.advance()) {
- D l_i = lambda.get(it);
- D m_i = m.get(it);
- pi.assignVar(it, p_i); // p_i = pi(it)
- D mp_i = m.get(p_i);
-
- // if L(i) >= M(i)
- if (l_i.compareTo(m_i) >= 0) {
- // M(P(i)) = min { M(P(i)), L(i) }
- m.put(p_i, DistanceUtil.min(mp_i, l_i));
-
- // L(i) = M(i)
- lambda.put(it, m_i);
-
- // P(i) = n+1;
- pi.put(it, id);
- } else {
- // M(P(i)) = min { M(P(i)), M(i) }
- m.put(p_i, DistanceUtil.min(mp_i, m_i));
- }
- }
- }
-
- /**
- * Fourth step: Actualize the clusters if necessary
- *
- * @param id the id of the current object
- * @param pi Pi data store
- * @param lambda Lambda data store
- * @param processedIDs the already processed ids
- */
- private void step4(DBIDRef id, WritableDBIDDataStore pi, WritableDataStore<D> lambda, DBIDs processedIDs) {
- DBIDVar p_i = DBIDUtil.newVar();
- // for i = 1..n
- for (DBIDIter it = processedIDs.iter(); it.valid(); it.advance()) {
- D l_i = lambda.get(it);
- pi.assignVar(it, p_i); // p_i = pi(it)
- D lp_i = lambda.get(p_i);
-
- // if L(i) >= L(P(i))
- if (l_i.compareTo(lp_i) >= 0) {
- // P(i) = n+1
- pi.put(it, id);
- }
- }
- }
-
- /**
- * First step: Initialize P(id) = id, L(id) = infinity.
- *
- * @param id the id of the object to be inserted into the pointer
- * representation
- * @param pi Pi data store
- * @param lambda Lambda data store
- */
- private void step1double(DBIDRef id, WritableDBIDDataStore pi, WritableDoubleDistanceDataStore lambda) {
- // P(n+1) = n+1:
- pi.put(id, id);
- // L(n+1) = infinity
- lambda.putDouble(id, Double.POSITIVE_INFINITY);
- }
-
- /**
- * Second step: Determine the pairwise distances from all objects in the
- * pointer representation to the new object with the specified id.
- *
- * @param id the id of the object to be inserted into the pointer
- * representation
- * @param processedIDs the already processed ids
- * @param m Data store
- * @param relation Data relation
- * @param distFunc Distance function to use
- */
- private void step2double(DBIDRef id, DBIDs processedIDs, Relation<? extends O> relation, PrimitiveDoubleDistanceFunction<? super O> distFunc, WritableDoubleDistanceDataStore m) {
- O newObj = relation.get(id);
- for (DBIDIter it = processedIDs.iter(); it.valid(); it.advance()) {
- // M(i) = dist(i, n+1)
- m.putDouble(it, distFunc.doubleDistance(relation.get(it), newObj));
- }
- }
-
- /**
- * Third step: Determine the values for P and L
- *
- * @param id the id of the object to be inserted into the pointer
- * representation
- * @param pi Pi data store
- * @param lambda Lambda data store
- * @param processedIDs the already processed ids
- * @param m Data store
- */
- private void step3double(DBIDRef id, WritableDBIDDataStore pi, WritableDoubleDistanceDataStore lambda, DBIDs processedIDs, WritableDoubleDistanceDataStore m) {
- DBIDVar p_i = DBIDUtil.newVar();
- // for i = 1..n
- for (DBIDIter it = processedIDs.iter(); it.valid(); it.advance()) {
- double l_i = lambda.doubleValue(it);
- double m_i = m.doubleValue(it);
- pi.assignVar(it, p_i); // p_i = pi(it)
- double mp_i = m.doubleValue(p_i);
-
- // if L(i) >= M(i)
- if (l_i >= m_i) {
- // M(P(i)) = min { M(P(i)), L(i) }
- m.putDouble(p_i, Math.min(mp_i, l_i));
-
- // L(i) = M(i)
- lambda.putDouble(it, m_i);
-
- // P(i) = n+1;
- pi.put(it, id);
- } else {
- // M(P(i)) = min { M(P(i)), M(i) }
- m.putDouble(p_i, Math.min(mp_i, m_i));
- }
- }
- }
-
- /**
- * Fourth step: Actualize the clusters if necessary
- *
- * @param id the id of the current object
- * @param pi Pi data store
- * @param lambda Lambda data store
- * @param processedIDs the already processed ids
- */
- private void step4double(DBIDRef id, WritableDBIDDataStore pi, WritableDoubleDistanceDataStore lambda, DBIDs processedIDs) {
- DBIDVar p_i = DBIDUtil.newVar();
- // for i = 1..n
- for (DBIDIter it = processedIDs.iter(); it.valid(); it.advance()) {
- double l_i = lambda.doubleValue(it);
- pi.assignVar(it, p_i); // p_i = pi(it)
- double lp_i = lambda.doubleValue(p_i);
-
- // if L(i) >= L(P(i))
- if (l_i >= lp_i) {
- // P(i) = n+1
- pi.put(it, id);
- }
- }
- }
-
- /**
- * Extract all clusters from the pi-lambda-representation.
- *
- * @param ids Object ids to process
- * @param pi Pi store
- * @param lambda Lambda store
- * @param minclusters Minimum number of clusters to extract
- *
- * @return Hierarchical clustering
- */
- private Clustering<DendrogramModel<D>> extractClusters(DBIDs ids, final DBIDDataStore pi, final DataStore<D> lambda, int minclusters) {
- FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Extracting clusters", ids.size(), LOG) : null;
- D nulldist = getDistanceFunction().getDistanceFactory().nullDistance();
-
- // Sort DBIDs by lambda. We need this for two things:
- // a) to determine the stop distance from "minclusters" parameter
- // b) to process arrows in decreasing / increasing order
- ArrayModifiableDBIDs order = DBIDUtil.newArray(ids);
- order.sort(new CompareByLambda<D>(lambda));
-
- // Stop distance:
- final D stopdist = (minclusters > 0) ? lambda.get(order.get(ids.size() - minclusters)) : null;
-
- // The initial pass is top-down.
- DBIDArrayIter it = order.iter();
- int split = (minclusters > 0) ? Math.max(ids.size() - minclusters, 0) : 0;
- // Tie handling: decrement split.
- if (stopdist != null) {
- while (split > 0) {
- it.seek(split - 1);
- if (stopdist.compareTo(lambda.get(it)) == 0) {
- split--;
- minclusters++;
- } else {
- break;
- }
- }
- }
-
- // Extract the child clusters
- int cnum = 0;
- int expcnum = Math.max(0, minclusters);
- WritableIntegerDataStore cluster_map = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_TEMP, -1);
- ArrayList<ModifiableDBIDs> cluster_dbids = new ArrayList<ModifiableDBIDs>(expcnum);
- ArrayList<D> cluster_dist = new ArrayList<D>(expcnum);
- ArrayModifiableDBIDs cluster_leads = DBIDUtil.newArray(expcnum);
-
- DBIDVar succ = DBIDUtil.newVar(); // Variable for successor.
- // Go backwards on the lower part.
- for (it.seek(split - 1); it.valid(); it.retract()) {
- D dist = lambda.get(it); // Distance to successor
- pi.assignVar(it, succ); // succ = pi(it)
- int clusterid = cluster_map.intValue(succ);
- // Successor cluster has already been created:
- if (clusterid >= 0) {
- cluster_dbids.get(clusterid).add(it);
- cluster_map.putInt(it, clusterid);
- // Update distance to maximum encountered:
- if (cluster_dist.get(clusterid).compareTo(dist) < 0) {
- cluster_dist.set(clusterid, dist);
- }
- } else {
- // Need to start a new cluster:
- clusterid = cnum; // next cluster number.
- ModifiableDBIDs cids = DBIDUtil.newArray();
- // Add element and successor as initial members:
- cids.add(succ);
- cluster_map.putInt(succ, clusterid);
- cids.add(it);
- cluster_map.putInt(it, clusterid);
- // Store new cluster.
- cluster_dbids.add(cids);
- cluster_leads.add(succ);
- cluster_dist.add(dist);
- cnum++;
- }
-
- // Decrement counter
- if (progress != null) {
- progress.incrementProcessed(LOG);
- }
- }
- // Build a hierarchy out of these clusters.
- Cluster<DendrogramModel<D>> root = null;
- ModifiableHierarchy<Cluster<DendrogramModel<D>>> hier = new HierarchyHashmapList<Cluster<DendrogramModel<D>>>();
- ArrayList<Cluster<DendrogramModel<D>>> clusters = new ArrayList<Cluster<DendrogramModel<D>>>(ids.size() + expcnum - split);
- // Convert initial clusters to cluster objects
- {
- int i = 0;
- for (DBIDIter it2 = cluster_leads.iter(); it2.valid(); it2.advance(), i++) {
- clusters.add(makeCluster(it2, cluster_dist.get(i), cluster_dbids.get(i), hier));
- }
- cluster_dist = null; // Invalidate
- cluster_dbids = null; // Invalidate
- }
- // Process the upper part, bottom-up.
- for (it.seek(split); it.valid(); it.advance()) {
- int clusterid = cluster_map.intValue(it);
- // The current cluster:
- final Cluster<DendrogramModel<D>> clus;
- if (clusterid >= 0) {
- clus = clusters.get(clusterid);
- } else {
- ArrayModifiableDBIDs cids = DBIDUtil.newArray(1);
- cids.add(it);
- clus = makeCluster(it, nulldist, cids, hier);
- // No need to store in clusters: cannot have another incoming pi
- // pointer!
- }
- // The successor to join:
- pi.assignVar(it, succ); // succ = pi(it)
- if (DBIDUtil.equal(it, succ)) {
- assert (root == null);
- root = clus;
- } else {
- // Parent cluster:
- int parentid = cluster_map.intValue(succ);
- D depth = lambda.get(it);
- // Parent cluster exists - merge as a new cluster:
- if (parentid >= 0) {
- Cluster<DendrogramModel<D>> pclus = makeCluster(succ, depth, DBIDUtil.EMPTYDBIDS, hier);
- hier.add(pclus, clusters.get(parentid));
- hier.add(pclus, clus);
- clusters.set(parentid, pclus); // Replace existing parent cluster
- } else {
- // Create a new, one-element, parent cluster.
- parentid = cnum;
- cnum++;
- ArrayModifiableDBIDs cids = DBIDUtil.newArray(1);
- cids.add(succ);
- Cluster<DendrogramModel<D>> pclus = makeCluster(succ, depth, cids, hier);
- hier.add(pclus, clus);
- assert (clusters.size() == parentid);
- clusters.add(pclus); // Remember parent cluster
- cluster_map.putInt(succ, parentid); // Reference
- }
- }
-
- // Decrement counter
- if (progress != null) {
- progress.incrementProcessed(LOG);
- }
- }
-
- if (progress != null) {
- progress.ensureCompleted(LOG);
- }
- // build hierarchy
- final Clustering<DendrogramModel<D>> dendrogram = new Clustering<DendrogramModel<D>>("Single-Link-Dendrogram", "slink-dendrogram");
- dendrogram.addCluster(root);
-
- return dendrogram;
- }
-
- /**
- * Extract all clusters from the pi-lambda-representation.
- *
- * @param ids Object ids to process
- * @param pi Pi store
- * @param lambda Lambda store
- * @param minclusters Minimum number of clusters to extract
- *
- * @return Hierarchical clustering
- */
- private Clustering<DendrogramModel<D>> extractClustersDouble(DBIDs ids, final DBIDDataStore pi, final DoubleDistanceDataStore lambda, int minclusters) {
- FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Extracting clusters", ids.size(), LOG) : null;
- D nulldist = getDistanceFunction().getDistanceFactory().nullDistance();
-
- // Sort DBIDs by lambda. We need this for two things:
- // a) to determine the stop distance from "minclusters" parameter
- // b) to process arrows in decreasing / increasing order
- ArrayModifiableDBIDs order = DBIDUtil.newArray(ids);
- order.sort(new CompareByDoubleLambda(lambda));
-
- // Stop distance:
- final double stopdist = (minclusters > 0) ? lambda.doubleValue(order.get(ids.size() - minclusters)) : Double.POSITIVE_INFINITY;
-
- // The initial pass is top-down.
- DBIDArrayIter it = order.iter();
- int split = (minclusters > 0) ? Math.max(ids.size() - minclusters, 0) : 0;
- // Tie handling: decrement split.
- if (minclusters > 0) {
- while (split > 0) {
- it.seek(split - 1);
- if (stopdist <= lambda.doubleValue(it)) {
- split--;
- minclusters++;
- } else {
- break;
- }
- }
- }
-
- // Extract the child clusters
- int cnum = 0;
- int expcnum = Math.max(0, minclusters);
- WritableIntegerDataStore cluster_map = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_TEMP, -1);
- ArrayList<ModifiableDBIDs> cluster_dbids = new ArrayList<ModifiableDBIDs>(expcnum);
- TDoubleArrayList cluster_dist = new TDoubleArrayList(expcnum);
- ArrayModifiableDBIDs cluster_leads = DBIDUtil.newArray(expcnum);
-
- DBIDVar succ = DBIDUtil.newVar(); // Variable for successor.
- // Go backwards on the lower part.
- for (it.seek(split - 1); it.valid(); it.retract()) {
- double dist = lambda.doubleValue(it); // Distance to successor
- pi.assignVar(it, succ); // succ = pi(it)
- int clusterid = cluster_map.intValue(succ);
- // Successor cluster has already been created:
- if (clusterid >= 0) {
- cluster_dbids.get(clusterid).add(it);
- cluster_map.putInt(it, clusterid);
- // Update distance to maximum encountered:
- if (cluster_dist.get(clusterid) < dist) {
- cluster_dist.set(clusterid, dist);
- }
- } else {
- // Need to start a new cluster:
- clusterid = cnum; // next cluster number.
- ModifiableDBIDs cids = DBIDUtil.newArray();
- // Add element and successor as initial members:
- cids.add(succ);
- cluster_map.putInt(succ, clusterid);
- cids.add(it);
- cluster_map.putInt(it, clusterid);
- // Store new cluster.
- cluster_dbids.add(cids);
- cluster_leads.add(succ);
- cluster_dist.add(dist);
- cnum++;
- }
-
- // Decrement counter
- if (progress != null) {
- progress.incrementProcessed(LOG);
- }
- }
- // Build a hierarchy out of these clusters.
- Cluster<DendrogramModel<D>> root = null;
- ModifiableHierarchy<Cluster<DendrogramModel<D>>> hier = new HierarchyHashmapList<Cluster<DendrogramModel<D>>>();
- ArrayList<Cluster<DendrogramModel<D>>> clusters = new ArrayList<Cluster<DendrogramModel<D>>>(ids.size() + expcnum - split);
- // Convert initial clusters to cluster objects
- {
- int i = 0;
- for (DBIDIter it2 = cluster_leads.iter(); it2.valid(); it2.advance(), i++) {
- @SuppressWarnings("unchecked")
- D depth = (D) new DoubleDistance(cluster_dist.get(i));
- clusters.add(makeCluster(it2, depth, cluster_dbids.get(i), hier));
- }
- cluster_dist = null; // Invalidate
- cluster_dbids = null; // Invalidate
- }
- // Process the upper part, bottom-up.
- for (it.seek(split); it.valid(); it.advance()) {
- int clusterid = cluster_map.intValue(it);
- // The current cluster:
- final Cluster<DendrogramModel<D>> clus;
- if (clusterid >= 0) {
- clus = clusters.get(clusterid);
- } else {
- ArrayModifiableDBIDs cids = DBIDUtil.newArray(1);
- cids.add(it);
- clus = makeCluster(it, nulldist, cids, hier);
- // No need to store in clusters: cannot have another incoming pi
- // pointer!
- }
- // The successor to join:
- pi.assignVar(it, succ); // succ = pi(it)
- if (DBIDUtil.equal(it, succ)) {
- assert (root == null);
- root = clus;
- } else {
- // Parent cluster:
- int parentid = cluster_map.intValue(succ);
- @SuppressWarnings("unchecked")
- D depth = (D) new DoubleDistance(lambda.doubleValue(it));
- // Parent cluster exists - merge as a new cluster:
- if (parentid >= 0) {
- Cluster<DendrogramModel<D>> pclus = makeCluster(succ, depth, DBIDUtil.EMPTYDBIDS, hier);
- hier.add(pclus, clusters.get(parentid));
- hier.add(pclus, clus);
- clusters.set(parentid, pclus); // Replace existing parent cluster
- } else {
- // Create a new, one-element, parent cluster.
- parentid = cnum;
- cnum++;
- ArrayModifiableDBIDs cids = DBIDUtil.newArray(1);
- cids.add(succ);
- Cluster<DendrogramModel<D>> pclus = makeCluster(succ, depth, cids, hier);
- hier.add(pclus, clus);
- assert (clusters.size() == parentid);
- clusters.add(pclus); // Remember parent cluster
- cluster_map.putInt(succ, parentid); // Reference
- }
- }
-
- // Decrement counter
- if (progress != null) {
- progress.incrementProcessed(LOG);
- }
- }
-
- if (progress != null) {
- progress.ensureCompleted(LOG);
- }
- // build hierarchy
- final Clustering<DendrogramModel<D>> dendrogram = new Clustering<DendrogramModel<D>>("Single-Link-Dendrogram", "slink-dendrogram");
- dendrogram.addCluster(root);
-
- return dendrogram;
- }
-
- /**
- * Make the cluster for the given object
- *
- * @param lead Leading object
- * @param depth Linkage depth
- * @param members Member objects
- * @param hier Cluster hierarchy
- * @return Cluster
- */
- private Cluster<DendrogramModel<D>> makeCluster(DBIDRef lead, D depth, DBIDs members, ModifiableHierarchy<Cluster<DendrogramModel<D>>> hier) {
- final String name;
- if (members.size() == 0) {
- name = "merge_" + lead + "_" + depth;
- } else if (depth.isInfiniteDistance()) {
- assert (members.contains(lead));
- name = "object_" + lead;
- } else {
- name = "cluster_" + lead + "_" + depth;
- }
- Cluster<DendrogramModel<D>> cluster = new Cluster<DendrogramModel<D>>(name, members, new DendrogramModel<D>(depth), hier);
- return cluster;
- }
-
- @Override
- public TypeInformation[] getInputTypeRestriction() {
- return TypeUtil.array(getDistanceFunction().getInputTypeRestriction());
- }
-
- @Override
- protected Logging getLogger() {
- return LOG;
- }
-
- /**
- * Order a DBID collection by the lambda value.
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
- *
- * @param <D> Distance type
- */
- private static final class CompareByLambda<D extends Distance<D>> implements Comparator<DBIDRef> {
- /**
- * Lambda storage
- */
- private final DataStore<D> lambda;
-
- /**
- * Constructor.
- *
- * @param lambda Lambda storage
- */
- protected CompareByLambda(DataStore<D> lambda) {
- this.lambda = lambda;
- }
-
- @Override
- public int compare(DBIDRef id1, DBIDRef id2) {
- D k1 = lambda.get(id1);
- D k2 = lambda.get(id2);
- assert (k1 != null);
- assert (k2 != null);
- return k1.compareTo(k2);
- }
- }
-
- /**
- * Order a DBID collection by the lambda value.
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
- */
- private static final class CompareByDoubleLambda implements Comparator<DBIDRef> {
- /**
- * Lambda storage
- */
- private final DoubleDistanceDataStore lambda;
-
- /**
- * Constructor.
- *
- * @param lambda Lambda storage
- */
- protected CompareByDoubleLambda(DoubleDistanceDataStore lambda) {
- this.lambda = lambda;
- }
-
- @Override
- public int compare(DBIDRef id1, DBIDRef id2) {
- double k1 = lambda.doubleValue(id1);
- double k2 = lambda.doubleValue(id2);
- return Double.compare(k1, k2);
- }
- }
-
- /**
- * Parameterization class.
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
- */
- public static class Parameterizer<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
- /**
- * The minimum number of clusters to extract
- */
- public static final OptionID SLINK_MINCLUSTERS_ID = new OptionID("slink.minclusters", "The maximum number of clusters to extract.");
-
- protected int minclusters = -1;
-
- @Override
- protected void makeOptions(Parameterization config) {
- super.makeOptions(config);
- IntParameter minclustersP = new IntParameter(SLINK_MINCLUSTERS_ID);
- minclustersP.addConstraint(new GreaterEqualConstraint(1));
- minclustersP.setOptional(true);
- if (config.grab(minclustersP)) {
- minclusters = minclustersP.intValue();
- }
- }
-
- @Override
- protected SLINK<O, D> makeInstance() {
- return new SLINK<O, D>(distanceFunction, minclusters);
- }
- }
-}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SNNClustering.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SNNClustering.java
index f3b59c42..95d9f23c 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SNNClustering.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SNNClustering.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -151,7 +151,7 @@ public class SNNClustering<O> extends AbstractAlgorithm<Clustering<Model>> imple
FiniteProgress objprog = LOG.isVerbose() ? new FiniteProgress("SNNClustering", relation.size(), LOG) : null;
IndefiniteProgress clusprog = LOG.isVerbose() ? new IndefiniteProgress("Number of clusters", LOG) : null;
- resultList = new ArrayList<ModifiableDBIDs>();
+ resultList = new ArrayList<>();
noise = DBIDUtil.newHashSet();
processedIDs = DBIDUtil.newHashSet(relation.size());
if(relation.size() >= minpts) {
@@ -183,11 +183,11 @@ public class SNNClustering<O> extends AbstractAlgorithm<Clustering<Model>> imple
clusprog.setCompleted(LOG);
}
- Clustering<Model> result = new Clustering<Model>("Shared-Nearest-Neighbor Clustering", "snn-clustering");
+ Clustering<Model> result = new Clustering<>("Shared-Nearest-Neighbor Clustering", "snn-clustering");
for(Iterator<ModifiableDBIDs> resultListIter = resultList.iterator(); resultListIter.hasNext();) {
- result.addCluster(new Cluster<Model>(resultListIter.next(), ClusterModel.CLUSTER));
+ result.addToplevelCluster(new Cluster<Model>(resultListIter.next(), ClusterModel.CLUSTER));
}
- result.addCluster(new Cluster<Model>(noise, true, ClusterModel.CLUSTER));
+ result.addToplevelCluster(new Cluster<Model>(noise, true, ClusterModel.CLUSTER));
return result;
}
@@ -322,7 +322,7 @@ public class SNNClustering<O> extends AbstractAlgorithm<Clustering<Model>> imple
Class<SharedNearestNeighborSimilarityFunction<O>> cls = ClassGenericsUtil.uglyCastIntoSubclass(SharedNearestNeighborSimilarityFunction.class);
similarityFunction = config.tryInstantiate(cls);
- DistanceParameter<IntegerDistance> epsilonP = new DistanceParameter<IntegerDistance>(EPSILON_ID, IntegerDistance.FACTORY);
+ DistanceParameter<IntegerDistance> epsilonP = new DistanceParameter<>(EPSILON_ID, IntegerDistance.FACTORY);
if(config.grab(epsilonP)) {
epsilon = epsilonP.getValue();
}
@@ -336,7 +336,7 @@ public class SNNClustering<O> extends AbstractAlgorithm<Clustering<Model>> imple
@Override
protected SNNClustering<O> makeInstance() {
- return new SNNClustering<O>(similarityFunction, epsilon, minpts);
+ return new SNNClustering<>(similarityFunction, epsilon, minpts);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/CASH.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/CASH.java
index 1cb1eb0d..0d82add9 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/CASH.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/CASH.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -66,8 +66,9 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.FirstNEigenPairFilter;
import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredRunner;
import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparableMinHeap;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.IntegerPriorityObject;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ObjectHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -263,8 +264,8 @@ public class CASH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
*/
private Relation<ParameterizationFunction> preprocess(Database db, Relation<V> vrel) {
DBIDs ids = vrel.getDBIDs();
- SimpleTypeInformation<ParameterizationFunction> type = new SimpleTypeInformation<ParameterizationFunction>(ParameterizationFunction.class);
- MaterializedRelation<ParameterizationFunction> prep = new MaterializedRelation<ParameterizationFunction>(db, type, ids);
+ SimpleTypeInformation<ParameterizationFunction> type = new SimpleTypeInformation<>(ParameterizationFunction.class);
+ MaterializedRelation<ParameterizationFunction> prep = new MaterializedRelation<>(db, type, ids);
// Project
for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
@@ -284,12 +285,12 @@ public class CASH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
* @return a mapping of subspace dimensionalities to clusters
*/
private Clustering<Model> doRun(Relation<ParameterizationFunction> relation, FiniteProgress progress) {
- Clustering<Model> res = new Clustering<Model>("CASH clustering", "cash-clustering");
+ Clustering<Model> res = new Clustering<>("CASH clustering", "cash-clustering");
final int dim = dimensionality(relation);
// init heap
- Heap<IntegerPriorityObject<CASHInterval>> heap = new Heap<IntegerPriorityObject<CASHInterval>>();
+ ObjectHeap<IntegerPriorityObject<CASHInterval>> heap = new ComparableMinHeap<>();
ModifiableDBIDs noiseIDs = DBIDUtil.newHashSet(relation.getDBIDs());
initHeap(heap, relation, dim, noiseIDs);
@@ -338,7 +339,7 @@ public class CASH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
// add result of dim-1 to this result
Clustering<Model> res_dim_minus_1 = doRun(db, progress);
for (Cluster<Model> cluster : res_dim_minus_1.getAllClusters()) {
- res.addCluster(cluster);
+ res.addToplevelCluster(cluster);
noiseIDs.removeDBIDs(cluster.getIDs());
clusterIDs.addDBIDs(cluster.getIDs());
processedIDs.addDBIDs(cluster.getIDs());
@@ -349,23 +350,23 @@ public class CASH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
else {
LinearEquationSystem les = runDerivator(relation, dim - 1, interval.getIDs());
Cluster<Model> c = new Cluster<Model>(interval.getIDs(), new LinearEquationModel(les));
- res.addCluster(c);
+ res.addToplevelCluster(c);
noiseIDs.removeDBIDs(interval.getIDs());
clusterIDs.addDBIDs(interval.getIDs());
processedIDs.addDBIDs(interval.getIDs());
}
// Rebuild heap
- ArrayList<IntegerPriorityObject<CASHInterval>> heapVector = new ArrayList<IntegerPriorityObject<CASHInterval>>(heap.size());
- for (IntegerPriorityObject<CASHInterval> obj : heap) {
- heapVector.add(obj);
+ ArrayList<IntegerPriorityObject<CASHInterval>> heapVector = new ArrayList<>(heap.size());
+ for (ObjectHeap.UnsortedIter<IntegerPriorityObject<CASHInterval>> iter = heap.unsortedIter(); iter.valid(); iter.advance()) {
+ heapVector.add(iter.get());
}
heap.clear();
for (IntegerPriorityObject<CASHInterval> pair : heapVector) {
CASHInterval currentInterval = pair.getObject();
currentInterval.removeIDs(clusterIDs);
if (currentInterval.getIDs().size() >= minPts) {
- heap.add(new IntegerPriorityObject<CASHInterval>(currentInterval.priority(), currentInterval));
+ heap.add(new IntegerPriorityObject<>(currentInterval.priority(), currentInterval));
}
}
@@ -378,12 +379,12 @@ public class CASH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
if (!noiseIDs.isEmpty()) {
if (dim == noiseDim) {
Cluster<Model> c = new Cluster<Model>(noiseIDs, true, ClusterModel.CLUSTER);
- res.addCluster(c);
+ res.addToplevelCluster(c);
processedIDs.addDBIDs(noiseIDs);
} else if (noiseIDs.size() >= minPts) {
LinearEquationSystem les = runDerivator(fulldatabase, dim - 1, noiseIDs);
Cluster<Model> c = new Cluster<Model>(noiseIDs, true, new LinearEquationModel(les));
- res.addCluster(c);
+ res.addToplevelCluster(c);
processedIDs.addDBIDs(noiseIDs);
}
}
@@ -427,7 +428,7 @@ public class CASH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
* @param dim the dimensionality of the database
* @param ids the ids of the database
*/
- private void initHeap(Heap<IntegerPriorityObject<CASHInterval>> heap, Relation<ParameterizationFunction> relation, int dim, DBIDs ids) {
+ private void initHeap(ObjectHeap<IntegerPriorityObject<CASHInterval>> heap, Relation<ParameterizationFunction> relation, int dim, DBIDs ids) {
CASHIntervalSplit split = new CASHIntervalSplit(relation, minPts);
// determine minimum and maximum function value of all functions
@@ -479,7 +480,7 @@ public class CASH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
ModifiableDBIDs intervalIDs = split.determineIDs(ids, alphaInterval, d_mins[i], d_maxs[i]);
if (intervalIDs != null && intervalIDs.size() >= minPts) {
CASHInterval rootInterval = new CASHInterval(alphaMin, alphaMax, split, intervalIDs, -1, 0, d_mins[i], d_maxs[i]);
- heap.add(new IntegerPriorityObject<CASHInterval>(rootInterval.priority(), rootInterval));
+ heap.add(new IntegerPriorityObject<>(rootInterval.priority(), rootInterval));
}
}
@@ -503,8 +504,8 @@ public class CASH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
*/
private MaterializedRelation<ParameterizationFunction> buildDB(int dim, Matrix basis, DBIDs ids, Relation<ParameterizationFunction> relation) {
ProxyDatabase proxy = new ProxyDatabase(ids);
- SimpleTypeInformation<ParameterizationFunction> type = new SimpleTypeInformation<ParameterizationFunction>(ParameterizationFunction.class);
- MaterializedRelation<ParameterizationFunction> prep = new MaterializedRelation<ParameterizationFunction>(proxy, type, ids);
+ SimpleTypeInformation<ParameterizationFunction> type = new SimpleTypeInformation<>(ParameterizationFunction.class);
+ MaterializedRelation<ParameterizationFunction> prep = new MaterializedRelation<>(proxy, type, ids);
proxy.addRelation(prep);
// Project
@@ -566,7 +567,7 @@ public class CASH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
private double sinusProduct(int start, int end, double[] alpha) {
double result = 1;
for (int j = start; j < end; j++) {
- result *= StrictMath.sin(alpha[j]);
+ result *= Math.sin(alpha[j]);
}
return result;
}
@@ -578,7 +579,7 @@ public class CASH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
* @param heap the heap storing the intervals
* @return the next ''best'' interval at maximum level
*/
- private CASHInterval determineNextIntervalAtMaxLevel(Heap<IntegerPriorityObject<CASHInterval>> heap) {
+ private CASHInterval determineNextIntervalAtMaxLevel(ObjectHeap<IntegerPriorityObject<CASHInterval>> heap) {
CASHInterval next = doDetermineNextIntervalAtMaxLevel(heap);
// noise path was chosen
while (next == null) {
@@ -598,7 +599,7 @@ public class CASH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
* @param heap the heap storing the intervals
* @return the next ''best'' interval at maximum level
*/
- private CASHInterval doDetermineNextIntervalAtMaxLevel(Heap<IntegerPriorityObject<CASHInterval>> heap) {
+ private CASHInterval doDetermineNextIntervalAtMaxLevel(ObjectHeap<IntegerPriorityObject<CASHInterval>> heap) {
CASHInterval interval = heap.poll().getObject();
int dim = interval.getDimensionality();
while (true) {
@@ -632,10 +633,10 @@ public class CASH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
int comp = interval.getLeftChild().compareTo(interval.getRightChild());
if (comp < 0) {
bestInterval = interval.getRightChild();
- heap.add(new IntegerPriorityObject<CASHInterval>(interval.getLeftChild().priority(), interval.getLeftChild()));
+ heap.add(new IntegerPriorityObject<>(interval.getLeftChild().priority(), interval.getLeftChild()));
} else {
bestInterval = interval.getLeftChild();
- heap.add(new IntegerPriorityObject<CASHInterval>(interval.getRightChild().priority(), interval.getRightChild()));
+ heap.add(new IntegerPriorityObject<>(interval.getRightChild().priority(), interval.getRightChild()));
}
} else if (interval.getLeftChild() == null) {
bestInterval = interval.getRightChild();
@@ -733,8 +734,8 @@ public class CASH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
DBIDs ids = interval.getIDs();
ProxyDatabase proxy = new ProxyDatabase(ids);
int dim = dimensionality(relation);
- SimpleTypeInformation<DoubleVector> type = new VectorFieldTypeInformation<DoubleVector>(DoubleVector.FACTORY, dim);
- MaterializedRelation<DoubleVector> prep = new MaterializedRelation<DoubleVector>(proxy, type, ids);
+ SimpleTypeInformation<DoubleVector> type = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, dim);
+ MaterializedRelation<DoubleVector> prep = new MaterializedRelation<>(proxy, type, ids);
proxy.addRelation(prep);
// Project
@@ -792,8 +793,8 @@ public class CASH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
private Database buildDerivatorDB(Relation<ParameterizationFunction> relation, DBIDs ids) {
ProxyDatabase proxy = new ProxyDatabase(ids);
int dim = dimensionality(relation);
- SimpleTypeInformation<DoubleVector> type = new VectorFieldTypeInformation<DoubleVector>(DoubleVector.FACTORY, dim);
- MaterializedRelation<DoubleVector> prep = new MaterializedRelation<DoubleVector>(proxy, type, ids);
+ SimpleTypeInformation<DoubleVector> type = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, dim);
+ MaterializedRelation<DoubleVector> prep = new MaterializedRelation<>(proxy, type, ids);
proxy.addRelation(prep);
// Project
@@ -864,7 +865,7 @@ public class CASH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
@Override
protected CASH<NumberVector<?>> makeInstance() {
- return new CASH<NumberVector<?>>(minpts, maxlevel, mindim, jitter, adjust);
+ return new CASH<>(minpts, maxlevel, mindim, jitter, adjust);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/COPAC.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/COPAC.java
index ac50559e..9a4b8512 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/COPAC.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/COPAC.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -185,7 +185,7 @@ public class COPAC<V extends NumberVector<?>, D extends Distance<D>> extends Abs
LocalProjectionIndex<V, ?> preprocin = partitionDistanceQuery.getIndex();
// partitioning
- Map<Integer, ModifiableDBIDs> partitionMap = new HashMap<Integer, ModifiableDBIDs>();
+ Map<Integer, ModifiableDBIDs> partitionMap = new HashMap<>();
FiniteProgress partitionProgress = LOG.isVerbose() ? new FiniteProgress("Partitioning", relation.size(), LOG) : null;
int processed = 1;
@@ -214,7 +214,7 @@ public class COPAC<V extends NumberVector<?>, D extends Distance<D>> extends Abs
// convert for partition algorithm.
// TODO: do this with DynamicDBIDs instead
- Map<Integer, DBIDs> pmap = new HashMap<Integer, DBIDs>();
+ Map<Integer, DBIDs> pmap = new HashMap<>();
for(Entry<Integer, ModifiableDBIDs> ent : partitionMap.entrySet()) {
pmap.put(ent.getKey(), ent.getValue());
}
@@ -230,14 +230,14 @@ public class COPAC<V extends NumberVector<?>, D extends Distance<D>> extends Abs
* @param query The preprocessor based query function
*/
private Clustering<Model> runPartitionAlgorithm(Relation<V> relation, Map<Integer, DBIDs> partitionMap, DistanceQuery<V, D> query) {
- Clustering<Model> result = new Clustering<Model>("COPAC clustering", "copac-clustering");
+ Clustering<Model> result = new Clustering<>("COPAC clustering", "copac-clustering");
// TODO: use an extra finite progress for the partitions?
for(Entry<Integer, DBIDs> pair : partitionMap.entrySet()) {
// noise partition
if(pair.getKey() == RelationUtil.dimensionality(relation)) {
// Make a Noise cluster
- result.addCluster(new Cluster<Model>(pair.getValue(), true, ClusterModel.CLUSTER));
+ result.addToplevelCluster(new Cluster<Model>(pair.getValue(), true, ClusterModel.CLUSTER));
}
else {
DBIDs partids = pair.getValue();
@@ -251,10 +251,10 @@ public class COPAC<V extends NumberVector<?>, D extends Distance<D>> extends Abs
// Re-Wrap resulting Clusters as DimensionModel clusters.
for(Cluster<Model> clus : p.getAllClusters()) {
if(clus.isNoise()) {
- result.addCluster(new Cluster<Model>(clus.getIDs(), true, ClusterModel.CLUSTER));
+ result.addToplevelCluster(new Cluster<Model>(clus.getIDs(), true, ClusterModel.CLUSTER));
}
else {
- result.addCluster(new Cluster<Model>(clus.getIDs(), new DimensionModel(pair.getKey())));
+ result.addToplevelCluster(new Cluster<Model>(clus.getIDs(), new DimensionModel(pair.getKey())));
}
}
}
@@ -316,12 +316,12 @@ public class COPAC<V extends NumberVector<?>, D extends Distance<D>> extends Abs
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- ClassParameter<Factory<V, ?>> indexP = new ClassParameter<LocalProjectionIndex.Factory<V, ?>>(PREPROCESSOR_ID, LocalProjectionIndex.Factory.class);
+ ClassParameter<Factory<V, ?>> indexP = new ClassParameter<>(PREPROCESSOR_ID, LocalProjectionIndex.Factory.class);
if(config.grab(indexP)) {
indexI = indexP.instantiateClass(config);
}
- ObjectParameter<FilteredLocalPCABasedDistanceFunction<V, ?, D>> pdistP = new ObjectParameter<FilteredLocalPCABasedDistanceFunction<V, ?, D>>(PARTITION_DISTANCE_ID, FilteredLocalPCABasedDistanceFunction.class, LocallyWeightedDistanceFunction.class);
+ ObjectParameter<FilteredLocalPCABasedDistanceFunction<V, ?, D>> pdistP = new ObjectParameter<>(PARTITION_DISTANCE_ID, FilteredLocalPCABasedDistanceFunction.class, LocallyWeightedDistanceFunction.class);
if(config.grab(pdistP)) {
ListParameterization predefinedDist = new ListParameterization();
predefinedDist.addParameter(IndexBasedDistanceFunction.INDEX_ID, indexI);
@@ -332,7 +332,7 @@ public class COPAC<V extends NumberVector<?>, D extends Distance<D>> extends Abs
}
// Parameterize algorithm:
- ClassParameter<ClusteringAlgorithm<Clustering<Model>>> algP = new ClassParameter<ClusteringAlgorithm<Clustering<Model>>>(PARTITION_ALGORITHM_ID, ClusteringAlgorithm.class);
+ ClassParameter<ClusteringAlgorithm<Clustering<Model>>> algP = new ClassParameter<>(PARTITION_ALGORITHM_ID, ClusteringAlgorithm.class);
if(config.grab(algP)) {
ListParameterization predefined = new ListParameterization();
predefined.addParameter(AbstractDistanceBasedAlgorithm.DISTANCE_FUNCTION_ID, pdistI);
@@ -348,7 +348,7 @@ public class COPAC<V extends NumberVector<?>, D extends Distance<D>> extends Abs
@Override
protected COPAC<V, D> makeInstance() {
- return new COPAC<V, D>(pdistI, algC, algO);
+ return new COPAC<>(pdistI, algC, algO);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ERiC.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ERiC.java
index 7e7314b4..d535e136 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ERiC.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ERiC.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -25,8 +25,6 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation;
import java.util.ArrayList;
import java.util.List;
-import java.util.SortedMap;
-import java.util.TreeMap;
import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
@@ -58,6 +56,8 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.FirstNEigenPairFilter;
import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredResult;
import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredRunner;
import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy.Hierarchy;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy.Hierarchy.Iter;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -125,7 +125,7 @@ public class ERiC<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
StepProgress stepprog = LOG.isVerbose() ? new StepProgress(3) : null;
// run COPAC
- if(stepprog != null) {
+ if (stepprog != null) {
stepprog.beginStep(1, "Preprocessing local correlation dimensionalities and partitioning data", LOG);
}
Clustering<Model> copacResult = copacAlgorithm.run(relation);
@@ -133,16 +133,16 @@ public class ERiC<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
DistanceQuery<V, IntegerDistance> query = copacAlgorithm.getPartitionDistanceQuery();
// extract correlation clusters
- if(stepprog != null) {
+ if (stepprog != null) {
stepprog.beginStep(2, "Extract correlation clusters", LOG);
}
- SortedMap<Integer, List<Cluster<CorrelationModel<V>>>> clusterMap = extractCorrelationClusters(copacResult, relation, dimensionality);
- if(LOG.isDebugging()) {
+ List<List<Cluster<CorrelationModel<V>>>> clusterMap = extractCorrelationClusters(copacResult, relation, dimensionality);
+ if (LOG.isDebugging()) {
StringBuilder msg = new StringBuilder("Step 2: Extract correlation clusters...");
- for(Integer corrDim : clusterMap.keySet()) {
+ for (int corrDim = 0; corrDim < clusterMap.size(); corrDim++) {
List<Cluster<CorrelationModel<V>>> correlationClusters = clusterMap.get(corrDim);
msg.append("\n\ncorrDim ").append(corrDim);
- for(Cluster<CorrelationModel<V>> cluster : correlationClusters) {
+ for (Cluster<CorrelationModel<V>> cluster : correlationClusters) {
msg.append("\n cluster ").append(cluster).append(", ids: ").append(cluster.getIDs().size());
// .append(", level: ").append(cluster.getLevel()).append(", index: ").append(cluster.getLevelIndex());
// msg.append("\n basis " +
@@ -152,45 +152,45 @@ public class ERiC<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
}
LOG.debugFine(msg.toString());
}
- if(LOG.isVerbose()) {
+ if (LOG.isVerbose()) {
int clusters = 0;
- for(List<Cluster<CorrelationModel<V>>> correlationClusters : clusterMap.values()) {
+ for (List<Cluster<CorrelationModel<V>>> correlationClusters : clusterMap) {
clusters += correlationClusters.size();
}
LOG.verbose(clusters + " clusters extracted.");
}
// build hierarchy
- if(stepprog != null) {
+ if (stepprog != null) {
stepprog.beginStep(3, "Building hierarchy", LOG);
}
- buildHierarchy(clusterMap, query);
- if(LOG.isDebugging()) {
+ Clustering<CorrelationModel<V>> clustering = new Clustering<>("ERiC clustering", "eric-clustering");
+ buildHierarchy(clustering, clusterMap, query);
+ if (LOG.isDebugging()) {
StringBuilder msg = new StringBuilder("Step 3: Build hierarchy");
- for(Integer corrDim : clusterMap.keySet()) {
+ for (int corrDim = 0; corrDim < clusterMap.size(); corrDim++) {
List<Cluster<CorrelationModel<V>>> correlationClusters = clusterMap.get(corrDim);
- for(Cluster<CorrelationModel<V>> cluster : correlationClusters) {
+ for (Cluster<CorrelationModel<V>> cluster : correlationClusters) {
msg.append("\n cluster ").append(cluster).append(", ids: ").append(cluster.getIDs().size());
// .append(", level: ").append(cluster.getLevel()).append(", index: ").append(cluster.getLevelIndex());
- for(int i = 0; i < cluster.getParents().size(); i++) {
- msg.append("\n parent ").append(cluster.getParents().get(i));
+ for (Iter<Cluster<CorrelationModel<V>>> iter = clustering.getClusterHierarchy().iterParents(cluster); iter.valid(); iter.advance()) {
+ msg.append("\n parent ").append(iter.get());
}
- for(int i = 0; i < cluster.numChildren(); i++) {
- msg.append("\n child ").append(cluster.getChildren().get(i));
+ for (Iter<Cluster<CorrelationModel<V>>> iter = clustering.getClusterHierarchy().iterChildren(cluster); iter.valid(); iter.advance()) {
+ msg.append("\n child ").append(iter.get());
}
}
}
LOG.debugFine(msg.toString());
}
- if(stepprog != null) {
+ if (stepprog != null) {
stepprog.setCompleted(LOG);
}
- Clustering<CorrelationModel<V>> result = new Clustering<CorrelationModel<V>>("ERiC clustering", "eric-clustering");
- for(Cluster<CorrelationModel<V>> rc : clusterMap.get(clusterMap.lastKey())) {
- result.addCluster(rc);
+ for (Cluster<CorrelationModel<V>> rc : clusterMap.get(clusterMap.size() - 1)) {
+ clustering.addToplevelCluster(rc);
}
- return result;
+ return clustering;
}
/**
@@ -203,77 +203,75 @@ public class ERiC<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
*
* @param database the database containing the objects
* @param dimensionality the dimensionality of the feature space
- * @return a mapping of correlation dimension to maps of clusters
+ * @return a list of clusters for each dimensionality
*/
- private SortedMap<Integer, List<Cluster<CorrelationModel<V>>>> extractCorrelationClusters(Clustering<Model> copacResult, Relation<V> database, int dimensionality) {
+ private List<List<Cluster<CorrelationModel<V>>>> extractCorrelationClusters(Clustering<Model> copacResult, Relation<V> database, int dimensionality) {
// result
- SortedMap<Integer, List<Cluster<CorrelationModel<V>>>> clusterMap = new TreeMap<Integer, List<Cluster<CorrelationModel<V>>>>();
+ List<List<Cluster<CorrelationModel<V>>>> clusterMap = new ArrayList<>();
+ for (int i = 0; i <= dimensionality; i++) {
+ clusterMap.add(new ArrayList<Cluster<CorrelationModel<V>>>());
+ }
// noise cluster containing all noise objects over all partitions
Cluster<Model> noise = null;
// iterate over correlation dimensions
- for(Cluster<Model> clus : copacResult.getAllClusters()) {
+ for (Cluster<Model> clus : copacResult.getAllClusters()) {
DBIDs group = clus.getIDs();
- if(clus.getModel() != null && clus.getModel() instanceof DimensionModel) {
+ if (clus.getModel() != null && clus.getModel() instanceof DimensionModel) {
int correlationDimension = ((DimensionModel) clus.getModel()).getDimension();
ListParameterization parameters = pcaParameters(correlationDimension);
Class<PCAFilteredRunner<V>> cls = ClassGenericsUtil.uglyCastIntoSubclass(PCAFilteredRunner.class);
PCAFilteredRunner<V> pca = parameters.tryInstantiate(cls);
- for(ParameterException e : parameters.getErrors()) {
- LOG.warning("Error in internal parameterization: " + e.getMessage());
- }
+ parameters.failOnErrors();
// get cluster list for this dimension.
List<Cluster<CorrelationModel<V>>> correlationClusters = clusterMap.get(correlationDimension);
- if(correlationClusters == null) {
- correlationClusters = new ArrayList<Cluster<CorrelationModel<V>>>();
- clusterMap.put(correlationDimension, correlationClusters);
- }
-
PCAFilteredResult pcares = pca.processIds(group, database);
V centroid = Centroid.make(database, group).toVector(database);
- Cluster<CorrelationModel<V>> correlationCluster = new Cluster<CorrelationModel<V>>("[" + correlationDimension + "_" + correlationClusters.size() + "]", group, new CorrelationModel<V>(pcares, centroid), new ArrayList<Cluster<CorrelationModel<V>>>(), new ArrayList<Cluster<CorrelationModel<V>>>());
+ Cluster<CorrelationModel<V>> correlationCluster = new Cluster<>("[" + correlationDimension + "_" + correlationClusters.size() + "]", group, new CorrelationModel<>(pcares, centroid));
correlationClusters.add(correlationCluster);
}
// partition containing noise
- else if(clus.getModel() != null && clus.isNoise()) {
- if(noise == null) {
+ else if (clus.getModel() != null && clus.isNoise()) {
+ if (noise == null) {
noise = clus;
- }
- else {
+ } else {
ModifiableDBIDs merged = DBIDUtil.newHashSet(noise.getIDs());
merged.addDBIDs(clus.getIDs());
noise.setIDs(merged);
}
- }
- else {
+ } else {
throw new IllegalStateException("Unexpected group returned: " + clus.getClass().getName());
}
}
- if(noise != null && noise.size() > 0) {
+ if (noise != null && noise.size() > 0) {
// get cluster list for this dimension.
List<Cluster<CorrelationModel<V>>> correlationClusters = clusterMap.get(dimensionality);
- if(correlationClusters == null) {
- correlationClusters = new ArrayList<Cluster<CorrelationModel<V>>>();
- clusterMap.put(dimensionality, correlationClusters);
- }
ListParameterization parameters = pcaParameters(dimensionality);
Class<PCAFilteredRunner<V>> cls = ClassGenericsUtil.uglyCastIntoSubclass(PCAFilteredRunner.class);
PCAFilteredRunner<V> pca = parameters.tryInstantiate(cls);
- for(ParameterException e : parameters.getErrors()) {
+ for (ParameterException e : parameters.getErrors()) {
LOG.warning("Error in internal parameterization: " + e.getMessage());
}
PCAFilteredResult pcares = pca.processIds(noise.getIDs(), database);
V centroid = Centroid.make(database, noise.getIDs()).toVector(database);
- Cluster<CorrelationModel<V>> correlationCluster = new Cluster<CorrelationModel<V>>("[noise]", noise.getIDs(), new CorrelationModel<V>(pcares, centroid), new ArrayList<Cluster<CorrelationModel<V>>>(), new ArrayList<Cluster<CorrelationModel<V>>>());
+ Cluster<CorrelationModel<V>> correlationCluster = new Cluster<>("[noise]", noise.getIDs(), new CorrelationModel<>(pcares, centroid));
correlationClusters.add(correlationCluster);
}
+ // Delete dimensionalities not found.
+ for (int i = dimensionality; i > 0; i--) {
+ if (clusterMap.get(i).size() > 0) {
+ break;
+ }
+ clusterMap.remove(i);
+ }
+
return clusterMap;
}
@@ -292,48 +290,48 @@ public class ERiC<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
return parameters;
}
- private void buildHierarchy(SortedMap<Integer, List<Cluster<CorrelationModel<V>>>> clusterMap, DistanceQuery<V, IntegerDistance> query) {
- StringBuilder msg = new StringBuilder();
+ private void buildHierarchy(Clustering<CorrelationModel<V>> clustering, List<List<Cluster<CorrelationModel<V>>>> clusterMap, DistanceQuery<V, IntegerDistance> query) {
+ StringBuilder msg = LOG.isDebuggingFine() ? new StringBuilder() : null;
+ Hierarchy<Cluster<CorrelationModel<V>>> hier = clustering.getClusterHierarchy();
DBSCAN<V, DoubleDistance> dbscan = ClassGenericsUtil.castWithGenericsOrNull(DBSCAN.class, copacAlgorithm.getPartitionAlgorithm(query));
- if(dbscan == null) {
+ if (dbscan == null) {
// TODO: appropriate exception class?
throw new IllegalArgumentException("ERiC was run without DBSCAN as COPAC algorithm!");
}
DistanceFunction<? super V, ?> dfun = ProxyDistanceFunction.unwrapDistance(dbscan.getDistanceFunction());
ERiCDistanceFunction distanceFunction = ClassGenericsUtil.castWithGenericsOrNull(ERiCDistanceFunction.class, dfun);
- if(distanceFunction == null) {
+ if (distanceFunction == null) {
// TODO: appropriate exception class?
throw new IllegalArgumentException("ERiC was run without ERiCDistanceFunction as distance function: got " + dfun.getClass());
}
- Integer lambda_max = clusterMap.lastKey();
+ // Find maximum dimensionality found:
+ int lambda_max = clusterMap.size() - 1;
- for(Integer childCorrDim : clusterMap.keySet()) {
+ for (int childCorrDim = 0; childCorrDim < lambda_max; childCorrDim++) {
List<Cluster<CorrelationModel<V>>> children = clusterMap.get(childCorrDim);
- SortedMap<Integer, List<Cluster<CorrelationModel<V>>>> parentMap = clusterMap.tailMap(childCorrDim + 1);
- if(LOG.isDebugging()) {
+ // SortedMap<Integer, List<Cluster<CorrelationModel<V>>>> parentMap =
+ // clusterMap.tailMap(childCorrDim + 1);
+ if (msg != null) {
msg.append("\ncorrdim ").append(childCorrDim);
- msg.append("\nparents ").append(parentMap.keySet());
+ // msg.append("\nparents ").append(parentMap.keySet());
}
- for(Cluster<CorrelationModel<V>> child : children) {
- for(Integer parentCorrDim : parentMap.keySet()) {
- List<Cluster<CorrelationModel<V>>> parents = parentMap.get(parentCorrDim);
- for(Cluster<CorrelationModel<V>> parent : parents) {
+ for (Cluster<CorrelationModel<V>> child : children) {
+ for (int parentCorrDim = childCorrDim + 1; parentCorrDim <= lambda_max; parentCorrDim++) {
+ List<Cluster<CorrelationModel<V>>> parents = clusterMap.get(parentCorrDim);
+ for (Cluster<CorrelationModel<V>> parent : parents) {
int subspaceDim_parent = parent.getModel().getPCAResult().getCorrelationDimension();
- if(subspaceDim_parent == lambda_max && child.getParents().isEmpty()) {
- parent.getChildren().add(child);
- child.getParents().add(parent);
- if(LOG.isDebugging()) {
+ if (subspaceDim_parent == lambda_max && hier.numParents(child) == 0) {
+ clustering.addChildCluster(parent, child);
+ if (msg != null) {
msg.append('\n').append(parent).append(" is parent of ").append(child);
}
- }
- else {
+ } else {
BitDistance dist = distanceFunction.distance(parent.getModel().getCentroid(), child.getModel().getCentroid(), parent.getModel().getPCAResult(), child.getModel().getPCAResult());
- if(!dist.bitValue() && (child.getParents().isEmpty() || !isParent(distanceFunction, parent, child.getParents()))) {
- parent.getChildren().add(child);
- child.getParents().add(parent);
- if(LOG.isDebugging()) {
+ if (!dist.bitValue() && (hier.numParents(child) == 0 || !isParent(distanceFunction, parent, hier.iterParents(child)))) {
+ clustering.addChildCluster(parent, child);
+ if (msg != null) {
msg.append('\n').append(parent).append(" is parent of ").append(child);
}
}
@@ -342,7 +340,7 @@ public class ERiC<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
}
}
}
- if(LOG.isDebugging()) {
+ if (msg != null) {
LOG.debugFine(msg.toString());
}
@@ -355,32 +353,32 @@ public class ERiC<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
* @param distanceFunction the distance function for distance computation
* between the clusters
* @param parent the parent to be tested
- * @param children the list of children to be tested
+ * @param iter the list of children to be tested
* @return true, if the specified parent cluster is a parent of one child of
* the children clusters, false otherwise
*/
- private boolean isParent(ERiCDistanceFunction distanceFunction, Cluster<CorrelationModel<V>> parent, List<Cluster<CorrelationModel<V>>> children) {
-
- StringBuilder msg = new StringBuilder();
+ private boolean isParent(ERiCDistanceFunction distanceFunction, Cluster<CorrelationModel<V>> parent, Iter<Cluster<CorrelationModel<V>>> iter) {
+ StringBuilder msg = LOG.isDebugging() ? new StringBuilder() : null;
- for(Cluster<CorrelationModel<V>> child : children) {
- if(parent.getModel().getPCAResult().getCorrelationDimension() == child.getModel().getPCAResult().getCorrelationDimension()) {
+ for (; iter.valid(); iter.advance()) {
+ Cluster<CorrelationModel<V>> child = iter.get();
+ if (parent.getModel().getPCAResult().getCorrelationDimension() == child.getModel().getPCAResult().getCorrelationDimension()) {
return false;
}
BitDistance dist = distanceFunction.distance(parent.getModel().getCentroid(), child.getModel().getCentroid(), parent.getModel().getPCAResult(), child.getModel().getPCAResult());
- if(LOG.isDebugging()) {
+ if (msg != null) {
msg.append("\ndist(").append(child).append(" - ").append(parent).append(") = ").append(dist);
}
- if(!dist.bitValue()) {
- if(LOG.isDebugging()) {
- LOG.debugFine(msg.toString());
+ if (!dist.bitValue()) {
+ if (msg != null) {
+ LOG.debugFine(msg);
}
return true;
}
}
- if(LOG.isDebugging()) {
+ if (msg != null) {
LOG.debugFine(msg.toString());
}
return false;
@@ -395,7 +393,7 @@ public class ERiC<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
protected Logging getLogger() {
return LOG;
}
-
+
/**
* Parameterization class.
*
@@ -418,7 +416,7 @@ public class ERiC<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
@Override
protected ERiC<V> makeInstance() {
- return new ERiC<V>(copac);
+ return new ERiC<>(copac);
}
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/FourC.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/FourC.java
index f56342e0..5235273c 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/FourC.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/FourC.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -115,7 +115,7 @@ public class FourC<V extends NumberVector<?>> extends AbstractProjectedDBSCAN<Cl
@Override
protected FourC<O> makeInstance() {
- return new FourC<O>(epsilon, minpts, outerdist, lambda);
+ return new FourC<>(epsilon, minpts, outerdist, lambda);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/HiCO.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/HiCO.java
index 759e8f59..d1b714bf 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/HiCO.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/HiCO.java
@@ -64,7 +64,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
*/
@Title("Mining Hierarchies of Correlation Clusters")
@Description("Algorithm for detecting hierarchies of correlation clusters.")
-@Reference(authors = "E. Achtert, C. Böhm, P. Kröger, A. Zimek", title = "Mining Hierarchies of Correlation Clusterse", booktitle = "Proc. Int. Conf. on Scientific and Statistical Database Management (SSDBM'06), Vienna, Austria, 2006", url = "http://dx.doi.org/10.1109/SSDBM.2006.35")
+@Reference(authors = "E. Achtert, C. Böhm, P. Kröger, A. Zimek", title = "Mining Hierarchies of Correlation Clusters", booktitle = "Proc. Int. Conf. on Scientific and Statistical Database Management (SSDBM'06), Vienna, Austria, 2006", url = "http://dx.doi.org/10.1109/SSDBM.2006.35")
public class HiCO<V extends NumberVector<?>> extends OPTICS<V, PCACorrelationDistance> {
/**
* The logger for this class.
@@ -207,7 +207,7 @@ public class HiCO<V extends NumberVector<?>> extends OPTICS<V, PCACorrelationDis
@Override
protected HiCO<V> makeInstance() {
- return new HiCO<V>(distance, mu);
+ return new HiCO<>(distance, mu);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/LMCLUS.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/LMCLUS.java
index fdea8b35..f9531be0 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/LMCLUS.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/LMCLUS.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -159,7 +159,7 @@ public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
* @return Clustering result
*/
public Clustering<Model> run(Database database, Relation<NumberVector<?>> relation) {
- Clustering<Model> ret = new Clustering<Model>("LMCLUS Clustering", "lmclus-clustering");
+ Clustering<Model> ret = new Clustering<>("LMCLUS Clustering", "lmclus-clustering");
FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Clustered objects", relation.size(), LOG) : null;
IndefiniteProgress cprogress = LOG.isVerbose() ? new IndefiniteProgress("Clusters found", LOG) : null;
ModifiableDBIDs unclustered = DBIDUtil.newHashSet(relation.getDBIDs());
@@ -204,10 +204,10 @@ public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
}
// New cluster found
// TODO: annotate cluster with dimensionality
- final Cluster<Model> cluster = new Cluster<Model>(current);
+ final Cluster<Model> cluster = new Cluster<>(current);
cluster.setName("Cluster_" + lmDim + "d_" + cnum);
cnum++;
- ret.addCluster(cluster);
+ ret.addToplevelCluster(cluster);
// Remove from main working set.
unclustered.removeDBIDs(current);
if (progress != null) {
@@ -219,7 +219,7 @@ public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
}
// Remaining objects are noise
if (unclustered.size() > 0) {
- ret.addCluster(new Cluster<Model>(unclustered, true));
+ ret.addToplevelCluster(new Cluster<>(unclustered, true));
}
if (progress != null) {
progress.setProcessed(relation.size(), LOG);
@@ -281,7 +281,7 @@ public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
// Build orthogonal basis from remainder
Matrix basis;
{
- List<Vector> vectors = new ArrayList<Vector>(sample.size() - 1);
+ List<Vector> vectors = new ArrayList<>(sample.size() - 1);
for (; iter.valid(); iter.advance()) {
Vector vec = relation.get(iter).getColumnVector();
vectors.add(vec.minusEquals(originV));
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ORCLUS.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ORCLUS.java
index f567098b..a9c67a58 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ORCLUS.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ORCLUS.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -42,10 +42,10 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.generic.GenericDistanceDBIDList;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.GenericDistanceDBIDList;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
@@ -178,9 +178,9 @@ public class ORCLUS<V extends NumberVector<?>> extends AbstractProjectedClusteri
}
// get the result
- Clustering<Model> r = new Clustering<Model>("ORCLUS clustering", "orclus-clustering");
+ Clustering<Model> r = new Clustering<>("ORCLUS clustering", "orclus-clustering");
for (ORCLUSCluster c : clusters) {
- r.addCluster(new Cluster<Model>(c.objectIDs, ClusterModel.CLUSTER));
+ r.addToplevelCluster(new Cluster<Model>(c.objectIDs, ClusterModel.CLUSTER));
}
return r;
} catch (Exception e) {
@@ -198,7 +198,7 @@ public class ORCLUS<V extends NumberVector<?>> extends AbstractProjectedClusteri
private List<ORCLUSCluster> initialSeeds(Relation<V> database, int k) {
DBIDs randomSample = DBIDUtil.randomSample(database.getDBIDs(), k, rnd);
NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(database);
- List<ORCLUSCluster> seeds = new ArrayList<ORCLUSCluster>();
+ List<ORCLUSCluster> seeds = new ArrayList<>();
for (DBIDIter iter = randomSample.iter(); iter.valid(); iter.advance()) {
seeds.add(new ORCLUSCluster(database.get(iter), iter, factory));
}
@@ -222,7 +222,7 @@ public class ORCLUS<V extends NumberVector<?>> extends AbstractProjectedClusteri
}
// projected centroids of the clusters
- List<V> projectedCentroids = new ArrayList<V>(clusters.size());
+ List<V> projectedCentroids = new ArrayList<>(clusters.size());
for (ORCLUSCluster c : clusters) {
projectedCentroids.add(projection(c, c.centroid, factory));
}
@@ -270,7 +270,7 @@ public class ORCLUS<V extends NumberVector<?>> extends AbstractProjectedClusteri
private Matrix findBasis(Relation<V> database, DistanceQuery<V, DoubleDistance> distFunc, ORCLUSCluster cluster, int dim) {
// covariance matrix of cluster
// Matrix covariance = Util.covarianceMatrix(database, cluster.objectIDs);
- GenericDistanceDBIDList<DoubleDistance> results = new GenericDistanceDBIDList<DoubleDistance>(cluster.objectIDs.size());
+ GenericDistanceDBIDList<DoubleDistance> results = new GenericDistanceDBIDList<>(cluster.objectIDs.size());
for (DBIDIter it = cluster.objectIDs.iter(); it.valid(); it.advance()) {
DoubleDistance distance = distFunc.distance(cluster.centroid, database.get(it));
results.add(distance, it);
@@ -303,7 +303,7 @@ public class ORCLUS<V extends NumberVector<?>> extends AbstractProjectedClusteri
* @param d_new the new dimensionality of the subspaces for each seed
*/
private void merge(Relation<V> database, DistanceQuery<V, DoubleDistance> distFunc, List<ORCLUSCluster> clusters, int k_new, int d_new, IndefiniteProgress cprogress) {
- ArrayList<ProjectedEnergy> projectedEnergies = new ArrayList<ProjectedEnergy>();
+ ArrayList<ProjectedEnergy> projectedEnergies = new ArrayList<>();
for (int i = 0; i < clusters.size(); i++) {
for (int j = 0; j < clusters.size(); j++) {
if (i >= j) {
@@ -387,16 +387,16 @@ public class ORCLUS<V extends NumberVector<?>> extends AbstractProjectedClusteri
ORCLUSCluster c_ij = union(database, distFunc, c_i, c_j, dim);
NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(database);
- DoubleDistance sum = getDistanceFunction().getDistanceFactory().nullDistance();
+ double sum = 0.;
V c_proj = projection(c_ij, c_ij.centroid, factory);
for (DBIDIter iter = c_ij.objectIDs.iter(); iter.valid(); iter.advance()) {
V o_proj = projection(c_ij, database.get(iter), factory);
- DoubleDistance dist = distFunc.distance(o_proj, c_proj);
- sum = sum.plus(dist.times(dist));
+ double dist = distFunc.distance(o_proj, c_proj).doubleValue();
+ sum += dist * dist;
}
- DoubleDistance projectedEnergy = sum.times(1.0 / c_ij.objectIDs.size());
+ sum /= c_ij.objectIDs.size();
- return new ProjectedEnergy(i, j, c_ij, projectedEnergy);
+ return new ProjectedEnergy(i, j, c_ij, sum);
}
/**
@@ -520,9 +520,9 @@ public class ORCLUS<V extends NumberVector<?>> extends AbstractProjectedClusteri
ORCLUSCluster cluster;
- DoubleDistance projectedEnergy;
+ double projectedEnergy;
- ProjectedEnergy(int i, int j, ORCLUSCluster cluster, DoubleDistance projectedEnergy) {
+ ProjectedEnergy(int i, int j, ORCLUSCluster cluster, double projectedEnergy) {
this.i = i;
this.j = j;
this.cluster = cluster;
@@ -538,7 +538,7 @@ public class ORCLUS<V extends NumberVector<?>> extends AbstractProjectedClusteri
*/
@Override
public int compareTo(ProjectedEnergy o) {
- return this.projectedEnergy.compareTo(o.projectedEnergy);
+ return Double.compare(projectedEnergy, o.projectedEnergy);
}
}
@@ -606,7 +606,7 @@ public class ORCLUS<V extends NumberVector<?>> extends AbstractProjectedClusteri
@Override
protected ORCLUS<V> makeInstance() {
- return new ORCLUS<V>(k, k_i, l, alpha, rnd, pca);
+ return new ORCLUS<>(k, k_i, l, alpha, rnd, pca);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHInterval.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHInterval.java
index 0153ddc3..95cb2e58 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHInterval.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHInterval.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.cash;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHIntervalSplit.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHIntervalSplit.java
index 12f10725..328fe3b3 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHIntervalSplit.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHIntervalSplit.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.cash;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -81,8 +81,8 @@ public class CASHIntervalSplit {
this.database = database;
this.minPts = minPts;
- this.f_minima = new HashMap<HyperBoundingBox, Map<DBID, Double>>();
- this.f_maxima = new HashMap<HyperBoundingBox, Map<DBID, Double>>();
+ this.f_minima = new HashMap<>();
+ this.f_maxima = new HashMap<>();
}
/**
@@ -108,9 +108,9 @@ public class CASHIntervalSplit {
Map<DBID, Double> minima = f_minima.get(interval);
Map<DBID, Double> maxima = f_maxima.get(interval);
if(minima == null || maxima == null) {
- minima = new HashMap<DBID, Double>();
+ minima = new HashMap<>();
f_minima.put(interval, minima);
- maxima = new HashMap<DBID, Double>();
+ maxima = new HashMap<>();
f_maxima.put(interval, maxima);
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/ParameterizationFunction.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/ParameterizationFunction.java
index 56e68bfe..5c690feb 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/ParameterizationFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/ParameterizationFunction.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.cash;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/package-info.java
index 8b6d104c..bfc272fd 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/package-info.java
@@ -7,7 +7,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2012
+Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/package-info.java
index 665de632..89d3c930 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/package-info.java
@@ -7,7 +7,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2012
+Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/CorePredicate.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/CorePredicate.java
index a4440a29..27cc48d6 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/CorePredicate.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/CorePredicate.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/EpsilonNeighborPredicate.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/EpsilonNeighborPredicate.java
index 2b946f1c..545a8171 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/EpsilonNeighborPredicate.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/EpsilonNeighborPredicate.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -32,11 +32,11 @@ import de.lmu.ifi.dbs.elki.database.QueryUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
@@ -91,7 +91,7 @@ public class EpsilonNeighborPredicate<O, D extends Distance<D>> implements Neigh
public <T> NeighborPredicate.Instance<T> instantiate(Database database, SimpleTypeInformation<?> type) {
DistanceQuery<O, D> dq = QueryUtil.getDistanceQuery(database, distFunc);
RangeQuery<O, D> rq = database.getRangeQuery(dq);
- return (NeighborPredicate.Instance<T>) new Instance<D>(epsilon, rq, dq.getRelation().getDBIDs());
+ return (NeighborPredicate.Instance<T>) new Instance<>(epsilon, rq, dq.getRelation().getDBIDs());
}
@Override
@@ -109,7 +109,7 @@ public class EpsilonNeighborPredicate<O, D extends Distance<D>> implements Neigh
*
* @author Erich Schubert
*/
- public static class Instance<D extends Distance<D>> implements NeighborPredicate.Instance<DistanceDBIDResult<D>> {
+ public static class Instance<D extends Distance<D>> implements NeighborPredicate.Instance<DistanceDBIDList<D>> {
/**
* Range to query with
*/
@@ -145,12 +145,12 @@ public class EpsilonNeighborPredicate<O, D extends Distance<D>> implements Neigh
}
@Override
- public DistanceDBIDResult<D> getNeighbors(DBIDRef reference) {
+ public DistanceDBIDList<D> getNeighbors(DBIDRef reference) {
return rq.getRangeForDBID(reference, epsilon);
}
@Override
- public void addDBIDs(ModifiableDBIDs ids, DistanceDBIDResult<D> neighbors) {
+ public void addDBIDs(ModifiableDBIDs ids, DistanceDBIDList<D> neighbors) {
ids.addDBIDs(neighbors);
}
}
@@ -177,14 +177,14 @@ public class EpsilonNeighborPredicate<O, D extends Distance<D>> implements Neigh
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
// Get a distance function.
- ObjectParameter<DistanceFunction<O, D>> distanceP = new ObjectParameter<DistanceFunction<O, D>>(AbstractDistanceBasedAlgorithm.DISTANCE_FUNCTION_ID, DistanceFunction.class, EuclideanDistanceFunction.class);
+ ObjectParameter<DistanceFunction<O, D>> distanceP = new ObjectParameter<>(AbstractDistanceBasedAlgorithm.DISTANCE_FUNCTION_ID, DistanceFunction.class, EuclideanDistanceFunction.class);
D distanceFactory = null;
if(config.grab(distanceP)) {
distfun = distanceP.instantiateClass(config);
distanceFactory = distfun.getDistanceFactory();
}
// Get the epsilon parameter
- DistanceParameter<D> epsilonP = new DistanceParameter<D>(de.lmu.ifi.dbs.elki.algorithm.clustering.DBSCAN.EPSILON_ID, distanceFactory);
+ DistanceParameter<D> epsilonP = new DistanceParameter<>(de.lmu.ifi.dbs.elki.algorithm.clustering.DBSCAN.EPSILON_ID, distanceFactory);
if(config.grab(epsilonP)) {
epsilon = epsilonP.getValue();
}
@@ -192,7 +192,7 @@ public class EpsilonNeighborPredicate<O, D extends Distance<D>> implements Neigh
@Override
protected EpsilonNeighborPredicate<O, D> makeInstance() {
- return new EpsilonNeighborPredicate<O, D>(epsilon, distfun);
+ return new EpsilonNeighborPredicate<>(epsilon, distfun);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/GeneralizedDBSCAN.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/GeneralizedDBSCAN.java
index ef1cb0dc..1e0a8642 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/GeneralizedDBSCAN.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/GeneralizedDBSCAN.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -32,6 +32,7 @@ import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.model.ClusterModel;
+import de.lmu.ifi.dbs.elki.data.model.CoreObjectsModel;
import de.lmu.ifi.dbs.elki.data.model.Model;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
@@ -53,6 +54,7 @@ import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
/**
@@ -67,7 +69,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
*
* @author Erich Schubert
* @author Arthur Zimek
- *
+ *
* @apiviz.landmark
*
* @apiviz.has Instance
@@ -92,22 +94,29 @@ public class GeneralizedDBSCAN extends AbstractAlgorithm<Clustering<Model>> impl
CorePredicate corepred;
/**
+ * Track which objects are "core" objects.
+ */
+ boolean coremodel = false;
+
+ /**
* Constructor for parameterized algorithm.
*
- * @param npred Neighbor predicate
- * @param corepred Core point predicate
+ * @param npred Neighbor predicate.
+ * @param corepred Core point predicate.
+ * @param coremodel Keep track of core points.
*/
- public GeneralizedDBSCAN(NeighborPredicate npred, CorePredicate corepred) {
+ public GeneralizedDBSCAN(NeighborPredicate npred, CorePredicate corepred, boolean coremodel) {
super();
this.npred = npred;
this.corepred = corepred;
+ this.coremodel = coremodel;
}
@Override
public Clustering<Model> run(Database database) {
for (SimpleTypeInformation<?> t : npred.getOutputType()) {
if (corepred.acceptsType(t)) {
- return new Instance<Object>(npred.instantiate(database, t), corepred.instantiate(database, t)).run();
+ return new Instance<>(npred.instantiate(database, t), corepred.instantiate(database, t), coremodel).run();
}
}
throw new AbortException("No compatible types found.");
@@ -127,7 +136,7 @@ public class GeneralizedDBSCAN extends AbstractAlgorithm<Clustering<Model>> impl
* Instance for a particular data set.
*
* @author Erich Schubert
- *
+ *
* @apiviz.composedOf CorePredicate.Instance
* @apiviz.composedOf NeighborPredicate.Instance
*/
@@ -135,17 +144,12 @@ public class GeneralizedDBSCAN extends AbstractAlgorithm<Clustering<Model>> impl
/**
* Unprocessed IDs
*/
- private static final int UNPROCESSED = -2;
-
- /**
- * Noise IDs
- */
- private static final int NOISE = -1;
+ private static final int UNPROCESSED = 0;
/**
* Noise IDs
*/
- private static final int FIRST_CLUSTER = 0;
+ private static final int NOISE = 1;
/**
* The neighborhood predicate
@@ -158,15 +162,22 @@ public class GeneralizedDBSCAN extends AbstractAlgorithm<Clustering<Model>> impl
final CorePredicate.Instance<T> corepred;
/**
+ * Track which objects are "core" objects.
+ */
+ boolean coremodel = false;
+
+ /**
* Full Constructor
*
* @param npred Neighborhood predicate
* @param corepred Core object predicate
+ * @param coremodel Keep track of core points.
*/
- public Instance(NeighborPredicate.Instance<T> npred, CorePredicate.Instance<T> corepred) {
+ public Instance(NeighborPredicate.Instance<T> npred, CorePredicate.Instance<T> corepred, boolean coremodel) {
super();
this.npred = npred;
this.corepred = corepred;
+ this.coremodel = coremodel;
}
/**
@@ -177,78 +188,85 @@ public class GeneralizedDBSCAN extends AbstractAlgorithm<Clustering<Model>> impl
public Clustering<Model> run() {
final DBIDs ids = npred.getIDs();
// Setup progress logging
- final FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Clustering", ids.size(), LOG) : null;
- final IndefiniteProgress clusprogress = LOG.isVerbose() ? new IndefiniteProgress("Clusters", LOG) : null;
+ final FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Generalized DBSCAN Clustering", ids.size(), LOG) : null;
+ final IndefiniteProgress clusprogress = LOG.isVerbose() ? new IndefiniteProgress("Number of clusters found", LOG) : null;
// (Temporary) store the cluster ID assigned.
final WritableIntegerDataStore clusterids = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_TEMP, UNPROCESSED);
- // Note: these are not exact!
+ // Note: these are not exact, as objects may be stolen from noise.
final TIntArrayList clustersizes = new TIntArrayList();
+ clustersizes.add(0); // Unprocessed dummy value.
+ clustersizes.add(0); // Noise counter.
// Implementation Note: using Integer objects should result in
// reduced memory use in the HashMap!
- int clusterid = FIRST_CLUSTER;
- int clustersize = 0;
- int noisesize = 0;
+ int clusterid = NOISE + 1;
// Iterate over all objects in the database.
- for(DBIDIter id = ids.iter(); id.valid(); id.advance()) {
+ for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
// Skip already processed ids.
- if(clusterids.intValue(id) != UNPROCESSED) {
+ if (clusterids.intValue(id) != UNPROCESSED) {
continue;
}
// Evaluate Neighborhood predicate
final T neighbors = npred.getNeighbors(id);
// Evaluate Core-Point predicate:
- if(corepred.isCorePoint(id, neighbors)) {
+ if (corepred.isCorePoint(id, neighbors)) {
clusterids.putInt(id, clusterid);
- clustersize = 1 + setbasedExpandCluster(clusterid, clusterids, neighbors, progress);
+ clustersizes.add(expandCluster(clusterid, clusterids, neighbors, progress));
// start next cluster on next iteration.
- clustersizes.add(clustersize);
- clustersize = 0;
- clusterid += 1;
- if(clusprogress != null) {
+ ++clusterid;
+ if (clusprogress != null) {
clusprogress.setProcessed(clusterid, LOG);
}
- }
- else {
+ } else {
// otherwise, it's a noise point
clusterids.putInt(id, NOISE);
- noisesize += 1;
+ clustersizes.set(NOISE, clustersizes.get(NOISE) + 1);
}
// We've completed this element
- if(progress != null) {
+ if (progress != null) {
progress.incrementProcessed(LOG);
}
}
// Finish progress logging.
- if(progress != null) {
+ if (progress != null) {
progress.ensureCompleted(LOG);
}
- if(clusprogress != null) {
+ if (clusprogress != null) {
clusprogress.setCompleted(LOG);
}
// Transform cluster ID mapping into a clustering result:
- ArrayList<ArrayModifiableDBIDs> clusterlists = new ArrayList<ArrayModifiableDBIDs>(clusterid + 1);
- // add noise cluster storage
- clusterlists.add(DBIDUtil.newArray(noisesize));
+ ArrayList<ArrayModifiableDBIDs> clusterlists = new ArrayList<>(clusterid);
+ ArrayList<ArrayModifiableDBIDs> corelists = coremodel ? new ArrayList<ArrayModifiableDBIDs>(clusterid) : null;
// add storage containers for clusters
- for(int i = 0; i < clustersizes.size(); i++) {
+ for (int i = 0; i < clustersizes.size(); i++) {
clusterlists.add(DBIDUtil.newArray(clustersizes.get(i)));
+ if (corelists != null) {
+ corelists.add(DBIDUtil.newArray(clustersizes.get(i)));
+ }
}
// do the actual inversion
- for(DBIDIter id = ids.iter(); id.valid(); id.advance()) {
- int cluster = clusterids.intValue(id);
- clusterlists.get(cluster + 1).add(id);
+ for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
+ // Negative values are non-core points:
+ int cid = clusterids.intValue(id);
+ int cluster = Math.abs(cid);
+ clusterlists.get(cluster).add(id);
+ if (corelists != null && cid > NOISE) {
+ corelists.get(cluster).add(id);
+ }
}
clusterids.destroy();
- Clustering<Model> result = new Clustering<Model>("GDBSCAN", "gdbscan-clustering");
- int cid = 0;
- for(ArrayModifiableDBIDs res : clusterlists) {
- boolean isNoise = (cid == 0);
- Cluster<Model> c = new Cluster<Model>(res, isNoise, ClusterModel.CLUSTER);
- result.addCluster(c);
- cid++;
+ Clustering<Model> result = new Clustering<>("GDBSCAN", "gdbscan-clustering");
+ for (int cid = NOISE; cid < clusterlists.size(); cid++) {
+ boolean isNoise = (cid == NOISE);
+ Cluster<Model> c;
+ if (corelists != null) {
+ c = new Cluster<Model>(clusterlists.get(cid), isNoise, new CoreObjectsModel(corelists.get(cid)));
+ } else {
+ c = new Cluster<Model>(clusterlists.get(cid), isNoise, ClusterModel.CLUSTER);
+ }
+ result.addToplevelCluster(c);
}
return result;
}
@@ -263,28 +281,36 @@ public class GeneralizedDBSCAN extends AbstractAlgorithm<Clustering<Model>> impl
*
* @return cluster size
*/
- protected int setbasedExpandCluster(final int clusterid, final WritableIntegerDataStore clusterids, final T neighbors, final FiniteProgress progress) {
- int clustersize = 0;
+ protected int expandCluster(final int clusterid, final WritableIntegerDataStore clusterids, final T neighbors, final FiniteProgress progress) {
+ int clustersize = 1; // initial seed!
final ArrayModifiableDBIDs activeSet = DBIDUtil.newArray();
npred.addDBIDs(activeSet, neighbors);
// run expandCluster as long as this set is non-empty (non-recursive
// implementation)
- while(!activeSet.isEmpty()) {
+ while (!activeSet.isEmpty()) {
final DBID id = activeSet.remove(activeSet.size() - 1);
- clustersize += 1;
// Assign object to cluster
- final int oldclus = clusterids.putInt(id, clusterid);
- if(oldclus == -2) {
+ final int oldclus = clusterids.intValue(id);
+ if (oldclus == NOISE) {
+ clustersize += 1;
+ // Non core point cluster member:
+ clusterids.putInt(id, -clusterid);
+ } else if (oldclus == UNPROCESSED) {
+ clustersize += 1;
// expandCluster again:
// Evaluate Neighborhood predicate
final T newneighbors = npred.getNeighbors(id);
// Evaluate Core-Point predicate
- if(corepred.isCorePoint(id, newneighbors)) {
+ if (corepred.isCorePoint(id, newneighbors)) {
// Note: the recursion is unrolled into iteration over the active
// set.
npred.addDBIDs(activeSet, newneighbors);
+ clusterids.putInt(id, clusterid);
+ } else {
+ // Non core point cluster member:
+ clusterids.putInt(id, -clusterid);
}
- if(progress != null) {
+ if (progress != null) {
progress.incrementProcessed(LOG);
}
}
@@ -302,43 +328,58 @@ public class GeneralizedDBSCAN extends AbstractAlgorithm<Clustering<Model>> impl
*/
public static class Parameterizer extends AbstractParameterizer {
/**
- * Neighborhood predicate
+ * Neighborhood predicate.
*/
NeighborPredicate npred = null;
/**
- * Core point predicate
+ * Core point predicate.
*/
CorePredicate corepred = null;
/**
- * Parameter for neighborhood predicate
+ * Track which objects are "core" objects.
+ */
+ boolean coremodel = false;
+
+ /**
+ * Parameter for neighborhood predicate.
*/
public static final OptionID NEIGHBORHOODPRED_ID = new OptionID("gdbscan.neighborhood", "Neighborhood predicate for GDBSCAN");
/**
- * Parameter for core predicate
+ * Parameter for core predicate.
*/
public static final OptionID COREPRED_ID = new OptionID("gdbscan.core", "Core point predicate for GDBSCAN");
+ /**
+ * Flag to keep track of core points.
+ */
+ public static final OptionID COREMODEL_ID = new OptionID("gdbscan.core-model", "Use a model that keeps track of core points. Needs more memory.");
+
@Override
protected void makeOptions(Parameterization config) {
// Neighborhood predicate
- ObjectParameter<NeighborPredicate> npredOpt = new ObjectParameter<NeighborPredicate>(NEIGHBORHOODPRED_ID, NeighborPredicate.class, EpsilonNeighborPredicate.class);
- if(config.grab(npredOpt)) {
+ ObjectParameter<NeighborPredicate> npredOpt = new ObjectParameter<>(NEIGHBORHOODPRED_ID, NeighborPredicate.class, EpsilonNeighborPredicate.class);
+ if (config.grab(npredOpt)) {
npred = npredOpt.instantiateClass(config);
}
// Core point predicate
- ObjectParameter<CorePredicate> corepredOpt = new ObjectParameter<CorePredicate>(COREPRED_ID, CorePredicate.class, MinPtsCorePredicate.class);
- if(config.grab(corepredOpt)) {
+ ObjectParameter<CorePredicate> corepredOpt = new ObjectParameter<>(COREPRED_ID, CorePredicate.class, MinPtsCorePredicate.class);
+ if (config.grab(corepredOpt)) {
corepred = corepredOpt.instantiateClass(config);
}
+
+ Flag coremodelOpt = new Flag(COREMODEL_ID);
+ if (config.grab(coremodelOpt)) {
+ coremodel = coremodelOpt.isTrue();
+ }
}
@Override
protected GeneralizedDBSCAN makeInstance() {
- return new GeneralizedDBSCAN(npred, corepred);
+ return new GeneralizedDBSCAN(npred, corepred, coremodel);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/MinPtsCorePredicate.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/MinPtsCorePredicate.java
index 47097f9b..a6e62e2e 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/MinPtsCorePredicate.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/MinPtsCorePredicate.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/NeighborPredicate.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/NeighborPredicate.java
index ed927696..c3e1e8c9 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/NeighborPredicate.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/NeighborPredicate.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/package-info.java
index 8be23c7d..7ea3c7e4 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/package-info.java
@@ -22,7 +22,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/CentroidLinkageMethod.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/CentroidLinkageMethod.java
new file mode 100644
index 00000000..72b6fb57
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/CentroidLinkageMethod.java
@@ -0,0 +1,84 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Centroid linkage clustering method, aka UPGMC: Unweighted Pair-Group Method
+ * using Centroids.
+ *
+ * Reference:
+ * <p>
+ * A. K. Jain and R. C. Dubes<br />
+ * Algorithms for Clustering Data<br />
+ * Prentice-Hall
+ * </p>
+ *
+ * @author Erich Schubert
+ */
+@Alias({ "centroid", "upgmc" })
+@Reference(authors = "A. K. Jain and R. C. Dubes", title = "Algorithms for Clustering Data", booktitle = "Algorithms for Clustering Data, Prentice-Hall")
+public class CentroidLinkageMethod implements LinkageMethod {
+ /**
+ * Static instance of class.
+ */
+ public static final CentroidLinkageMethod STATIC = new CentroidLinkageMethod();
+
+ /**
+ * Constructor.
+ *
+ * @deprecated use the static instance {@link #STATIC} instead.
+ */
+ @Deprecated
+ public CentroidLinkageMethod() {
+ super();
+ }
+
+ @Override
+ public double combine(int sizex, double dx, int sizey, double dy, int sizej, double dxy) {
+ final double wx = sizex / (double) (sizex + sizey);
+ final double wy = sizey / (double) (sizex + sizey);
+ final double beta = (sizex * sizey) / (double) ((sizex + sizey) * (sizex + sizey));
+ return wx * dx + wy * dy - beta * dxy;
+ }
+
+ /**
+ * Class parameterizer.
+ *
+ * Returns the static instance.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ @Override
+ protected CentroidLinkageMethod makeInstance() {
+ return STATIC;
+ }
+ }
+} // Sokal and Michener (1958), Gower (1967)
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/CompleteLinkageMethod.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/CompleteLinkageMethod.java
new file mode 100644
index 00000000..0cb47fa7
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/CompleteLinkageMethod.java
@@ -0,0 +1,70 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Complete-linkage clustering method.
+ *
+ * @author Erich Schubert
+ */
+@Alias({ "complete", "clink", "complete-link", "farthest-neighbor" })
+public class CompleteLinkageMethod implements LinkageMethod {
+ /**
+ * Static instance of class.
+ */
+ public static final CompleteLinkageMethod STATIC = new CompleteLinkageMethod();
+
+ /**
+ * Constructor.
+ *
+ * @deprecated use the static instance {@link #STATIC} instead.
+ */
+ @Deprecated
+ public CompleteLinkageMethod() {
+ super();
+ }
+
+ @Override
+ public double combine(int sizex, double dx, int sizey, double dy, int sizej, double dxy) {
+ return Math.max(dx, dy);
+ }
+
+ /**
+ * Class parameterizer.
+ *
+ * Returns the static instance.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ @Override
+ protected CompleteLinkageMethod makeInstance() {
+ return STATIC;
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/ExtractFlatClusteringFromHierarchy.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/ExtractFlatClusteringFromHierarchy.java
new file mode 100644
index 00000000..ac5cb77c
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/ExtractFlatClusteringFromHierarchy.java
@@ -0,0 +1,854 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import gnu.trove.list.array.TDoubleArrayList;
+
+import java.util.ArrayList;
+import java.util.Comparator;
+
+import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.model.DendrogramModel;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DBIDDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.DoubleDistanceDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDVar;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DistanceParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.EnumParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+import de.lmu.ifi.dbs.elki.workflow.AlgorithmStep;
+
+/**
+ * Extract a flat clustering from a full hierarchy, represented in pointer form.
+ *
+ * FIXME: re-check tie handling!
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.uses HierarchicalClusteringAlgorithm
+ * @apiviz.uses PointerHierarchyRepresentationResult
+ * @apiviz.has Clustering
+ */
+public class ExtractFlatClusteringFromHierarchy<D extends Distance<D>> implements ClusteringAlgorithm<Clustering<DendrogramModel<D>>> {
+ /**
+ * Class logger.
+ */
+ private static final Logging LOG = Logging.getLogger(ExtractFlatClusteringFromHierarchy.class);
+
+ /**
+ * Threshold mode.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static enum ThresholdMode {
+ /** Cut by minimum number of clusters */
+ BY_MINCLUSTERS,
+ /** Cut by threshold */
+ BY_THRESHOLD,
+ /** No thresholding */
+ NO_THRESHOLD,
+ }
+
+ /**
+ * Output mode.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static enum OutputMode {
+ /** Strict partitioning. */
+ STRICT_PARTITIONS,
+ /** Partial hierarchy. */
+ PARTIAL_HIERARCHY,
+ }
+
+ /**
+ * Minimum number of clusters to extract
+ */
+ private final int minclusters;
+
+ /**
+ * Clustering algorithm to run to obtain the hierarchy.
+ */
+ private HierarchicalClusteringAlgorithm<D> algorithm;
+
+ /**
+ * Include empty cluster in the hierarchy produced.
+ */
+ private OutputMode outputmode = OutputMode.PARTIAL_HIERARCHY;
+
+ /**
+ * Threshold for extracting clusters.
+ */
+ private D threshold = null;
+
+ /**
+ * Disallow singleton clusters, but add them to the parent cluster instead.
+ */
+ private boolean singletons = false;
+
+ /**
+ * Constructor.
+ *
+ * @param algorithm Algorithm to run
+ * @param minclusters Minimum number of clusters
+ * @param outputmode Output mode: truncated hierarchy or strict partitions.
+ * @param singletons Allow producing singleton clusters.
+ */
+ public ExtractFlatClusteringFromHierarchy(HierarchicalClusteringAlgorithm<D> algorithm, int minclusters, OutputMode outputmode, boolean singletons) {
+ super();
+ this.algorithm = algorithm;
+ this.threshold = null;
+ this.minclusters = minclusters;
+ this.outputmode = outputmode;
+ this.singletons = singletons;
+ }
+
+ /**
+ * Constructor.
+ *
+ * @param algorithm Algorithm to run
+ * @param threshold Distance threshold
+ * @param outputmode Output mode: truncated hierarchy or strict partitions.
+ * @param singletons Allow producing singleton clusters.
+ */
+ public ExtractFlatClusteringFromHierarchy(HierarchicalClusteringAlgorithm<D> algorithm, D threshold, OutputMode outputmode, boolean singletons) {
+ super();
+ this.algorithm = algorithm;
+ this.threshold = threshold;
+ this.minclusters = -1;
+ this.outputmode = outputmode;
+ this.singletons = singletons;
+ }
+
+ @Override
+ public Clustering<DendrogramModel<D>> run(Database database) {
+ PointerHierarchyRepresentationResult<D> pointerresult = algorithm.run(database);
+ DBIDs ids = pointerresult.getDBIDs();
+ DBIDDataStore pi = pointerresult.getParentStore();
+ DataStore<D> lambda = pointerresult.getParentDistanceStore();
+
+ Clustering<DendrogramModel<D>> result;
+ if (lambda instanceof DoubleDistanceDataStore) {
+ result = extractClustersDouble(ids, pi, (DoubleDistanceDataStore) lambda);
+ } else {
+ result = extractClusters(ids, pi, lambda);
+ }
+ result.addChildResult(pointerresult);
+
+ return result;
+ }
+
+ /**
+ * Extract all clusters from the pi-lambda-representation.
+ *
+ * @param ids Object ids to process
+ * @param pi Pi store
+ * @param lambda Lambda store
+ *
+ * @return Hierarchical clustering
+ */
+ private Clustering<DendrogramModel<D>> extractClusters(DBIDs ids, final DBIDDataStore pi, final DataStore<D> lambda) {
+ FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Extracting clusters", ids.size(), LOG) : null;
+
+ // Sort DBIDs by lambda. We need this for two things:
+ // a) to determine the stop distance from "minclusters" parameter
+ // b) to process arrows in decreasing / increasing order
+ ArrayModifiableDBIDs order = DBIDUtil.newArray(ids);
+ order.sort(new CompareByLambda<>(lambda));
+ DBIDArrayIter it = order.iter(); // Used multiple times!
+
+ int split;
+ if (minclusters > 0) {
+ split = Math.max(ids.size() - minclusters, 0);
+ // Stop distance:
+ final D stopdist = lambda.get(order.get(split));
+
+ // Tie handling: decrement split.
+ while (split > 0) {
+ it.seek(split - 1);
+ if (stopdist.compareTo(lambda.get(it)) <= 0) {
+ split--;
+ } else {
+ break;
+ }
+ }
+ } else if (threshold != null) {
+ split = ids.size();
+ it.seek(split - 1);
+ while (it.valid() && threshold.compareTo(lambda.get(it)) <= 0) {
+ split--;
+ it.retract();
+ }
+ } else { // full hierarchy
+ split = 0;
+ }
+
+ // Extract the child clusters
+ int expcnum = ids.size() - split;
+ WritableIntegerDataStore cluster_map = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_TEMP, -1);
+ ArrayList<ModifiableDBIDs> cluster_dbids = new ArrayList<>(expcnum);
+ ArrayList<D> cluster_dist = new ArrayList<>(expcnum);
+ ArrayModifiableDBIDs cluster_leads = DBIDUtil.newArray(expcnum);
+
+ DBIDVar succ = DBIDUtil.newVar(); // Variable for successor.
+ // Go backwards on the lower part.
+ for (it.seek(split - 1); it.valid(); it.retract()) {
+ D dist = lambda.get(it); // Distance to successor
+ pi.assignVar(it, succ); // succ = pi(it)
+ int clusterid = cluster_map.intValue(succ);
+ // Successor cluster has already been created:
+ if (clusterid >= 0) {
+ cluster_dbids.get(clusterid).add(it);
+ cluster_map.putInt(it, clusterid);
+ // Update distance to maximum encountered:
+ if (cluster_dist.get(clusterid).compareTo(dist) < 0) {
+ cluster_dist.set(clusterid, dist);
+ }
+ } else {
+ // Need to start a new cluster:
+ clusterid = cluster_dbids.size(); // next cluster number.
+ ModifiableDBIDs cids = DBIDUtil.newArray();
+ // Add element and successor as initial members:
+ cids.add(succ);
+ cluster_map.putInt(succ, clusterid);
+ cids.add(it);
+ cluster_map.putInt(it, clusterid);
+ // Store new cluster.
+ cluster_dbids.add(cids);
+ cluster_leads.add(succ);
+ cluster_dist.add(dist);
+ }
+
+ // Decrement counter
+ if (progress != null) {
+ progress.incrementProcessed(LOG);
+ }
+ }
+ final Clustering<DendrogramModel<D>> dendrogram;
+ switch(outputmode) {
+ case PARTIAL_HIERARCHY: {
+ // Build a hierarchy out of these clusters.
+ dendrogram = new Clustering<>("Hierarchical Clustering", "hierarchical-clustering");
+ Cluster<DendrogramModel<D>> root = null;
+ ArrayList<Cluster<DendrogramModel<D>>> clusters = new ArrayList<>(expcnum);
+ // Convert initial clusters to cluster objects
+ {
+ int i = 0;
+ for (DBIDIter it2 = cluster_leads.iter(); it2.valid(); it2.advance(), i++) {
+ clusters.add(makeCluster(it2, cluster_dist.get(i), cluster_dbids.get(i)));
+ }
+ cluster_dist = null; // Invalidate
+ cluster_dbids = null; // Invalidate
+ }
+ // Process the upper part, bottom-up.
+ for (it.seek(split); it.valid(); it.advance()) {
+ int clusterid = cluster_map.intValue(it);
+ // The current cluster led by the current element:
+ final Cluster<DendrogramModel<D>> clus;
+ if (clusterid >= 0) {
+ clus = clusters.get(clusterid);
+ } else if (!singletons && ids.size() != 1) {
+ clus = null;
+ } else {
+ clus = makeCluster(it, null, DBIDUtil.deref(it));
+ }
+ // The successor to join:
+ pi.assignVar(it, succ); // succ = pi(it)
+ if (DBIDUtil.equal(it, succ)) {
+ assert (root == null);
+ root = clus;
+ } else {
+ // Parent cluster:
+ int parentid = cluster_map.intValue(succ);
+ D depth = lambda.get(it);
+ // Parent cluster exists - merge as a new cluster:
+ if (parentid >= 0) {
+ final Cluster<DendrogramModel<D>> pclus = clusters.get(parentid);
+ if (pclus.getModel().getDistance().equals(depth)) {
+ if (clus == null) {
+ ((ModifiableDBIDs) pclus.getIDs()).add(it);
+ } else {
+ dendrogram.addChildCluster(pclus, clus);
+ }
+ } else {
+ // Merge at new depth:
+ ModifiableDBIDs cids = DBIDUtil.newArray(clus == null ? 1 : 0);
+ if (clus == null) {
+ cids.add(it);
+ }
+ Cluster<DendrogramModel<D>> npclus = makeCluster(succ, depth, cids);
+ if (clus != null) {
+ dendrogram.addChildCluster(npclus, clus);
+ }
+ dendrogram.addChildCluster(npclus, pclus);
+ // Replace existing parent cluster: new depth
+ clusters.set(parentid, npclus);
+ }
+ } else {
+ // Merge with parent at this depth:
+ final Cluster<DendrogramModel<D>> pclus;
+ if (!singletons) {
+ ModifiableDBIDs cids = DBIDUtil.newArray(clus == null ? 2 : 1);
+ cids.add(succ);
+ if (clus == null) {
+ cids.add(it);
+ }
+ // New cluster for parent and/or new point
+ pclus = makeCluster(succ, depth, cids);
+ } else {
+ // Create a new, one-element cluster for parent, and a merged
+ // cluster on top.
+ pclus = makeCluster(succ, depth, DBIDUtil.EMPTYDBIDS);
+ dendrogram.addChildCluster(pclus, makeCluster(succ, null, DBIDUtil.deref(succ)));
+ }
+ if (clus != null) {
+ dendrogram.addChildCluster(pclus, clus);
+ }
+ // Store cluster:
+ parentid = clusters.size();
+ clusters.add(pclus); // Remember parent cluster
+ cluster_map.putInt(succ, parentid); // Reference
+ }
+ }
+
+ // Decrement counter
+ if (progress != null) {
+ progress.incrementProcessed(LOG);
+ }
+ }
+ assert (root != null);
+ // attach root
+ dendrogram.addToplevelCluster(root);
+ break;
+ }
+ case STRICT_PARTITIONS: {
+ // Build a hierarchy out of these clusters.
+ dendrogram = new Clustering<>("Flattened Hierarchical Clustering", "flattened-hierarchical-clustering");
+ // Convert initial clusters to cluster objects
+ {
+ int i = 0;
+ for (DBIDIter it2 = cluster_leads.iter(); it2.valid(); it2.advance(), i++) {
+ dendrogram.addToplevelCluster(makeCluster(it2, cluster_dist.get(i), cluster_dbids.get(i)));
+ }
+ cluster_dist = null; // Invalidate
+ cluster_dbids = null; // Invalidate
+ }
+ // Process the upper part, bottom-up.
+ for (it.seek(split); it.valid(); it.advance()) {
+ int clusterid = cluster_map.intValue(it);
+ if (clusterid < 0) {
+ dendrogram.addToplevelCluster(makeCluster(it, null, DBIDUtil.deref(it)));
+ }
+
+ // Decrement counter
+ if (progress != null) {
+ progress.incrementProcessed(LOG);
+ }
+ }
+ break;
+ }
+ default:
+ throw new AbortException("Unsupported output mode.");
+ }
+
+ if (progress != null) {
+ progress.ensureCompleted(LOG);
+ }
+
+ return dendrogram;
+ }
+
+ /**
+ * Extract all clusters from the pi-lambda-representation.
+ *
+ * @param ids Object ids to process
+ * @param pi Pi store
+ * @param lambda Lambda store
+ *
+ * @return Hierarchical clustering
+ */
+ private Clustering<DendrogramModel<D>> extractClustersDouble(DBIDs ids, final DBIDDataStore pi, final DoubleDistanceDataStore lambda) {
+ FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Extracting clusters", ids.size(), LOG) : null;
+
+ // Sort DBIDs by lambda. We need this for two things:
+ // a) to determine the stop distance from "minclusters" parameter
+ // b) to process arrows in decreasing / increasing order
+ ArrayModifiableDBIDs order = DBIDUtil.newArray(ids);
+ order.sort(new CompareByDoubleLambda(lambda));
+ DBIDArrayIter it = order.iter(); // Used multiple times!
+
+ int split;
+ if (minclusters > 0) {
+ split = Math.max(ids.size() - minclusters, 0);
+ // Stop distance:
+ final double stopdist = lambda.doubleValue(order.get(split));
+
+ // Tie handling: decrement split.
+ while (split > 0) {
+ it.seek(split - 1);
+ if (stopdist <= lambda.doubleValue(it)) {
+ split--;
+ } else {
+ break;
+ }
+ }
+ } else if (threshold != null) {
+ split = ids.size();
+ it.seek(split - 1);
+ double stopdist = ((DoubleDistance) threshold).doubleValue();
+ while (it.valid() && stopdist <= lambda.doubleValue(it)) {
+ split--;
+ it.retract();
+ }
+ } else { // full hierarchy
+ split = 0;
+ }
+
+ // Extract the child clusters
+ int expcnum = ids.size() - split;
+ WritableIntegerDataStore cluster_map = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_TEMP, -1);
+ ArrayList<ModifiableDBIDs> cluster_dbids = new ArrayList<>(expcnum);
+ TDoubleArrayList cluster_dist = new TDoubleArrayList(expcnum);
+ ArrayModifiableDBIDs cluster_leads = DBIDUtil.newArray(expcnum);
+
+ DBIDVar succ = DBIDUtil.newVar(); // Variable for successor.
+ // Go backwards on the lower part.
+ for (it.seek(split - 1); it.valid(); it.retract()) {
+ double dist = lambda.doubleValue(it); // Distance to successor
+ pi.assignVar(it, succ); // succ = pi(it)
+ int clusterid = cluster_map.intValue(succ);
+ // Successor cluster has already been created:
+ if (clusterid >= 0) {
+ cluster_dbids.get(clusterid).add(it);
+ cluster_map.putInt(it, clusterid);
+ // Update distance to maximum encountered:
+ if (cluster_dist.get(clusterid) < dist) {
+ cluster_dist.set(clusterid, dist);
+ }
+ } else {
+ // Need to start a new cluster:
+ clusterid = cluster_dbids.size(); // next cluster number.
+ ModifiableDBIDs cids = DBIDUtil.newArray();
+ // Add element and successor as initial members:
+ cids.add(succ);
+ cluster_map.putInt(succ, clusterid);
+ cids.add(it);
+ cluster_map.putInt(it, clusterid);
+ // Store new cluster.
+ cluster_dbids.add(cids);
+ cluster_leads.add(succ);
+ cluster_dist.add(dist);
+ }
+
+ // Decrement counter
+ if (progress != null) {
+ progress.incrementProcessed(LOG);
+ }
+ }
+ final Clustering<DendrogramModel<D>> dendrogram;
+ switch(outputmode) {
+ case PARTIAL_HIERARCHY: {
+ // Build a hierarchy out of these clusters.
+ dendrogram = new Clustering<>("Hierarchical Clustering", "hierarchical-clustering");
+ Cluster<DendrogramModel<D>> root = null;
+ ArrayList<Cluster<DendrogramModel<D>>> clusters = new ArrayList<>(expcnum);
+ // Convert initial clusters to cluster objects
+ {
+ int i = 0;
+ for (DBIDIter it2 = cluster_leads.iter(); it2.valid(); it2.advance(), i++) {
+ @SuppressWarnings("unchecked")
+ D depth = (D) new DoubleDistance(cluster_dist.get(i));
+ clusters.add(makeCluster(it2, depth, cluster_dbids.get(i)));
+ }
+ cluster_dist = null; // Invalidate
+ cluster_dbids = null; // Invalidate
+ }
+ // Process the upper part, bottom-up.
+ for (it.seek(split); it.valid(); it.advance()) {
+ int clusterid = cluster_map.intValue(it);
+ // The current cluster led by the current element:
+ final Cluster<DendrogramModel<D>> clus;
+ if (clusterid >= 0) {
+ clus = clusters.get(clusterid);
+ } else if (!singletons && ids.size() != 1) {
+ clus = null;
+ } else {
+ clus = makeCluster(it, null, DBIDUtil.deref(it));
+ }
+ // The successor to join:
+ pi.assignVar(it, succ); // succ = pi(it)
+ if (DBIDUtil.equal(it, succ)) {
+ assert (root == null);
+ root = clus;
+ } else {
+ // Parent cluster:
+ int parentid = cluster_map.intValue(succ);
+ @SuppressWarnings("unchecked")
+ D depth = (D) new DoubleDistance(lambda.doubleValue(it));
+ // Parent cluster exists - merge as a new cluster:
+ if (parentid >= 0) {
+ final Cluster<DendrogramModel<D>> pclus = clusters.get(parentid);
+ if (pclus.getModel().getDistance().equals(depth)) {
+ if (clus == null) {
+ ((ModifiableDBIDs) pclus.getIDs()).add(it);
+ } else {
+ dendrogram.addChildCluster(pclus, clus);
+ }
+ } else {
+ // Merge at new depth:
+ ModifiableDBIDs cids = DBIDUtil.newArray(clus == null ? 1 : 0);
+ if (clus == null) {
+ cids.add(it);
+ }
+ Cluster<DendrogramModel<D>> npclus = makeCluster(succ, depth, cids);
+ if (clus != null) {
+ dendrogram.addChildCluster(npclus, clus);
+ }
+ dendrogram.addChildCluster(npclus, pclus);
+ // Replace existing parent cluster: new depth
+ clusters.set(parentid, npclus);
+ }
+ } else {
+ // Merge with parent at this depth:
+ final Cluster<DendrogramModel<D>> pclus;
+ if (!singletons) {
+ ModifiableDBIDs cids = DBIDUtil.newArray(clus == null ? 2 : 1);
+ cids.add(succ);
+ if (clus == null) {
+ cids.add(it);
+ }
+ // New cluster for parent and/or new point
+ pclus = makeCluster(succ, depth, cids);
+ } else {
+ // Create a new, one-element cluster for parent, and a merged
+ // cluster on top.
+ pclus = makeCluster(succ, depth, DBIDUtil.EMPTYDBIDS);
+ dendrogram.addChildCluster(pclus, makeCluster(succ, null, DBIDUtil.deref(succ)));
+ }
+ if (clus != null) {
+ dendrogram.addChildCluster(pclus, clus);
+ }
+ // Store cluster:
+ parentid = clusters.size();
+ clusters.add(pclus); // Remember parent cluster
+ cluster_map.putInt(succ, parentid); // Reference
+ }
+ }
+
+ // Decrement counter
+ if (progress != null) {
+ progress.incrementProcessed(LOG);
+ }
+ }
+ assert (root != null);
+ // attach root
+ dendrogram.addToplevelCluster(root);
+ break;
+ }
+ case STRICT_PARTITIONS: {
+ // Build a hierarchy out of these clusters.
+ dendrogram = new Clustering<>("Flattened Hierarchical Clustering", "flattened-hierarchical-clustering");
+ // Convert initial clusters to cluster objects
+ {
+ int i = 0;
+ for (DBIDIter it2 = cluster_leads.iter(); it2.valid(); it2.advance(), i++) {
+ @SuppressWarnings("unchecked")
+ D depth = (D) new DoubleDistance(cluster_dist.get(i));
+ dendrogram.addToplevelCluster(makeCluster(it2, depth, cluster_dbids.get(i)));
+ }
+ cluster_dist = null; // Invalidate
+ cluster_dbids = null; // Invalidate
+ }
+ // Process the upper part, bottom-up.
+ for (it.seek(split); it.valid(); it.advance()) {
+ int clusterid = cluster_map.intValue(it);
+ if (clusterid < 0) {
+ dendrogram.addToplevelCluster(makeCluster(it, null, DBIDUtil.deref(it)));
+ }
+
+ // Decrement counter
+ if (progress != null) {
+ progress.incrementProcessed(LOG);
+ }
+ }
+ break;
+ }
+ default:
+ throw new AbortException("Unsupported output mode.");
+ }
+
+ if (progress != null) {
+ progress.ensureCompleted(LOG);
+ }
+
+ return dendrogram;
+ }
+
+ /**
+ * Make the cluster for the given object
+ *
+ * @param lead Leading object
+ * @param depth Linkage depth
+ * @param members Member objects
+ * @return Cluster
+ */
+ private Cluster<DendrogramModel<D>> makeCluster(DBIDRef lead, D depth, DBIDs members) {
+ final String name;
+ if (members.size() == 0) {
+ name = "mrg_" + DBIDUtil.toString(lead) + "_" + depth;
+ } else if (depth != null && depth.isInfiniteDistance() || (members.size() == 1 && members.contains(lead))) {
+ name = "obj_" + DBIDUtil.toString(lead);
+ } else if (depth != null) {
+ name = "clu_" + DBIDUtil.toString(lead) + "_" + depth;
+ } else {
+ // Complete data set only?
+ name = "clu_" + DBIDUtil.toString(lead);
+ }
+ Cluster<DendrogramModel<D>> cluster = new Cluster<>(name, members, new DendrogramModel<>(depth));
+ return cluster;
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return algorithm.getInputTypeRestriction();
+ }
+
+ /**
+ * Order a DBID collection by the lambda value.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ *
+ * @param <D> Distance type
+ */
+ private static final class CompareByLambda<D extends Distance<D>> implements Comparator<DBIDRef> {
+ /**
+ * Lambda storage
+ */
+ private final DataStore<D> lambda;
+
+ /**
+ * Constructor.
+ *
+ * @param lambda Lambda storage
+ */
+ protected CompareByLambda(DataStore<D> lambda) {
+ this.lambda = lambda;
+ }
+
+ @Override
+ public int compare(DBIDRef id1, DBIDRef id2) {
+ D k1 = lambda.get(id1);
+ D k2 = lambda.get(id2);
+ assert (k1 != null);
+ assert (k2 != null);
+ return k1.compareTo(k2);
+ }
+ }
+
+ /**
+ * Order a DBID collection by the lambda value.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ private static final class CompareByDoubleLambda implements Comparator<DBIDRef> {
+ /**
+ * Lambda storage
+ */
+ private final DoubleDistanceDataStore lambda;
+
+ /**
+ * Constructor.
+ *
+ * @param lambda Lambda storage
+ */
+ protected CompareByDoubleLambda(DoubleDistanceDataStore lambda) {
+ this.lambda = lambda;
+ }
+
+ @Override
+ public int compare(DBIDRef id1, DBIDRef id2) {
+ double k1 = lambda.doubleValue(id1);
+ double k2 = lambda.doubleValue(id2);
+ return Double.compare(k1, k2);
+ }
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer<D extends Distance<D>> extends AbstractParameterizer {
+ /**
+ * Extraction mode to use.
+ */
+ public static final OptionID MODE_ID = new OptionID("hierarchical.threshold-mode", "The thresholding mode to use for extracting clusters: by desired number of clusters, or by distance threshold.");
+
+ /**
+ * The minimum number of clusters to extract.
+ */
+ public static final OptionID MINCLUSTERS_ID = new OptionID("hierarchical.minclusters", "The minimum number of clusters to extract (there may be more clusters when tied).");
+
+ /**
+ * The threshold level for which to extract the clustering.
+ */
+ public static final OptionID THRESHOLD_ID = new OptionID("hierarchical.threshold", "The threshold level for which to extract the clusters.");
+
+ /**
+ * Parameter to configure the output mode (nested or truncated clusters).
+ */
+ public static final OptionID OUTPUTMODE_ID = new OptionID("hierarchical.output-mode", "The output mode: a truncated cluster hierarchy, or a strict (flat) partitioning of the data set.");
+
+ /**
+ * Flag to produce singleton clusters.
+ */
+ public static final OptionID SINGLETONS_ID = new OptionID("hierarchical.singletons", "Do not avoid singleton clusters. This produces a more complex hierarchy.");
+
+ /**
+ * Number of clusters to extract.
+ */
+ int minclusters = -1;
+
+ /**
+ * Threshold level.
+ */
+ D threshold = null;
+
+ /**
+ * Flag to produce empty clusters to model the hierarchy above.
+ */
+ OutputMode outputmode = null;
+
+ /**
+ * The hierarchical clustering algorithm to run.
+ */
+ HierarchicalClusteringAlgorithm<D> algorithm;
+
+ /**
+ * Threshold mode.
+ */
+ ThresholdMode thresholdmode = null;
+
+ /**
+ * Also create singleton clusters.
+ */
+ boolean singletons = false;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ ObjectParameter<HierarchicalClusteringAlgorithm<D>> algorithmP = new ObjectParameter<>(AlgorithmStep.Parameterizer.ALGORITHM_ID, HierarchicalClusteringAlgorithm.class);
+ if (config.grab(algorithmP)) {
+ algorithm = algorithmP.instantiateClass(config);
+ }
+
+ EnumParameter<ThresholdMode> modeP = new EnumParameter<>(MODE_ID, ThresholdMode.class, ThresholdMode.BY_MINCLUSTERS);
+ if (config.grab(modeP)) {
+ thresholdmode = modeP.getValue();
+ }
+
+ if (thresholdmode == null || ThresholdMode.BY_MINCLUSTERS.equals(thresholdmode)) {
+ IntParameter minclustersP = new IntParameter(MINCLUSTERS_ID);
+ minclustersP.addConstraint(new GreaterEqualConstraint(1));
+ if (config.grab(minclustersP)) {
+ minclusters = minclustersP.intValue();
+ }
+ }
+
+ if (thresholdmode == null || ThresholdMode.BY_THRESHOLD.equals(thresholdmode)) {
+ // Fallback to double when no algorithm chosen yet:
+ @SuppressWarnings("unchecked")
+ final D factory = algorithm != null ? algorithm.getDistanceFactory() : (D) DoubleDistance.FACTORY;
+ DistanceParameter<D> distP = new DistanceParameter<>(THRESHOLD_ID, factory);
+ if (config.grab(distP)) {
+ threshold = distP.getValue();
+ }
+ }
+
+ if (thresholdmode == null || !ThresholdMode.NO_THRESHOLD.equals(thresholdmode)) {
+ EnumParameter<OutputMode> outputP = new EnumParameter<>(OUTPUTMODE_ID, OutputMode.class);
+ if (config.grab(outputP)) {
+ outputmode = outputP.getValue();
+ }
+ } else {
+ // This becomes full hierarchy:
+ minclusters = -1;
+ outputmode = OutputMode.PARTIAL_HIERARCHY;
+ }
+
+ Flag singletonsF = new Flag(SINGLETONS_ID);
+ if (config.grab(singletonsF)) {
+ singletons = singletonsF.isTrue();
+ }
+ }
+
+ @Override
+ protected ExtractFlatClusteringFromHierarchy<D> makeInstance() {
+ switch(thresholdmode) {
+ case NO_THRESHOLD:
+ case BY_MINCLUSTERS:
+ return new ExtractFlatClusteringFromHierarchy<>(algorithm, minclusters, outputmode, singletons);
+ case BY_THRESHOLD:
+ return new ExtractFlatClusteringFromHierarchy<>(algorithm, threshold, outputmode, singletons);
+ default:
+ throw new AbortException("Unknown extraction mode.");
+ }
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/GroupAverageLinkageMethod.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/GroupAverageLinkageMethod.java
new file mode 100644
index 00000000..079fb69b
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/GroupAverageLinkageMethod.java
@@ -0,0 +1,82 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Group-average linkage clustering method.
+ *
+ * Reference:
+ * <p>
+ * A. K. Jain and R. C. Dubes<br />
+ * Algorithms for Clustering Data<br />
+ * Prentice-Hall
+ * </p>
+ *
+ * @author Erich Schubert
+ */
+@Alias({ "upgma", "average", "average-link", "average-linkage", "UPGMA" })
+@Reference(authors = "A. K. Jain and R. C. Dubes", title = "Algorithms for Clustering Data", booktitle = "Algorithms for Clustering Data, Prentice-Hall")
+public class GroupAverageLinkageMethod implements LinkageMethod {
+ /**
+ * Static instance of class.
+ */
+ public static final GroupAverageLinkageMethod STATIC = new GroupAverageLinkageMethod();
+
+ /**
+ * Constructor.
+ *
+ * @deprecated use the static instance {@link #STATIC} instead.
+ */
+ @Deprecated
+ public GroupAverageLinkageMethod() {
+ super();
+ }
+
+ @Override
+ public double combine(int sizex, double dx, int sizey, double dy, int sizej, double dxy) {
+ final double wx = sizex / (double) (sizex + sizey);
+ final double wy = sizey / (double) (sizex + sizey);
+ return wx * dx + wy * dy;
+ }
+
+ /**
+ * Class parameterizer.
+ *
+ * Returns the static instance.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ @Override
+ protected GroupAverageLinkageMethod makeInstance() {
+ return STATIC;
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/HierarchicalClusteringAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/HierarchicalClusteringAlgorithm.java
new file mode 100644
index 00000000..f3595d51
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/HierarchicalClusteringAlgorithm.java
@@ -0,0 +1,51 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.algorithm.Algorithm;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+
+/**
+ * Interface for hierarchical clustering algorithms.
+ *
+ * This interface allows the algorithms to be used by e.g.
+ * {@link ExtractFlatClusteringFromHierarchy}.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has PointerHierarchyRepresentationResult
+ *
+ * @param <D> Distance type
+ */
+public interface HierarchicalClusteringAlgorithm<D extends Distance<D>> extends Algorithm {
+ @Override
+ public PointerHierarchyRepresentationResult<D> run(Database db);
+
+ /**
+ * Return the distance type that will be used by the algorithm.
+ *
+ * @return Distance factory.
+ */
+ public D getDistanceFactory();
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/LinkageMethod.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/LinkageMethod.java
new file mode 100644
index 00000000..68d0b4d8
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/LinkageMethod.java
@@ -0,0 +1,56 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical;
+
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Abstract interface for implementing a new linkage method into hierarchical
+ * clustering.
+ *
+ * Reference:
+ * <p>
+ * G. N. Lance and W. T. Williams<br />
+ * A general theory of classificatory sorting strategies 1. Hierarchical systems
+ * <br/>
+ * The computer journal 9.4 (1967): 373-380.
+ * </p>
+ *
+ * @author Erich Schubert
+ */
+@Reference(authors = "G. N. Lance and W. T. Williams", title = "A general theory of classificatory sorting strategies 1. Hierarchical systems", booktitle = "The computer journal 9.4", url = "http://dx.doi.org/10.1093/comjnl/9.4.373")
+public interface LinkageMethod {
+ /**
+ * Compute combined linkage for two clusters.
+ *
+ * @param sizex Size of first cluster x before merging
+ * @param dx Distance of cluster x to j before merging
+ * @param sizey Size of second cluster y before merging
+ * @param dy Distance of cluster y to j before merging
+ * @param sizej Size of candidate cluster j
+ * @param dxy Distance between clusters x and y before merging
+ * @return Combined distance
+ */
+ double combine(int sizex, double dx, int sizey, double dy, int sizej, double dxy);
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/MedianLinkageMethod.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/MedianLinkageMethod.java
new file mode 100644
index 00000000..fe167cec
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/MedianLinkageMethod.java
@@ -0,0 +1,80 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Median-linkage clustering method: Weighted pair group method using centroids
+ * (WPGMC).
+ *
+ * Reference:
+ * <p>
+ * J.C. Gower<br/>
+ * A comparison of some methods of cluster analysis<br/>
+ * Biometrics (1967): 623-637.
+ * </p>
+ *
+ * @author Erich Schubert
+ */
+@Reference(authors = "J. C. Gower", title = "A comparison of some methods of cluster analysis", booktitle = "Biometrics (1967)", url = "http://www.jstor.org/stable/10.2307/2528417")
+@Alias({ "wpgmc", "WPGMC", "weighted-centroid" })
+public class MedianLinkageMethod implements LinkageMethod {
+ /**
+ * Static instance of class.
+ */
+ public static final MedianLinkageMethod STATIC = new MedianLinkageMethod();
+
+ /**
+ * Constructor.
+ *
+ * @deprecated use the static instance {@link #STATIC} instead.
+ */
+ @Deprecated
+ public MedianLinkageMethod() {
+ super();
+ }
+
+ @Override
+ public double combine(int sizex, double dx, int sizey, double dy, int sizej, double dxy) {
+ return .5 * (dx + dy) - .25 * dxy;
+ }
+
+ /**
+ * Class parameterizer.
+ *
+ * Returns the static instance.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ @Override
+ protected MedianLinkageMethod makeInstance() {
+ return STATIC;
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/NaiveAgglomerativeHierarchicalClustering.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/NaiveAgglomerativeHierarchicalClustering.java
new file mode 100644
index 00000000..ee3052a4
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/NaiveAgglomerativeHierarchicalClustering.java
@@ -0,0 +1,303 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDBIDDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDistanceDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * This tutorial will step you through implementing a well known clustering
+ * algorithm, agglomerative hierarchical clustering, in multiple steps.
+ *
+ * This is the third step, where we add support for different linkage
+ * strategies.
+ *
+ * This is the naive O(n^3) algorithm. See {@link SLINK} for a much faster
+ * algorithm (however, only for single-linkage).
+ *
+ * Reference for the unified concept:
+ * <p>
+ * G. N. Lance and W. T. Williams<br />
+ * A general theory of classificatory sorting strategies 1. Hierarchical systems
+ * <br/>
+ * The computer journal 9.4 (1967): 373-380.
+ * </p>
+ *
+ * See also:
+ * <p>
+ * R. M. Cormack<br />
+ * A Review of Classification<br />
+ * Journal of the Royal Statistical Society. Series A, Vol. 134, No. 3
+ * </p>
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.composedOf LinkageMethod
+ *
+ * @param <O> Object type
+ */
+@Reference(authors = "G. N. Lance and W. T. Williams", title = "A general theory of classificatory sorting strategies 1. Hierarchical systems", booktitle = "The computer journal 9.4", url = "http://dx.doi.org/10.1093/comjnl/9.4.373")
+public class NaiveAgglomerativeHierarchicalClustering<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, PointerHierarchyRepresentationResult<DoubleDistance>> implements HierarchicalClusteringAlgorithm<DoubleDistance> {
+ /**
+ * Class logger
+ */
+ private static final Logging LOG = Logging.getLogger(NaiveAgglomerativeHierarchicalClustering.class);
+
+ /**
+ * Current linkage method in use.
+ */
+ LinkageMethod linkage = WardLinkageMethod.STATIC;
+
+ /**
+ * Constructor.
+ *
+ * @param distanceFunction Distance function to use
+ * @param linkage Linkage method
+ */
+ public NaiveAgglomerativeHierarchicalClustering(DistanceFunction<? super O, D> distanceFunction, LinkageMethod linkage) {
+ super(distanceFunction);
+ this.linkage = linkage;
+ }
+
+ /**
+ * Run the algorithm
+ *
+ * @param db Database
+ * @param relation Relation
+ * @return Clustering hierarchy
+ */
+ public PointerHierarchyRepresentationResult<DoubleDistance> run(Database db, Relation<O> relation) {
+ DistanceQuery<O, D> dq = db.getDistanceQuery(relation, getDistanceFunction());
+ ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
+ final int size = ids.size();
+
+ if (size > 0x10000) {
+ throw new AbortException("This implementation does not scale to data sets larger than " + 0x10000 + " instances (~17 GB RAM), which results in an integer overflow.");
+ }
+ if (SingleLinkageMethod.class.isInstance(linkage)) {
+ LOG.verbose("Notice: SLINK is a much faster algorithm for single-linkage clustering!");
+ }
+
+ // Compute the initial (lower triangular) distance matrix.
+ double[] scratch = new double[triangleSize(size)];
+ DBIDArrayIter ix = ids.iter(), iy = ids.iter(), ij = ids.iter();
+ // Position counter - must agree with computeOffset!
+ int pos = 0;
+ boolean square = WardLinkageMethod.class.isInstance(linkage) && !(SquaredEuclideanDistanceFunction.class.isInstance(getDistanceFunction()));
+ for (ix.seek(0); ix.valid(); ix.advance()) {
+ for (iy.seek(0); iy.getOffset() < ix.getOffset(); iy.advance()) {
+ scratch[pos] = dq.distance(ix, iy).doubleValue();
+ // Ward uses variances -- i.e. squared values
+ if (square) {
+ scratch[pos] *= scratch[pos];
+ }
+ pos++;
+ }
+ }
+
+ // Initialize space for result:
+ WritableDBIDDataStore pi = DataStoreUtil.makeDBIDStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
+ WritableDoubleDistanceDataStore lambda = DataStoreUtil.makeDoubleDistanceStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
+ WritableIntegerDataStore csize = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
+ for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
+ pi.put(it, it);
+ lambda.put(it, Double.POSITIVE_INFINITY);
+ csize.put(it, 1);
+ }
+
+ // Repeat until everything merged into 1 cluster
+ FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Agglomerative clustering", size - 1, LOG) : null;
+ for (int i = 1; i < size; i++) {
+ double mindist = Double.POSITIVE_INFINITY;
+ int x = -1, y = -1;
+ for (ix.seek(0); ix.valid(); ix.advance()) {
+ if (lambda.doubleValue(ix) < Double.POSITIVE_INFINITY) {
+ continue;
+ }
+ final int xbase = triangleSize(ix.getOffset());
+ for (iy.seek(0); iy.getOffset() < ix.getOffset(); iy.advance()) {
+ if (lambda.doubleValue(iy) < Double.POSITIVE_INFINITY) {
+ continue;
+ }
+ final int idx = xbase + iy.getOffset();
+ if (scratch[idx] <= mindist) {
+ mindist = scratch[idx];
+ x = ix.getOffset();
+ y = iy.getOffset();
+ }
+ }
+ }
+ assert (x >= 0 && y >= 0);
+ // Avoid allocating memory, by reusing existing iterators:
+ ix.seek(x);
+ iy.seek(y);
+ if (LOG.isDebuggingFine()) {
+ LOG.debugFine("Merging: " + DBIDUtil.toString(ix) + " -> " + DBIDUtil.toString(iy));
+ }
+ // Perform merge in data structure: x -> y
+ // Since y < x, prefer keeping y, dropping x.
+ lambda.put(ix, mindist);
+ pi.put(ix, iy);
+ // Merge into cluster
+ int sizex = csize.intValue(ix), sizey = csize.intValue(iy);
+ csize.put(iy, sizex + sizey);
+
+ // Update distance matrix. Note: y < x (the merged pair keeps index y)
+
+ // Implementation note: most will not need sizej, and could save the
+ // hashmap lookup.
+ final int xbase = triangleSize(x), ybase = triangleSize(y);
+
+ ij.seek(0);
+ // Write to (y, j), with j < y
+ for (; ij.getOffset() < y; ij.advance()) {
+ if (lambda.doubleValue(ij) < Double.POSITIVE_INFINITY) {
+ continue;
+ }
+ final int sizej = csize.intValue(ij);
+ scratch[ybase + ij.getOffset()] = linkage.combine(sizex, scratch[xbase + ij.getOffset()], sizey, scratch[ybase + ij.getOffset()], sizej, mindist);
+ }
+ ij.advance(); // Skip y
+ // Write to (j, y), with y < j < x
+ for (; ij.getOffset() < x; ij.advance()) {
+ if (lambda.doubleValue(ij) < Double.POSITIVE_INFINITY) {
+ continue;
+ }
+ final int jbase = triangleSize(ij.getOffset());
+ final int sizej = csize.intValue(ij);
+ scratch[jbase + y] = linkage.combine(sizex, scratch[xbase + ij.getOffset()], sizey, scratch[jbase + y], sizej, mindist);
+ }
+ ij.advance(); // Skip x
+ // Write to (j, y), with y < x < j
+ for (; ij.valid(); ij.advance()) {
+ if (lambda.doubleValue(ij) < Double.POSITIVE_INFINITY) {
+ continue;
+ }
+ final int sizej = csize.intValue(ij);
+ final int jbase = triangleSize(ij.getOffset());
+ scratch[jbase + y] = linkage.combine(sizex, scratch[jbase + x], sizey, scratch[jbase + y], sizej, mindist);
+ }
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
+ }
+ }
+ if (prog != null) {
+ prog.ensureCompleted(LOG);
+ }
+
+ return new PointerHierarchyRepresentationResult<>(ids, pi, lambda);
+ }
+
+ /**
+ * Compute the size of a complete x by x triangle (minus diagonal)
+ *
+ * @param x Offset
+ * @return Size of complete triangle
+ */
+ protected static int triangleSize(int x) {
+ return (x * (x - 1)) >>> 1;
+ }
+
+ @Override
+ public DoubleDistance getDistanceFactory() {
+ return DoubleDistance.FACTORY;
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ // The input relation must match our distance function:
+ return TypeUtil.array(getDistanceFunction().getInputTypeRestriction());
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ *
+ * @param <O> Object type
+ * @param <D> Distance type
+ */
+ public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ /**
+ * Option ID for linkage parameter.
+ */
+ public static final OptionID LINKAGE_ID = new OptionID("hierarchical.linkage", "Linkage method to use (e.g. Ward, Single-Link)");
+
+ /**
+ * Current linkage in use.
+ */
+ protected LinkageMethod linkage;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ // We don't call super, because we want a different default distance.
+ ObjectParameter<DistanceFunction<O, D>> distanceFunctionP = makeParameterDistanceFunction(SquaredEuclideanDistanceFunction.class, DistanceFunction.class);
+ if (config.grab(distanceFunctionP)) {
+ distanceFunction = distanceFunctionP.instantiateClass(config);
+ }
+
+ ObjectParameter<LinkageMethod> linkageP = new ObjectParameter<>(LINKAGE_ID, LinkageMethod.class);
+ linkageP.setDefaultValue(WardLinkageMethod.class);
+ if (config.grab(linkageP)) {
+ linkage = linkageP.instantiateClass(config);
+ }
+ }
+
+ @Override
+ protected NaiveAgglomerativeHierarchicalClustering<O, D> makeInstance() {
+ return new NaiveAgglomerativeHierarchicalClustering<>(distanceFunction, linkage);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/PointerHierarchyRepresentationResult.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/PointerHierarchyRepresentationResult.java
new file mode 100644
index 00000000..c339fb09
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/PointerHierarchyRepresentationResult.java
@@ -0,0 +1,97 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.database.datastore.DBIDDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.result.BasicResult;
+
+/**
+ * The pointer representation of a hierarchical clustering. Each object is
+ * represented by a parent object and the distance at which it joins the parent
+ * objects cluster.
+ *
+ * @author Erich Schubert
+ *
+ * @param <D> Distance type
+ */
+public class PointerHierarchyRepresentationResult<D extends Distance<D>> extends BasicResult {
+ /**
+ * The DBIDs in this result.
+ */
+ DBIDs ids;
+
+ /**
+ * The parent DBID relation.
+ */
+ DBIDDataStore parent;
+
+ /**
+ * Distance to the parent object.
+ */
+ DataStore<D> parentDistance;
+
+ /**
+ * Constructor.
+ *
+ * @param ids IDs processed.
+ * @param parent Parent pointer.
+ * @param parentDistance Distance to parent.
+ */
+ public PointerHierarchyRepresentationResult(DBIDs ids, DBIDDataStore parent, DataStore<D> parentDistance) {
+ super("Pointer Representation", "pointer-representation");
+ this.ids = ids;
+ this.parent = parent;
+ this.parentDistance = parentDistance;
+ }
+
+ /**
+ * Get the clustered DBIDs.
+ *
+ * @return DBIDs
+ */
+ public DBIDs getDBIDs() {
+ return ids;
+ }
+
+ /**
+ * Get the parent DBID relation.
+ *
+ * @return Parent relation.
+ */
+ public DBIDDataStore getParentStore() {
+ return parent;
+ }
+
+ /**
+ * Get the distance to the parent.
+ *
+ * @return Parent distance.
+ */
+ public DataStore<D> getParentDistanceStore() {
+ return parentDistance;
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/SLINK.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/SLINK.java
new file mode 100644
index 00000000..f1b58868
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/SLINK.java
@@ -0,0 +1,368 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDBIDDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDistanceDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDVar;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.DistanceUtil;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+
/**
 * Implementation of the efficient Single-Link Algorithm SLINK of R. Sibson.
 *
 * SLINK computes the single-linkage hierarchical clustering in a single pass
 * over the data, maintaining the compact pointer representation (pi, lambda):
 * every object stores a parent pointer and the distance at which it joins the
 * parent's cluster. It uses O(n^2) distance computations but only O(n)
 * additional memory.
 *
 * <p>
 * Reference:<br />
 * R. Sibson: SLINK: An optimally efficient algorithm for the single-link
 * cluster method. <br/>
 * In: The Computer Journal 16 (1973), No. 1, p. 30-34.
 * </p>
 *
 * @author Elke Achtert
 * @author Erich Schubert
 *
 * @apiviz.has SingleLinkageMethod
 *
 * @param <O> the type of DatabaseObject the algorithm is applied on
 * @param <D> the type of Distance used
 */
@Title("SLINK: Single Link Clustering")
@Description("Hierarchical clustering algorithm based on single-link connectivity.")
@Reference(authors = "R. Sibson", title = "SLINK: An optimally efficient algorithm for the single-link cluster method", booktitle = "The Computer Journal 16 (1973), No. 1, p. 30-34.", url = "http://dx.doi.org/10.1093/comjnl/16.1.30")
@Alias(value = { "de.lmu.ifi.dbs.elki.algorithm.clustering.SLINK", "clustering.SLINK", "SLINK", "single-link", "single-linkage" })
public class SLINK<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm<O, D, PointerHierarchyRepresentationResult<D>> implements HierarchicalClusteringAlgorithm<D> {
  /**
   * The logger for this class.
   */
  private static final Logging LOG = Logging.getLogger(SLINK.class);

  /**
   * Constructor.
   *
   * @param distanceFunction Distance function
   */
  public SLINK(DistanceFunction<? super O, D> distanceFunction) {
    super(distanceFunction);
  }

  /**
   * Performs the SLINK algorithm on the given database.
   *
   * Objects are inserted one by one; for each new object, the four SLINK steps
   * update the pointer representation of all previously processed objects.
   *
   * @param database Database to query for distances
   * @param relation Data relation to cluster
   * @return Pointer representation (pi, lambda) of the single-link hierarchy
   */
  public PointerHierarchyRepresentationResult<D> run(Database database, Relation<O> relation) {
    DBIDs ids = relation.getDBIDs();
    DistanceQuery<O, D> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
    @SuppressWarnings("unchecked")
    Class<D> distCls = (Class<D>) getDistanceFunction().getDistanceFactory().getClass();
    WritableDBIDDataStore pi = DataStoreUtil.makeDBIDStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
    WritableDataStore<D> lambda = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, distCls);
    // Temporary storage for m.
    WritableDataStore<D> m = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, distCls);

    FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Running SLINK", ids.size(), LOG) : null;
    // has to be an array for monotonicity reasons!
    ModifiableDBIDs processedIDs = DBIDUtil.newArray(ids.size());

    // Optimized code path for double distances, avoiding Distance boxing.
    if (getDistanceFunction() instanceof PrimitiveDoubleDistanceFunction && lambda instanceof WritableDoubleDistanceDataStore && m instanceof WritableDoubleDistanceDataStore) {
      @SuppressWarnings("unchecked")
      PrimitiveDoubleDistanceFunction<? super O> dist = (PrimitiveDoubleDistanceFunction<? super O>) getDistanceFunction();
      WritableDoubleDistanceDataStore lambdad = (WritableDoubleDistanceDataStore) lambda;
      WritableDoubleDistanceDataStore md = (WritableDoubleDistanceDataStore) m;
      // apply the algorithm
      for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
        step1double(id, pi, lambdad);
        step2double(id, processedIDs, distQuery.getRelation(), dist, md);
        step3double(id, pi, lambdad, processedIDs, md);
        step4double(id, pi, lambdad, processedIDs);

        processedIDs.add(id);

        if (progress != null) {
          progress.incrementProcessed(LOG);
        }
      }
    } else {
      // apply the algorithm (generic Distance code path)
      for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
        step1(id, pi, lambda);
        step2(id, processedIDs, distQuery, m);
        step3(id, pi, lambda, processedIDs, m);
        step4(id, pi, lambda, processedIDs);

        processedIDs.add(id);

        if (progress != null) {
          progress.incrementProcessed(LOG);
        }
      }
    }

    if (progress != null) {
      progress.ensureCompleted(LOG);
    }
    // We don't need m anymore.
    m.destroy();
    m = null;

    return new PointerHierarchyRepresentationResult<>(ids, pi, lambda);
  }

  /**
   * First step: Initialize P(id) = id, L(id) = infinity.
   *
   * @param id the id of the object to be inserted into the pointer
   *        representation
   * @param pi Pi data store
   * @param lambda Lambda data store
   */
  private void step1(DBIDRef id, WritableDBIDDataStore pi, WritableDataStore<D> lambda) {
    // P(n+1) = n+1:
    pi.put(id, id);
    // L(n+1) = infinity
    lambda.put(id, getDistanceFunction().getDistanceFactory().infiniteDistance());
  }

  /**
   * Second step: Determine the pairwise distances from all objects in the
   * pointer representation to the new object with the specified id.
   *
   * @param id the id of the object to be inserted into the pointer
   *        representation
   * @param processedIDs the already processed ids
   * @param distFunc Distance function to use
   * @param m Data store
   */
  private void step2(DBIDRef id, DBIDs processedIDs, DistanceQuery<O, D> distFunc, WritableDataStore<D> m) {
    O newObj = distFunc.getRelation().get(id);
    for (DBIDIter it = processedIDs.iter(); it.valid(); it.advance()) {
      // M(i) = dist(i, n+1)
      m.put(it, distFunc.distance(it, newObj));
    }
  }

  /**
   * Third step: Determine the values for P and L
   *
   * @param id the id of the object to be inserted into the pointer
   *        representation
   * @param pi Pi data store
   * @param lambda Lambda data store
   * @param processedIDs the already processed ids
   * @param m Data store
   */
  private void step3(DBIDRef id, WritableDBIDDataStore pi, WritableDataStore<D> lambda, DBIDs processedIDs, WritableDataStore<D> m) {
    DBIDVar p_i = DBIDUtil.newVar();
    // for i = 1..n
    for (DBIDIter it = processedIDs.iter(); it.valid(); it.advance()) {
      D l_i = lambda.get(it);
      D m_i = m.get(it);
      pi.assignVar(it, p_i); // p_i = pi(it)
      D mp_i = m.get(p_i);

      // if L(i) >= M(i)
      if (l_i.compareTo(m_i) >= 0) {
        // M(P(i)) = min { M(P(i)), L(i) }
        m.put(p_i, DistanceUtil.min(mp_i, l_i));

        // L(i) = M(i)
        lambda.put(it, m_i);

        // P(i) = n+1;
        pi.put(it, id);
      } else {
        // M(P(i)) = min { M(P(i)), M(i) }
        m.put(p_i, DistanceUtil.min(mp_i, m_i));
      }
    }
  }

  /**
   * Fourth step: Actualize the clusters if necessary
   *
   * @param id the id of the current object
   * @param pi Pi data store
   * @param lambda Lambda data store
   * @param processedIDs the already processed ids
   */
  private void step4(DBIDRef id, WritableDBIDDataStore pi, WritableDataStore<D> lambda, DBIDs processedIDs) {
    DBIDVar p_i = DBIDUtil.newVar();
    // for i = 1..n
    for (DBIDIter it = processedIDs.iter(); it.valid(); it.advance()) {
      D l_i = lambda.get(it);
      pi.assignVar(it, p_i); // p_i = pi(it)
      D lp_i = lambda.get(p_i);

      // if L(i) >= L(P(i))
      if (l_i.compareTo(lp_i) >= 0) {
        // P(i) = n+1
        pi.put(it, id);
      }
    }
  }

  /**
   * First step: Initialize P(id) = id, L(id) = infinity.
   *
   * Double-optimized variant of {@link #step1}.
   *
   * @param id the id of the object to be inserted into the pointer
   *        representation
   * @param pi Pi data store
   * @param lambda Lambda data store
   */
  private void step1double(DBIDRef id, WritableDBIDDataStore pi, WritableDoubleDistanceDataStore lambda) {
    // P(n+1) = n+1:
    pi.put(id, id);
    // L(n+1) = infinity
    lambda.putDouble(id, Double.POSITIVE_INFINITY);
  }

  /**
   * Second step: Determine the pairwise distances from all objects in the
   * pointer representation to the new object with the specified id.
   *
   * Double-optimized variant of {@link #step2}.
   *
   * @param id the id of the object to be inserted into the pointer
   *        representation
   * @param processedIDs the already processed ids
   * @param relation Data relation
   * @param distFunc Distance function to use
   * @param m Data store
   */
  private void step2double(DBIDRef id, DBIDs processedIDs, Relation<? extends O> relation, PrimitiveDoubleDistanceFunction<? super O> distFunc, WritableDoubleDistanceDataStore m) {
    O newObj = relation.get(id);
    for (DBIDIter it = processedIDs.iter(); it.valid(); it.advance()) {
      // M(i) = dist(i, n+1)
      m.putDouble(it, distFunc.doubleDistance(relation.get(it), newObj));
    }
  }

  /**
   * Third step: Determine the values for P and L
   *
   * Double-optimized variant of {@link #step3}.
   *
   * @param id the id of the object to be inserted into the pointer
   *        representation
   * @param pi Pi data store
   * @param lambda Lambda data store
   * @param processedIDs the already processed ids
   * @param m Data store
   */
  private void step3double(DBIDRef id, WritableDBIDDataStore pi, WritableDoubleDistanceDataStore lambda, DBIDs processedIDs, WritableDoubleDistanceDataStore m) {
    DBIDVar p_i = DBIDUtil.newVar();
    // for i = 1..n
    for (DBIDIter it = processedIDs.iter(); it.valid(); it.advance()) {
      double l_i = lambda.doubleValue(it);
      double m_i = m.doubleValue(it);
      pi.assignVar(it, p_i); // p_i = pi(it)
      double mp_i = m.doubleValue(p_i);

      // if L(i) >= M(i)
      if (l_i >= m_i) {
        // M(P(i)) = min { M(P(i)), L(i) }
        m.putDouble(p_i, Math.min(mp_i, l_i));

        // L(i) = M(i)
        lambda.putDouble(it, m_i);

        // P(i) = n+1;
        pi.put(it, id);
      } else {
        // M(P(i)) = min { M(P(i)), M(i) }
        m.putDouble(p_i, Math.min(mp_i, m_i));
      }
    }
  }

  /**
   * Fourth step: Actualize the clusters if necessary
   *
   * Double-optimized variant of {@link #step4}.
   *
   * @param id the id of the current object
   * @param pi Pi data store
   * @param lambda Lambda data store
   * @param processedIDs the already processed ids
   */
  private void step4double(DBIDRef id, WritableDBIDDataStore pi, WritableDoubleDistanceDataStore lambda, DBIDs processedIDs) {
    DBIDVar p_i = DBIDUtil.newVar();
    // for i = 1..n
    for (DBIDIter it = processedIDs.iter(); it.valid(); it.advance()) {
      double l_i = lambda.doubleValue(it);
      pi.assignVar(it, p_i); // p_i = pi(it)
      double lp_i = lambda.doubleValue(p_i);

      // if L(i) >= L(P(i))
      if (l_i >= lp_i) {
        // P(i) = n+1
        pi.put(it, id);
      }
    }
  }

  @Override
  public D getDistanceFactory() {
    return getDistanceFunction().getDistanceFactory();
  }

  @Override
  public TypeInformation[] getInputTypeRestriction() {
    return TypeUtil.array(getDistanceFunction().getInputTypeRestriction());
  }

  @Override
  protected Logging getLogger() {
    return LOG;
  }

  /**
   * Parameterization class.
   *
   * @author Erich Schubert
   *
   * @apiviz.exclude
   */
  public static class Parameterizer<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
    @Override
    protected SLINK<O, D> makeInstance() {
      return new SLINK<>(distanceFunction);
    }
  }
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/SingleLinkageMethod.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/SingleLinkageMethod.java
new file mode 100644
index 00000000..7ef81692
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/SingleLinkageMethod.java
@@ -0,0 +1,80 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Single-linkage clustering method.
+ *
+ * Reference:
+ * <p>
+ * K. Florek and J. Łukaszewicz and J. Perkal and H. Steinhaus and S. Zubrzycki<br/>
+ * Sur la liaison et la division des points d'un ensemble fini<br />
+ * In Colloquium Mathematicae (Vol. 2, No. 3-4)
+ * </p>
+ *
+ * @author Erich Schubert
+ */
+@Reference(authors = "K. Florek and J. Łukaszewicz and J. Perkal and H. Steinhaus and S. Zubrzycki", title = "Sur la liaison et la division des points d'un ensemble fini", booktitle = "Colloquium Mathematicae (Vol. 2, No. 3-4)")
+@Alias({ "single-link", "single", "slink", "nearest", "nearest-neighbor" })
+public class SingleLinkageMethod implements LinkageMethod {
+ /**
+ * Static instance of class.
+ */
+ public static final SingleLinkageMethod STATIC = new SingleLinkageMethod();
+
+ /**
+ * Constructor.
+ *
+ * @deprecated use the static instance {@link #STATIC} instead.
+ */
+ @Deprecated
+ public SingleLinkageMethod() {
+ super();
+ }
+
+ @Override
+ public double combine(int sizex, double dx, int sizey, double dy, int sizej, double dxy) {
+ return Math.min(dx, dy);
+ }
+
+ /**
+ * Class parameterizer.
+ *
+ * Returns the static instance.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ @Override
+ protected SingleLinkageMethod makeInstance() {
+ return STATIC;
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/WardLinkageMethod.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/WardLinkageMethod.java
new file mode 100644
index 00000000..488f011c
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/WardLinkageMethod.java
@@ -0,0 +1,86 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+
+/**
+ * Ward's method clustering method.
+ *
+ * This criterion minimizes variances, and makes most sense when used with
+ * squared Euclidean distance, see
+ * {@link de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction}
+ *
+ * Reference:
+ * <p>
+ * Ward Jr, Joe H.<br />
+ * Hierarchical grouping to optimize an objective function<br />
+ * Journal of the American statistical association 58.301 (1963): 236-244.
+ * </p>
+ *
+ * @author Erich Schubert
+ */
+@Reference(authors = "J. H. Ward Jr", title = "Hierarchical grouping to optimize an objective function", booktitle = "Journal of the American statistical association 58.301", url = "http://dx.doi.org/10.1080/01621459.1963.10500845")
+@Alias({ "ward", "variance" })
+public class WardLinkageMethod implements LinkageMethod {
+ /**
+ * Static instance of class.
+ */
+ public static final WardLinkageMethod STATIC = new WardLinkageMethod();
+
+ /**
+ * Constructor.
+ *
+ * @deprecated use the static instance {@link #STATIC} instead.
+ */
+ @Deprecated
+ public WardLinkageMethod() {
+ super();
+ }
+
+ @Override
+ public double combine(int sizex, double dx, int sizey, double dy, int sizej, double dxy) {
+ final double wx = (sizex + sizej) / (double) (sizex + sizey + sizej);
+ final double wy = (sizey + sizej) / (double) (sizex + sizey + sizej);
+ final double beta = sizej / (double) (sizex + sizey + sizej);
+ return wx * dx + wy * dy - beta * dxy;
+ }
+
+ /**
+ * Class parameterizer.
+ *
+ * Returns the static instance.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ @Override
+ protected WardLinkageMethod makeInstance() {
+ return STATIC;
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/WeightedAverageLinkageMethod.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/WeightedAverageLinkageMethod.java
new file mode 100644
index 00000000..ac0b17f5
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/WeightedAverageLinkageMethod.java
@@ -0,0 +1,84 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Weighted average linkage clustering method.
+ *
+ * This is somewhat a misnomer, as it actually ignores that the clusters should
+ * likely be weighted differently according to their size when computing the
+ * average linkage. See {@link GroupAverageLinkageMethod} for the UPGMA method
+ * that uses the group size to weight the objects the same way.
+ *
+ * Reference:
+ * <p>
+ * A. K. Jain and R. C. Dubes<br />
+ * Algorithms for Clustering Data<br />
+ * Prentice-Hall
+ * </p>
+ *
+ * @author Erich Schubert
+ */
+@Reference(authors = "A. K. Jain and R. C. Dubes", title = "Algorithms for Clustering Data", booktitle = "Algorithms for Clustering Data, Prentice-Hall")
+@Alias({ "wpgma", "WPGMA" })
+public class WeightedAverageLinkageMethod implements LinkageMethod {
+ /**
+ * Static instance of class.
+ */
+ public static final WeightedAverageLinkageMethod STATIC = new WeightedAverageLinkageMethod();
+
+ /**
+ * Constructor.
+ *
+ * @deprecated use the static instance {@link #STATIC} instead.
+ */
+ @Deprecated
+ public WeightedAverageLinkageMethod() {
+ super();
+ }
+
+ @Override
+ public double combine(int sizex, double dx, int sizey, double dy, int sizej, double dxy) {
+ return .5 * (dx + dy);
+ }
+
+ /**
+ * Class parameterizer.
+ *
+ * Returns the static instance.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ @Override
+ protected WeightedAverageLinkageMethod makeInstance() {
+ return STATIC;
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeans.java
index 47855aad..dc1fa47c 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeans.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeans.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -43,9 +43,17 @@ import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
/**
* Abstract base class for k-means implementations.
@@ -59,7 +67,7 @@ import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect;
* @param <D> Distance type
* @param <M> Cluster model type
*/
-public abstract class AbstractKMeans<V extends NumberVector<?>, D extends Distance<D>, M extends MeanModel<V>> extends AbstractPrimitiveDistanceBasedAlgorithm<NumberVector<?>, D, Clustering<M>> implements KMeans, ClusteringAlgorithm<Clustering<M>> {
+public abstract class AbstractKMeans<V extends NumberVector<?>, D extends Distance<D>, M extends MeanModel<V>> extends AbstractPrimitiveDistanceBasedAlgorithm<NumberVector<?>, D, Clustering<M>> implements KMeans<V, D, M>, ClusteringAlgorithm<Clustering<M>> {
/**
* Holds the value of {@link #K_ID}.
*/
@@ -102,54 +110,53 @@ public abstract class AbstractKMeans<V extends NumberVector<?>, D extends Distan
protected boolean assignToNearestCluster(Relation<V> relation, List<? extends NumberVector<?>> means, List<? extends ModifiableDBIDs> clusters) {
boolean changed = false;
- if(getDistanceFunction() instanceof PrimitiveDoubleDistanceFunction) {
+ if (getDistanceFunction() instanceof PrimitiveDoubleDistanceFunction) {
@SuppressWarnings("unchecked")
final PrimitiveDoubleDistanceFunction<? super NumberVector<?>> df = (PrimitiveDoubleDistanceFunction<? super NumberVector<?>>) getDistanceFunction();
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double mindist = Double.POSITIVE_INFINITY;
V fv = relation.get(iditer);
int minIndex = 0;
- for(int i = 0; i < k; i++) {
+ for (int i = 0; i < k; i++) {
double dist = df.doubleDistance(fv, means.get(i));
- if(dist < mindist) {
+ if (dist < mindist) {
minIndex = i;
mindist = dist;
}
}
- if(clusters.get(minIndex).add(iditer)) {
+ if (clusters.get(minIndex).add(iditer)) {
changed = true;
// Remove from previous cluster
// TODO: keep a list of cluster assignments to save this search?
- for(int i = 0; i < k; i++) {
- if(i != minIndex) {
- if(clusters.get(i).remove(iditer)) {
+ for (int i = 0; i < k; i++) {
+ if (i != minIndex) {
+ if (clusters.get(i).remove(iditer)) {
break;
}
}
}
}
}
- }
- else {
+ } else {
final PrimitiveDistanceFunction<? super NumberVector<?>, D> df = getDistanceFunction();
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
D mindist = df.getDistanceFactory().infiniteDistance();
V fv = relation.get(iditer);
int minIndex = 0;
- for(int i = 0; i < k; i++) {
+ for (int i = 0; i < k; i++) {
D dist = df.distance(fv, means.get(i));
- if(dist.compareTo(mindist) < 0) {
+ if (dist.compareTo(mindist) < 0) {
minIndex = i;
mindist = dist;
}
}
- if(clusters.get(minIndex).add(iditer)) {
+ if (clusters.get(minIndex).add(iditer)) {
changed = true;
// Remove from previous cluster
// TODO: keep a list of cluster assignments to save this search?
- for(int i = 0; i < k; i++) {
- if(i != minIndex) {
- if(clusters.get(i).remove(iditer)) {
+ for (int i = 0; i < k; i++) {
+ if (i != minIndex) {
+ if (clusters.get(i).remove(iditer)) {
break;
}
}
@@ -174,21 +181,24 @@ public abstract class AbstractKMeans<V extends NumberVector<?>, D extends Distan
* @return the mean vectors of the given clusters in the given database
*/
protected List<Vector> means(List<? extends ModifiableDBIDs> clusters, List<? extends NumberVector<?>> means, Relation<V> database) {
- List<Vector> newMeans = new ArrayList<Vector>(k);
- for(int i = 0; i < k; i++) {
+ List<Vector> newMeans = new ArrayList<>(k);
+ for (int i = 0; i < k; i++) {
ModifiableDBIDs list = clusters.get(i);
Vector mean = null;
- if(list.size() > 0) {
+ if (list.size() > 0) {
double s = 1.0 / list.size();
DBIDIter iter = list.iter();
assert (iter.valid());
mean = database.get(iter).getColumnVector().timesEquals(s);
+ double[] raw = mean.getArrayRef();
iter.advance();
- for(; iter.valid(); iter.advance()) {
- mean.plusTimesEquals(database.get(iter).getColumnVector(), s);
+ for (; iter.valid(); iter.advance()) {
+ NumberVector<?> vec = database.get(iter);
+ for (int j = 0; j < mean.getDimensionality(); j++) {
+ raw[j] += s * vec.doubleValue(j);
+ }
}
- }
- else {
+ } else {
mean = means.get(i).getColumnVector();
}
newMeans.add(mean);
@@ -207,19 +217,18 @@ public abstract class AbstractKMeans<V extends NumberVector<?>, D extends Distan
protected List<NumberVector<?>> medians(List<? extends ModifiableDBIDs> clusters, List<? extends NumberVector<?>> medians, Relation<V> database) {
final int dim = medians.get(0).getDimensionality();
final SortDBIDsBySingleDimension sorter = new SortDBIDsBySingleDimension(database);
- List<NumberVector<?>> newMedians = new ArrayList<NumberVector<?>>(k);
- for(int i = 0; i < k; i++) {
+ List<NumberVector<?>> newMedians = new ArrayList<>(k);
+ for (int i = 0; i < k; i++) {
ArrayModifiableDBIDs list = DBIDUtil.newArray(clusters.get(i));
- if(list.size() > 0) {
+ if (list.size() > 0) {
Vector mean = new Vector(dim);
- for(int d = 0; d < dim; d++) {
+ for (int d = 0; d < dim; d++) {
sorter.setDimension(d);
DBID id = QuickSelect.median(list, sorter);
mean.set(d, database.get(id).doubleValue(d));
}
newMedians.add(mean);
- }
- else {
+ } else {
newMedians.add((NumberVector<?>) medians.get(i));
}
}
@@ -235,7 +244,7 @@ public abstract class AbstractKMeans<V extends NumberVector<?>, D extends Distan
* @param op Cluster size change / Weight change
*/
protected void incrementalUpdateMean(Vector mean, V vec, int newsize, double op) {
- if(newsize == 0) {
+ if (newsize == 0) {
return; // Keep old mean
}
Vector delta = vec.getColumnVector();
@@ -256,65 +265,62 @@ public abstract class AbstractKMeans<V extends NumberVector<?>, D extends Distan
protected boolean macQueenIterate(Relation<V> relation, List<Vector> means, List<ModifiableDBIDs> clusters) {
boolean changed = false;
- if(getDistanceFunction() instanceof PrimitiveDoubleDistanceFunction) {
+ if (getDistanceFunction() instanceof PrimitiveDoubleDistanceFunction) {
// Raw distance function
@SuppressWarnings("unchecked")
final PrimitiveDoubleDistanceFunction<? super NumberVector<?>> df = (PrimitiveDoubleDistanceFunction<? super NumberVector<?>>) getDistanceFunction();
// Incremental update
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double mindist = Double.POSITIVE_INFINITY;
V fv = relation.get(iditer);
int minIndex = 0;
- for(int i = 0; i < k; i++) {
+ for (int i = 0; i < k; i++) {
double dist = df.doubleDistance(fv, means.get(i));
- if(dist < mindist) {
+ if (dist < mindist) {
minIndex = i;
mindist = dist;
}
}
// Update the cluster mean incrementally:
- for(int i = 0; i < k; i++) {
+ for (int i = 0; i < k; i++) {
ModifiableDBIDs ci = clusters.get(i);
- if(i == minIndex) {
- if(ci.add(iditer)) {
+ if (i == minIndex) {
+ if (ci.add(iditer)) {
incrementalUpdateMean(means.get(i), fv, ci.size(), +1);
changed = true;
}
- }
- else if(ci.remove(iditer)) {
+ } else if (ci.remove(iditer)) {
incrementalUpdateMean(means.get(i), fv, ci.size() + 1, -1);
changed = true;
}
}
}
- }
- else {
+ } else {
// Raw distance function
final PrimitiveDistanceFunction<? super NumberVector<?>, D> df = getDistanceFunction();
// Incremental update
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
D mindist = df.getDistanceFactory().infiniteDistance();
V fv = relation.get(iditer);
int minIndex = 0;
- for(int i = 0; i < k; i++) {
+ for (int i = 0; i < k; i++) {
D dist = df.distance(fv, means.get(i));
- if(dist.compareTo(mindist) < 0) {
+ if (dist.compareTo(mindist) < 0) {
minIndex = i;
mindist = dist;
}
}
// Update the cluster mean incrementally:
- for(int i = 0; i < k; i++) {
+ for (int i = 0; i < k; i++) {
ModifiableDBIDs ci = clusters.get(i);
- if(i == minIndex) {
- if(ci.add(iditer)) {
+ if (i == minIndex) {
+ if (ci.add(iditer)) {
incrementalUpdateMean(means.get(i), fv, ci.size(), +1);
changed = true;
}
- }
- else if(ci.remove(iditer)) {
+ } else if (ci.remove(iditer)) {
incrementalUpdateMean(means.get(i), fv, ci.size() + 1, -1);
changed = true;
}
@@ -323,4 +329,76 @@ public abstract class AbstractKMeans<V extends NumberVector<?>, D extends Distan
}
return changed;
}
+
+  /**
+   * Set the number of clusters (k) to produce. Exposed so that nested
+   * k-means variants (e.g. bisecting k-means) can reconfigure this instance.
+   *
+   * @param k Number of clusters
+   */
+  @Override
+  public void setK(int k) {
+    this.k = k;
+  }
+
+  /**
+   * Replace the distance function used for assignment. Exposed so that
+   * nested k-means variants can reconfigure this instance.
+   *
+   * @param distanceFunction Distance function to use
+   */
+  @Override
+  public void setDistanceFunction(PrimitiveDistanceFunction<? super NumberVector<?>, D> distanceFunction) {
+    this.distanceFunction = distanceFunction;
+  }
+
+  /**
+   * Parameterization class.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   */
+  public abstract static class Parameterizer<V extends NumberVector<?>, D extends Distance<D>> extends AbstractPrimitiveDistanceBasedAlgorithm.Parameterizer<NumberVector<?>, D> {
+    /**
+     * k Parameter.
+     */
+    protected int k;
+
+    /**
+     * Maximum number of iterations.
+     */
+    protected int maxiter;
+
+    /**
+     * Initialization method.
+     */
+    protected KMeansInitialization<V> initializer;
+
+    @Override
+    protected void makeOptions(Parameterization config) {
+      // Distance function, defaulting to squared Euclidean, which is the
+      // objective k-means actually minimizes (sum of squared deviations).
+      ObjectParameter<PrimitiveDistanceFunction<NumberVector<?>, D>> distanceFunctionP = makeParameterDistanceFunction(SquaredEuclideanDistanceFunction.class, PrimitiveDistanceFunction.class);
+      if (config.grab(distanceFunctionP)) {
+        distanceFunction = distanceFunctionP.instantiateClass(config);
+        // Warn, but do not abort: other distances may fail to converge.
+        if (!(distanceFunction instanceof EuclideanDistanceFunction) && !(distanceFunction instanceof SquaredEuclideanDistanceFunction)) {
+          getLogger().warning("k-means optimizes the sum of squares - it should be used with squared euclidean distance and may stop converging otherwise!");
+        }
+      }
+
+      // Number of clusters, must be positive.
+      IntParameter kP = new IntParameter(K_ID);
+      kP.addConstraint(new GreaterConstraint(0));
+      if (config.grab(kP)) {
+        k = kP.getValue();
+      }
+
+      // Strategy for choosing the initial cluster centers.
+      ObjectParameter<KMeansInitialization<V>> initialP = new ObjectParameter<>(INIT_ID, KMeansInitialization.class, RandomlyChosenInitialMeans.class);
+      if (config.grab(initialP)) {
+        initializer = initialP.instantiateClass(config);
+      }
+
+      // Iteration limit; default 0 — presumably "no limit", confirm in run().
+      IntParameter maxiterP = new IntParameter(MAXITER_ID, 0);
+      maxiterP.addConstraint(new GreaterEqualConstraint(0));
+      if (config.grab(maxiterP)) {
+        maxiter = maxiterP.getValue();
+      }
+    }
+
+    /**
+     * Get class logger.
+     *
+     * @return Logger
+     */
+    protected abstract Logging getLogger();
+
+    @Override
+    protected abstract AbstractKMeans<V, D, ?> makeInstance();
+  }
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeansInitialization.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeansInitialization.java
index 3a69c806..9e3eb478 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeansInitialization.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeansInitialization.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/BestOfMultipleKMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/BestOfMultipleKMeans.java
new file mode 100644
index 00000000..30bb640c
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/BestOfMultipleKMeans.java
@@ -0,0 +1,219 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
+
+/*
+ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.quality.KMeansQualityMeasure;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.MeanModel;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * Run K-Means multiple times, and keep the best run.
+ *
+ * @author Stephan Baier
+ * @author Erich Schubert
+ *
+ * @param <V> Vector type
+ * @param <D> Distance type
+ * @param <M> Model type
+ */
+public class BestOfMultipleKMeans<V extends NumberVector<?>, D extends Distance<?>, M extends MeanModel<V>> extends AbstractAlgorithm<Clustering<M>> implements KMeans<V, D, M> {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(BestOfMultipleKMeans.class);
+
+ /**
+ * Number of trials to do.
+ */
+ private int trials;
+
+ /**
+ * Variant of kMeans for the bisecting step.
+ */
+ private KMeans<V, D, M> innerkMeans;
+
+ /**
+ * Quality measure which should be used.
+ */
+ private KMeansQualityMeasure<? super V, ? super D> qualityMeasure;
+
+ /**
+ * Constructor.
+ *
+ * @param trials Number of trials to do.
+ * @param innerkMeans K-Means variant to actually use.
+ * @param qualityMeasure Quality measure
+ */
+ public BestOfMultipleKMeans(int trials, KMeans<V, D, M> innerkMeans, KMeansQualityMeasure<? super V, ? super D> qualityMeasure) {
+ super();
+ this.trials = trials;
+ this.innerkMeans = innerkMeans;
+ this.qualityMeasure = qualityMeasure;
+ }
+
+ @Override
+ public Clustering<M> run(Database database, Relation<V> relation) {
+ if (!(innerkMeans.getDistanceFunction() instanceof PrimitiveDistanceFunction)) {
+ throw new AbortException("K-Means results can only be evaluated for primitive distance functions, got: " + innerkMeans.getDistanceFunction().getClass());
+ }
+ final PrimitiveDistanceFunction<? super V, D> df = (PrimitiveDistanceFunction<? super V, D>) innerkMeans.getDistanceFunction();
+ Clustering<M> bestResult = null;
+ if (trials > 1) {
+ double bestCost = Double.POSITIVE_INFINITY;
+ FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("K-means iterations", trials, LOG) : null;
+ for (int i = 0; i < trials; i++) {
+ Clustering<M> currentCandidate = innerkMeans.run(database, relation);
+ double currentCost = qualityMeasure.calculateCost(currentCandidate, df, relation);
+
+ if (LOG.isVerbose()) {
+ LOG.verbose("Cost of candidate " + i + ": " + currentCost);
+ }
+
+ if (currentCost < bestCost) {
+ bestResult = currentCandidate;
+ bestCost = currentCost;
+ }
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
+ }
+ }
+ if (prog != null) {
+ prog.ensureCompleted(LOG);
+ }
+ } else {
+ bestResult = innerkMeans.run(database);
+ }
+
+ return bestResult;
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return innerkMeans.getInputTypeRestriction();
+ }
+
+ @Override
+ public DistanceFunction<? super V, D> getDistanceFunction() {
+ return innerkMeans.getDistanceFunction();
+ }
+
+ @Override
+ public void setK(int k) {
+ innerkMeans.setK(k);
+ }
+
+ @Override
+ public void setDistanceFunction(PrimitiveDistanceFunction<? super NumberVector<?>, D> distanceFunction) {
+ innerkMeans.setDistanceFunction(distanceFunction);
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Stephan Baier
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ *
+ * @param <V> Vector type
+ * @param <D> Distance type
+ * @param <M> Model type
+ */
+ public static class Parameterizer<V extends NumberVector<?>, D extends Distance<D>, M extends MeanModel<V>> extends AbstractParameterizer {
+ /**
+ * Parameter to specify the iterations of the bisecting step.
+ */
+ public static final OptionID TRIALS_ID = new OptionID("kmeans.trials", "The number of trials to run.");
+
+ /**
+ * Parameter to specify the kMeans variant.
+ */
+ public static final OptionID KMEANS_ID = new OptionID("kmeans.algorithm", "KMeans variant to run multiple times.");
+
+ /**
+ * Parameter to specify the variant of quality measure.
+ */
+ public static final OptionID QUALITYMEASURE_ID = new OptionID("kmeans.qualitymeasure", "Quality measure variant for deciding which run to keep.");
+
+ /**
+ * Number of trials to perform.
+ */
+ protected int trials;
+
+ /**
+ * Variant of kMeans to use.
+ */
+ protected KMeans<V, D, M> kMeansVariant;
+
+ /**
+ * Quality measure.
+ */
+ protected KMeansQualityMeasure<? super V, ? super D> qualityMeasure;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ IntParameter trialsP = new IntParameter(TRIALS_ID);
+ trialsP.addConstraint(new GreaterEqualConstraint(1));
+ if (config.grab(trialsP)) {
+ trials = trialsP.intValue();
+ }
+
+ ObjectParameter<KMeans<V, D, M>> kMeansVariantP = new ObjectParameter<>(KMEANS_ID, KMeans.class);
+ if (config.grab(kMeansVariantP)) {
+ kMeansVariant = kMeansVariantP.instantiateClass(config);
+ }
+
+ ObjectParameter<KMeansQualityMeasure<V, ? super D>> qualityMeasureP = new ObjectParameter<>(QUALITYMEASURE_ID, KMeansQualityMeasure.class);
+ if (config.grab(qualityMeasureP)) {
+ qualityMeasure = qualityMeasureP.instantiateClass(config);
+ }
+ }
+
+ @Override
+ protected BestOfMultipleKMeans<V, D, M> makeInstance() {
+ return new BestOfMultipleKMeans<>(trials, kMeansVariant, qualityMeasure);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FarthestPointsInitialMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FarthestPointsInitialMeans.java
new file mode 100644
index 00000000..a018c04b
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FarthestPointsInitialMeans.java
@@ -0,0 +1,186 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDVar;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
+
+/**
+ * K-Means initialization by repeatedly choosing the farthest point.
+ *
+ * Note: this is less random than other initializations, so running multiple
+ * times will be more likely to return the same local minima.
+ *
+ * @author Erich Schubert
+ *
+ * @param <V> Vector type
+ * @param <D> Distance type
+ */
+public class FarthestPointsInitialMeans<V, D extends NumberDistance<D, ?>> extends AbstractKMeansInitialization<V> implements KMedoidsInitialization<V> {
+  /**
+   * Discard the first vector.
+   */
+  boolean dropfirst = true;
+
+  /**
+   * Constructor.
+   *
+   * @param rnd Random generator.
+   * @param dropfirst Flag to discard the first vector.
+   */
+  public FarthestPointsInitialMeans(RandomFactory rnd, boolean dropfirst) {
+    super(rnd);
+    this.dropfirst = dropfirst;
+  }
+
+  @Override
+  public List<V> chooseInitialMeans(Database database, Relation<V> relation, int k, PrimitiveDistanceFunction<? super NumberVector<?>, ?> distanceFunction) {
+    // Get a distance query
+    if (!(distanceFunction.getDistanceFactory() instanceof NumberDistance)) {
+      throw new AbortException("Farthest points K-Means initialization can only be used with numerical distances.");
+    }
+    @SuppressWarnings("unchecked")
+    final PrimitiveDistanceFunction<? super V, D> distF = (PrimitiveDistanceFunction<? super V, D>) distanceFunction;
+    DistanceQuery<V, D> distQ = database.getDistanceQuery(relation, distF);
+
+    // Choose the first mean at random.
+    List<V> means = new ArrayList<>(k);
+
+    Random random = rnd.getRandom();
+    DBIDIter first = DBIDUtil.randomSample(relation.getDBIDs(), 1, new Random(random.nextLong())).iter();
+    means.add(relation.get(first));
+
+    DBIDVar best = DBIDUtil.newVar(first);
+    for (int i = (dropfirst ? 0 : 1); i < k; i++) {
+      // Find farthest object (largest sum of distances to current means):
+      double maxdist = Double.NEGATIVE_INFINITY;
+      for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
+        double dsum = 0.;
+        for (V ex : means) {
+          dsum += distQ.distance(ex, it).doubleValue();
+        }
+        if (dsum > maxdist) {
+          maxdist = dsum;
+          best.set(it);
+        }
+      }
+      // With dropfirst, the loop starts at i == 0 and the randomly chosen
+      // first mean is only temporary - discard it before adding the farthest
+      // point. (Bugfix: this previously tested "k == 0", which can never hold
+      // inside the loop, so k+1 means were returned.)
+      if (i == 0) {
+        means.clear(); // Remove temporary first element.
+      }
+      means.add(relation.get(best));
+    }
+
+    return means;
+  }
+
+  @Override
+  public DBIDs chooseInitialMedoids(int k, DistanceQuery<? super V, ?> distQ2) {
+    if (!(distQ2.getDistanceFactory() instanceof NumberDistance)) {
+      throw new AbortException("Farthest points K-Means initialization can only be used with numerical distances.");
+    }
+    @SuppressWarnings("unchecked")
+    DistanceQuery<? super V, D> distQ = (DistanceQuery<? super V, D>) distQ2;
+    final Relation<?> relation = distQ.getRelation();
+    // Choose the first medoid at random.
+    ArrayModifiableDBIDs means = DBIDUtil.newArray(k);
+
+    Random random = rnd.getRandom();
+    DBIDIter first = DBIDUtil.randomSample(relation.getDBIDs(), 1, new Random(random.nextLong())).iter();
+    means.add(first);
+
+    DBIDVar best = DBIDUtil.newVar(first);
+    for (int i = (dropfirst ? 0 : 1); i < k; i++) {
+      // Find farthest object (largest sum of distances to current medoids):
+      double maxdist = Double.NEGATIVE_INFINITY;
+      for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
+        double dsum = 0.;
+        for (DBIDIter ex = means.iter(); ex.valid(); ex.advance()) {
+          dsum += distQ.distance(ex, it).doubleValue();
+        }
+        if (dsum > maxdist) {
+          maxdist = dsum;
+          best.set(it);
+        }
+      }
+      // Discard the temporary random medoid on the first pass (dropfirst).
+      // (Bugfix: this previously tested "k == 0", which can never hold here.)
+      if (i == 0) {
+        means.clear(); // Remove temporary first element.
+      }
+      means.add(best);
+    }
+
+    return means;
+  }
+
+  /**
+   * Parameterization class.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   */
+  public static class Parameterizer<V, D extends NumberDistance<D, ?>> extends AbstractKMeansInitialization.Parameterizer<V> {
+    /**
+     * Option ID to control the handling of the first object chosen.
+     */
+    public static final OptionID DROPFIRST_ID = new OptionID("farthest.dropfirst", "Drop the first object chosen (which is chosen randomly) for the farthest points heuristic.");
+
+    /**
+     * Flag for discarding the first object chosen.
+     */
+    protected boolean dropfirst = true;
+
+    @Override
+    protected void makeOptions(Parameterization config) {
+      super.makeOptions(config);
+      Flag dropfirstP = new Flag(DROPFIRST_ID);
+      if (config.grab(dropfirstP)) {
+        dropfirst = dropfirstP.isTrue();
+      }
+    }
+
+    @Override
+    protected FarthestPointsInitialMeans<V, D> makeInstance() {
+      return new FarthestPointsInitialMeans<>(rnd, dropfirst);
+    }
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FirstKInitialMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FirstKInitialMeans.java
index 1e51f4d6..08e2f116 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FirstKInitialMeans.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FirstKInitialMeans.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -26,6 +26,7 @@ import java.util.ArrayList;
import java.util.List;
import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
@@ -51,9 +52,9 @@ public class FirstKInitialMeans<V> implements KMeansInitialization<V>, KMedoidsI
}
@Override
- public List<V> chooseInitialMeans(Relation<V> relation, int k, PrimitiveDistanceFunction<? super V, ?> distanceFunction) {
+ public List<V> chooseInitialMeans(Database database, Relation<V> relation, int k, PrimitiveDistanceFunction<? super NumberVector<?>, ?> distanceFunction) {
DBIDIter iter = relation.iterDBIDs();
- List<V> means = new ArrayList<V>(k);
+ List<V> means = new ArrayList<>(k);
for(int i = 0; i < k && iter.valid(); i++, iter.advance()) {
means.add(relation.get(iter));
}
@@ -80,7 +81,7 @@ public class FirstKInitialMeans<V> implements KMeansInitialization<V>, KMedoidsI
public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
@Override
protected FirstKInitialMeans<V> makeInstance() {
- return new FirstKInitialMeans<V>();
+ return new FirstKInitialMeans<>();
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeans.java
index 68fc4e48..29c0a5c8 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeans.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeans.java
@@ -1,12 +1,10 @@
package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -25,12 +23,27 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import de.lmu.ifi.dbs.elki.algorithm.DistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.MeanModel;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+
/**
* Some constants and options shared among kmeans family algorithms.
*
* @author Erich Schubert
+ *
+ * @param <V> Number vector type
+ * @param <D> Distance type
+ * @param <M> Actual model type
*/
-public interface KMeans {
+public interface KMeans<V extends NumberVector<?>, D extends Distance<?>, M extends MeanModel<V>> extends ClusteringAlgorithm<Clustering<M>>, DistanceBasedAlgorithm<V, D> {
/**
* Parameter to specify the initialization method
*/
@@ -52,4 +65,27 @@ public interface KMeans {
* Parameter to specify the random generator seed.
*/
public static final OptionID SEED_ID = new OptionID("kmeans.seed", "The random number generator seed.");
-} \ No newline at end of file
+
+ /**
+ * Run the clustering algorithm.
+ *
+ * @param database Database to run on.
+ * @param rel Relation to process.
+ * @return Clustering result
+ */
+ Clustering<M> run(Database database, Relation<V> rel);
+
+ /**
+ * Set the value of k. Needed for some types of nested k-means.
+ *
+ * @param k K parameter
+ */
+ void setK(int k);
+
+ /**
+ * Set the distance function to use.
+ *
+ * @param distanceFunction Distance function.
+ */
+ void setDistanceFunction(PrimitiveDistanceFunction<? super NumberVector<?>, D> distanceFunction);
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansBisecting.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansBisecting.java
new file mode 100644
index 00000000..37071d36
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansBisecting.java
@@ -0,0 +1,231 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
+
+/*
+ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.LinkedList;
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.MeanModel;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.ProxyDatabase;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ChainedParameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * The bisecting k-means algorithm works by starting with an initial
+ * partitioning into two clusters, then repeated splitting of the largest
+ * cluster to get additional clusters.
+ *
+ * Reference:<br>
+ * <p>
+ * M. Steinbach, G. Karypis, V. Kumar:<br />
+ * A Comparison of Document Clustering Techniques<br />
+ * KDD workshop on text mining. Vol. 400. No. 1
+ * </p>
+ *
+ * @author Stephan Baier
+ *
+ * @param <V> Vector type
+ * @param <D> Distance type
+ * @param <M> Model type
+ */
+@Reference(authors = "M. Steinbach, G. Karypis, V. Kumar", title = "A Comparison of Document Clustering Techniques", booktitle = "KDD workshop on text mining. Vol. 400. No. 1")
+public class KMeansBisecting<V extends NumberVector<?>, D extends Distance<?>, M extends MeanModel<V>> extends AbstractAlgorithm<Clustering<M>> implements KMeans<V, D, M> {
+  /**
+   * The logger for this class.
+   */
+  private static final Logging LOG = Logging.getLogger(KMeansBisecting.class);
+
+  /**
+   * Variant of kMeans for the bisecting step.
+   */
+  private KMeans<V, D, M> innerkMeans;
+
+  /**
+   * Desired value of k.
+   */
+  private int k;
+
+  /**
+   * Constructor.
+   *
+   * @param k k parameter - number of result clusters
+   * @param innerkMeans KMeans variant parameter - for bisecting step
+   */
+  public KMeansBisecting(int k, KMeans<V, D, M> innerkMeans) {
+    super();
+    this.k = k;
+    this.innerkMeans = innerkMeans;
+  }
+
+  /**
+   * Run bisecting k-means: repeatedly split the currently largest cluster
+   * with the inner k-means until k clusters have been produced.
+   *
+   * @param database Database to run on
+   * @param relation Relation to cluster
+   * @return Clustering with k clusters
+   */
+  @Override
+  public Clustering<M> run(Database database, Relation<V> relation) {
+    ProxyDatabase proxyDB = new ProxyDatabase(relation.getDBIDs(), database);
+
+    // Linked list is preferable for scratch, as we will A) not need that many
+    // clusters and B) be doing random removals of the largest cluster (often at
+    // the head)
+    LinkedList<Cluster<M>> currentClusterList = new LinkedList<>();
+
+    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Bisecting k-means", k - 1, LOG) : null;
+
+    // k - 1 binary splits yield k clusters.
+    for (int j = 0; j < this.k - 1; j++) {
+      // Choose a cluster to split and project database to cluster
+      if (currentClusterList.isEmpty()) {
+        proxyDB = new ProxyDatabase(relation.getDBIDs(), database);
+      } else {
+        Cluster<M> largestCluster = null;
+        for (Cluster<M> cluster : currentClusterList) {
+          if (largestCluster == null || cluster.size() > largestCluster.size()) {
+            largestCluster = cluster;
+          }
+        }
+        currentClusterList.remove(largestCluster);
+        proxyDB.setDBIDs(largestCluster.getIDs());
+      }
+
+      // Run the inner k-means algorithm:
+      // FIXME: ensure we run on the correct relation in a multirelational
+      // setting!
+      Clustering<M> innerResult = innerkMeans.run(proxyDB);
+      // Add resulting clusters to current result.
+      currentClusterList.addAll(innerResult.getAllClusters());
+
+      if (prog != null) {
+        prog.incrementProcessed(LOG);
+      }
+      if (LOG.isVerbose()) {
+        LOG.verbose("Iteration " + j);
+      }
+    }
+    if (prog != null) {
+      prog.ensureCompleted(LOG);
+    }
+
+    // add all current clusters to the result
+    Clustering<M> result = new Clustering<>("Bisecting k-Means Result", "Bisecting-k-means");
+    for (Cluster<M> cluster : currentClusterList) {
+      result.addToplevelCluster(cluster);
+    }
+    return result;
+  }
+
+  @Override
+  public TypeInformation[] getInputTypeRestriction() {
+    return innerkMeans.getInputTypeRestriction();
+  }
+
+  @Override
+  public DistanceFunction<? super V, D> getDistanceFunction() {
+    return innerkMeans.getDistanceFunction();
+  }
+
+  @Override
+  public void setK(int k) {
+    // Note: only changes the number of result clusters; the inner k-means is
+    // always configured with k=2 (see Parameterizer).
+    this.k = k;
+  }
+
+  @Override
+  public void setDistanceFunction(PrimitiveDistanceFunction<? super NumberVector<?>, D> distanceFunction) {
+    innerkMeans.setDistanceFunction(distanceFunction);
+  }
+
+  @Override
+  protected Logging getLogger() {
+    return LOG;
+  }
+
+  /**
+   * Parameterization class.
+   *
+   * @author Stephan Baier
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   *
+   * @param <V> Vector type
+   * @param <D> Distance type
+   * @param <M> Model type
+   */
+  public static class Parameterizer<V extends NumberVector<?>, D extends Distance<?>, M extends MeanModel<V>> extends AbstractParameterizer {
+    /**
+     * Parameter to specify the kMeans variant.
+     */
+    public static final OptionID KMEANS_ID = new OptionID("bisecting.kmeansvariant", "KMeans variant");
+
+    /**
+     * Variant of kMeans
+     */
+    protected KMeans<V, D, M> kMeansVariant;
+
+    /**
+     * Desired number of clusters.
+     */
+    protected int k;
+
+    @Override
+    protected void makeOptions(Parameterization config) {
+      super.makeOptions(config);
+
+      IntParameter kP = new IntParameter(KMeans.K_ID);
+      kP.addConstraint(new GreaterConstraint(1));
+      if (config.grab(kP)) {
+        k = kP.intValue();
+      }
+
+      ObjectParameter<KMeans<V, D, M>> kMeansVariantP = new ObjectParameter<>(KMEANS_ID, KMeans.class, BestOfMultipleKMeans.class);
+      if (config.grab(kMeansVariantP)) {
+        ListParameterization kMeansVariantParameters = new ListParameterization();
+
+        // We will always invoke this with k=2!
+        kMeansVariantParameters.addParameter(KMeans.K_ID, 2);
+
+        ChainedParameterization combinedConfig = new ChainedParameterization(kMeansVariantParameters, config);
+        combinedConfig.errorsTo(config);
+        kMeansVariant = kMeansVariantP.instantiateClass(combinedConfig);
+      }
+    }
+
+    @Override
+    protected KMeansBisecting<V, D, M> makeInstance() {
+      return new KMeansBisecting<>(k, kMeansVariant);
+    }
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansInitialization.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansInitialization.java
index 54b3a2ce..06fb10c1 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansInitialization.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansInitialization.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -24,6 +24,8 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
*/
import java.util.List;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
@@ -31,7 +33,7 @@ import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
* Interface for initializing K-Means
*
* @author Erich Schubert
- *
+ *
* @apiviz.landmark
*
* @param <V> Object type
@@ -40,10 +42,12 @@ public interface KMeansInitialization<V> {
/**
* Choose initial means
*
+ * @param database Database context
* @param relation Relation
* @param k Parameter k
- * @param distanceFunction Distance function
+ * @param distanceFunction Distance function
+ *
* @return List of chosen means for k-means
*/
- public abstract List<V> chooseInitialMeans(Relation<V> relation, int k, PrimitiveDistanceFunction<? super V, ?> distanceFunction);
+ public abstract List<V> chooseInitialMeans(Database database, Relation<V> relation, int k, PrimitiveDistanceFunction<? super NumberVector<?>, ?> distanceFunction);
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java
index f43c2277..e692293c 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -26,7 +26,6 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
import java.util.ArrayList;
import java.util.List;
-import de.lmu.ifi.dbs.elki.algorithm.AbstractPrimitiveDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.NumberVector;
@@ -36,19 +35,13 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.SquaredEuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
/**
* Provides the k-means algorithm, using Lloyd-style bulk iterations.
@@ -90,28 +83,23 @@ public class KMeansLloyd<V extends NumberVector<?>, D extends Distance<D>> exten
super(distanceFunction, k, maxiter, initializer);
}
- /**
- * Run k-means.
- *
- * @param database Database
- * @param relation relation to use
- * @return result
- */
+ @Override
public Clustering<KMeansModel<V>> run(Database database, Relation<V> relation) {
if (relation.size() <= 0) {
- return new Clustering<KMeansModel<V>>("k-Means Clustering", "kmeans-clustering");
+ return new Clustering<>("k-Means Clustering", "kmeans-clustering");
}
// Choose initial means
- List<? extends NumberVector<?>> means = initializer.chooseInitialMeans(relation, k, getDistanceFunction());
+ List<? extends NumberVector<?>> means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());
// Setup cluster assignment store
- List<ModifiableDBIDs> clusters = new ArrayList<ModifiableDBIDs>();
+ List<ModifiableDBIDs> clusters = new ArrayList<>();
for (int i = 0; i < k; i++) {
clusters.add(DBIDUtil.newHashSet(relation.size() / k));
}
+ IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Means iteration", LOG) : null;
for (int iteration = 0; maxiter <= 0 || iteration < maxiter; iteration++) {
- if (LOG.isVerbose()) {
- LOG.verbose("K-Means iteration " + (iteration + 1));
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
}
boolean changed = assignToNearestCluster(relation, means, clusters);
// Stop if no cluster assignment changed.
@@ -121,12 +109,16 @@ public class KMeansLloyd<V extends NumberVector<?>, D extends Distance<D>> exten
// Recompute means.
means = means(clusters, means, relation);
}
+ if (prog != null) {
+ prog.setCompleted(LOG);
+ }
+
// Wrap result
final NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(relation);
- Clustering<KMeansModel<V>> result = new Clustering<KMeansModel<V>>("k-Means Clustering", "kmeans-clustering");
+ Clustering<KMeansModel<V>> result = new Clustering<>("k-Means Clustering", "kmeans-clustering");
for (int i = 0; i < clusters.size(); i++) {
- KMeansModel<V> model = new KMeansModel<V>(factory.newNumberVector(means.get(i).getColumnVector().getArrayRef()));
- result.addCluster(new Cluster<KMeansModel<V>>(clusters.get(i), model));
+ KMeansModel<V> model = new KMeansModel<>(factory.newNumberVector(means.get(i).getColumnVector().getArrayRef()));
+ result.addToplevelCluster(new Cluster<>(clusters.get(i), model));
}
return result;
}
@@ -143,53 +135,15 @@ public class KMeansLloyd<V extends NumberVector<?>, D extends Distance<D>> exten
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<?>, D extends Distance<D>> extends AbstractPrimitiveDistanceBasedAlgorithm.Parameterizer<NumberVector<?>, D> {
- /**
- * k Parameter.
- */
- protected int k;
-
- /**
- * Number of iterations.
- */
- protected int maxiter;
-
- /**
- * Initialization method.
- */
- protected KMeansInitialization<V> initializer;
-
+ public static class Parameterizer<V extends NumberVector<?>, D extends Distance<D>> extends AbstractKMeans.Parameterizer<V, D> {
@Override
- protected void makeOptions(Parameterization config) {
- ObjectParameter<PrimitiveDistanceFunction<NumberVector<?>, D>> distanceFunctionP = makeParameterDistanceFunction(SquaredEuclideanDistanceFunction.class, PrimitiveDistanceFunction.class);
- if(config.grab(distanceFunctionP)) {
- distanceFunction = distanceFunctionP.instantiateClass(config);
- if (!(distanceFunction instanceof EuclideanDistanceFunction) && !(distanceFunction instanceof SquaredEuclideanDistanceFunction)) {
- LOG.warning("k-means optimizes the sum of squares - it should be used with squared euclidean distance and may stop converging otherwise!");
- }
- }
-
- IntParameter kP = new IntParameter(K_ID);
- kP.addConstraint(new GreaterConstraint(0));
- if (config.grab(kP)) {
- k = kP.getValue();
- }
-
- ObjectParameter<KMeansInitialization<V>> initialP = new ObjectParameter<KMeansInitialization<V>>(INIT_ID, KMeansInitialization.class, RandomlyGeneratedInitialMeans.class);
- if (config.grab(initialP)) {
- initializer = initialP.instantiateClass(config);
- }
-
- IntParameter maxiterP = new IntParameter(MAXITER_ID, 0);
- maxiterP.addConstraint(new GreaterEqualConstraint(0));
- if (config.grab(maxiterP)) {
- maxiter = maxiterP.intValue();
- }
+ protected Logging getLogger() {
+ return LOG;
}
@Override
protected KMeansLloyd<V, D> makeInstance() {
- return new KMeansLloyd<V, D>(distanceFunction, k, maxiter, initializer);
+ return new KMeansLloyd<>(distanceFunction, k, maxiter, initializer);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansMacQueen.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansMacQueen.java
index 0cc7c363..bb689bd3 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansMacQueen.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansMacQueen.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -26,7 +26,6 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
import java.util.ArrayList;
import java.util.List;
-import de.lmu.ifi.dbs.elki.algorithm.AbstractPrimitiveDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.NumberVector;
@@ -37,20 +36,14 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.SquaredEuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
/**
* Provides the k-means algorithm, using MacQueen style incremental updates.
@@ -89,24 +82,18 @@ public class KMeansMacQueen<V extends NumberVector<?>, D extends Distance<D>> ex
super(distanceFunction, k, maxiter, initializer);
}
- /**
- * Run k-means.
- *
- * @param database Database
- * @param relation relation to use
- * @return Clustering result
- */
+ @Override
public Clustering<KMeansModel<V>> run(Database database, Relation<V> relation) {
if (relation.size() <= 0) {
- return new Clustering<KMeansModel<V>>("k-Means Clustering", "kmeans-clustering");
+ return new Clustering<>("k-Means Clustering", "kmeans-clustering");
}
// Choose initial means
- List<Vector> means = new ArrayList<Vector>(k);
- for (NumberVector<?> nv : initializer.chooseInitialMeans(relation, k, getDistanceFunction())) {
+ List<Vector> means = new ArrayList<>(k);
+ for (NumberVector<?> nv : initializer.chooseInitialMeans(database, relation, k, getDistanceFunction())) {
means.add(nv.getColumnVector());
}
// Initialize cluster and assign objects
- List<ModifiableDBIDs> clusters = new ArrayList<ModifiableDBIDs>();
+ List<ModifiableDBIDs> clusters = new ArrayList<>();
for (int i = 0; i < k; i++) {
clusters.add(DBIDUtil.newHashSet(relation.size() / k));
}
@@ -114,22 +101,27 @@ public class KMeansMacQueen<V extends NumberVector<?>, D extends Distance<D>> ex
// Initial recomputation of the means.
means = means(clusters, means, relation);
+ IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Means iteration", LOG) : null;
// Refine result
for (int iteration = 0; maxiter <= 0 || iteration < maxiter; iteration++) {
- if (LOG.isVerbose()) {
- LOG.verbose("K-Means iteration " + (iteration + 1));
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
}
boolean changed = macQueenIterate(relation, means, clusters);
if (!changed) {
break;
}
}
+ if (prog != null) {
+ prog.setCompleted(LOG);
+ }
+
final NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(relation);
- Clustering<KMeansModel<V>> result = new Clustering<KMeansModel<V>>("k-Means Clustering", "kmeans-clustering");
+ Clustering<KMeansModel<V>> result = new Clustering<>("k-Means Clustering", "kmeans-clustering");
for (int i = 0; i < clusters.size(); i++) {
DBIDs ids = clusters.get(i);
- KMeansModel<V> model = new KMeansModel<V>(factory.newNumberVector(means.get(i).getArrayRef()));
- result.addCluster(new Cluster<KMeansModel<V>>(ids, model));
+ KMeansModel<V> model = new KMeansModel<>(factory.newNumberVector(means.get(i).getArrayRef()));
+ result.addToplevelCluster(new Cluster<>(ids, model));
}
return result;
}
@@ -146,53 +138,15 @@ public class KMeansMacQueen<V extends NumberVector<?>, D extends Distance<D>> ex
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<?>, D extends Distance<D>> extends AbstractPrimitiveDistanceBasedAlgorithm.Parameterizer<NumberVector<?>, D> {
- /**
- * k Parameter.
- */
- protected int k;
-
- /**
- * Maximum number of iterations.
- */
- protected int maxiter;
-
- /**
- * Initialization method.
- */
- protected KMeansInitialization<V> initializer;
-
+ public static class Parameterizer<V extends NumberVector<?>, D extends Distance<D>> extends AbstractKMeans.Parameterizer<V, D> {
@Override
- protected void makeOptions(Parameterization config) {
- ObjectParameter<PrimitiveDistanceFunction<NumberVector<?>, D>> distanceFunctionP = makeParameterDistanceFunction(SquaredEuclideanDistanceFunction.class, PrimitiveDistanceFunction.class);
- if (config.grab(distanceFunctionP)) {
- distanceFunction = distanceFunctionP.instantiateClass(config);
- if (!(distanceFunction instanceof EuclideanDistanceFunction) && !(distanceFunction instanceof SquaredEuclideanDistanceFunction)) {
- LOG.warning("k-means optimizes the sum of squares - it should be used with squared euclidean distance and may stop converging otherwise!");
- }
- }
-
- IntParameter kP = new IntParameter(K_ID);
- kP.addConstraint(new GreaterConstraint(0));
- if (config.grab(kP)) {
- k = kP.getValue();
- }
-
- ObjectParameter<KMeansInitialization<V>> initialP = new ObjectParameter<KMeansInitialization<V>>(INIT_ID, KMeansInitialization.class, RandomlyGeneratedInitialMeans.class);
- if (config.grab(initialP)) {
- initializer = initialP.instantiateClass(config);
- }
-
- IntParameter maxiterP = new IntParameter(MAXITER_ID, 0);
- maxiterP.addConstraint(new GreaterEqualConstraint(0));
- if (config.grab(maxiterP)) {
- maxiter = maxiterP.getValue();
- }
+ protected Logging getLogger() {
+ return LOG;
}
@Override
protected KMeansMacQueen<V, D> makeInstance() {
- return new KMeansMacQueen<V, D>(distanceFunction, k, maxiter, initializer);
+ return new KMeansMacQueen<>(distanceFunction, k, maxiter, initializer);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansPlusPlusInitialMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansPlusPlusInitialMeans.java
index a07953da..302ca86b 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansPlusPlusInitialMeans.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansPlusPlusInitialMeans.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -26,6 +26,8 @@ import java.util.ArrayList;
import java.util.List;
import java.util.Random;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
@@ -70,17 +72,17 @@ public class KMeansPlusPlusInitialMeans<V, D extends NumberDistance<D, ?>> exten
}
@Override
- public List<V> chooseInitialMeans(Relation<V> relation, int k, PrimitiveDistanceFunction<? super V, ?> distanceFunction) {
+ public List<V> chooseInitialMeans(Database database, Relation<V> relation, int k, PrimitiveDistanceFunction<? super NumberVector<?>, ?> distanceFunction) {
// Get a distance query
if(!(distanceFunction.getDistanceFactory() instanceof NumberDistance)) {
throw new AbortException("K-Means++ initialization can only be used with numerical distances.");
}
@SuppressWarnings("unchecked")
final PrimitiveDistanceFunction<? super V, D> distF = (PrimitiveDistanceFunction<? super V, D>) distanceFunction;
- DistanceQuery<V, D> distQ = relation.getDatabase().getDistanceQuery(relation, distF);
+ DistanceQuery<V, D> distQ = database.getDistanceQuery(relation, distF);
// Chose first mean
- List<V> means = new ArrayList<V>(k);
+ List<V> means = new ArrayList<>(k);
Random random = rnd.getRandom();
DBID first = DBIDUtil.deref(DBIDUtil.randomSample(relation.getDBIDs(), 1, new Random(random.nextLong())).iter());
@@ -99,7 +101,7 @@ public class KMeansPlusPlusInitialMeans<V, D extends NumberDistance<D, ?>> exten
}
double r = random.nextDouble() * weightsum;
int pos = 0;
- while(r > 0 && pos < weights.length) {
+ while(r > 0 && pos < weights.length - 1) {
r -= weights[pos];
pos++;
}
@@ -125,7 +127,7 @@ public class KMeansPlusPlusInitialMeans<V, D extends NumberDistance<D, ?>> exten
@Override
public DBIDs chooseInitialMedoids(int k, DistanceQuery<? super V, ?> distQ2) {
if(!(distQ2.getDistanceFactory() instanceof NumberDistance)) {
- throw new AbortException("PAM initialization can only be used with numerical distances.");
+ throw new AbortException("K-Means++ initialization can only be used with numerical distances.");
}
@SuppressWarnings("unchecked")
DistanceQuery<? super V, D> distQ = (DistanceQuery<? super V, D>) distQ2;
@@ -244,7 +246,7 @@ public class KMeansPlusPlusInitialMeans<V, D extends NumberDistance<D, ?>> exten
public static class Parameterizer<V, D extends NumberDistance<D, ?>> extends AbstractKMeansInitialization.Parameterizer<V> {
@Override
protected KMeansPlusPlusInitialMeans<V, D> makeInstance() {
- return new KMeansPlusPlusInitialMeans<V, D>(rnd);
+ return new KMeansPlusPlusInitialMeans<>(rnd);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMediansLloyd.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMediansLloyd.java
index 9917337e..cc7aaa9e 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMediansLloyd.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMediansLloyd.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -26,7 +26,6 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
import java.util.ArrayList;
import java.util.List;
-import de.lmu.ifi.dbs.elki.algorithm.AbstractPrimitiveDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.NumberVector;
@@ -39,13 +38,9 @@ import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
/**
* Provides the k-medians clustering algorithm, using Lloyd-style bulk
@@ -83,28 +78,23 @@ public class KMediansLloyd<V extends NumberVector<?>, D extends Distance<D>> ext
super(distanceFunction, k, maxiter, initializer);
}
- /**
- * Run k-medians.
- *
- * @param database Database
- * @param relation relation to use
- * @return result
- */
+ @Override
public Clustering<MeanModel<V>> run(Database database, Relation<V> relation) {
if (relation.size() <= 0) {
- return new Clustering<MeanModel<V>>("k-Medians Clustering", "kmedians-clustering");
+ return new Clustering<>("k-Medians Clustering", "kmedians-clustering");
}
// Choose initial medians
- List<? extends NumberVector<?>> medians = initializer.chooseInitialMeans(relation, k, getDistanceFunction());
+ List<? extends NumberVector<?>> medians = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());
// Setup cluster assignment store
- List<ModifiableDBIDs> clusters = new ArrayList<ModifiableDBIDs>();
+ List<ModifiableDBIDs> clusters = new ArrayList<>();
for (int i = 0; i < k; i++) {
clusters.add(DBIDUtil.newHashSet(relation.size() / k));
}
+ IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Medians iteration", LOG) : null;
for (int iteration = 0; maxiter <= 0 || iteration < maxiter; iteration++) {
- if (LOG.isVerbose()) {
- LOG.verbose("K-Medians iteration " + (iteration + 1));
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
}
boolean changed = assignToNearestCluster(relation, medians, clusters);
// Stop if no cluster assignment changed.
@@ -114,12 +104,15 @@ public class KMediansLloyd<V extends NumberVector<?>, D extends Distance<D>> ext
// Recompute medians.
medians = medians(clusters, medians, relation);
}
+ if (prog != null) {
+ prog.setCompleted(LOG);
+ }
// Wrap result
final NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(relation);
- Clustering<MeanModel<V>> result = new Clustering<MeanModel<V>>("k-Medians Clustering", "kmedians-clustering");
+ Clustering<MeanModel<V>> result = new Clustering<>("k-Medians Clustering", "kmedians-clustering");
for (int i = 0; i < clusters.size(); i++) {
- MeanModel<V> model = new MeanModel<V>(factory.newNumberVector(medians.get(i).getColumnVector().getArrayRef()));
- result.addCluster(new Cluster<MeanModel<V>>(clusters.get(i), model));
+ MeanModel<V> model = new MeanModel<>(factory.newNumberVector(medians.get(i).getColumnVector().getArrayRef()));
+ result.addToplevelCluster(new Cluster<>(clusters.get(i), model));
}
return result;
}
@@ -136,46 +129,15 @@ public class KMediansLloyd<V extends NumberVector<?>, D extends Distance<D>> ext
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<?>, D extends Distance<D>> extends AbstractPrimitiveDistanceBasedAlgorithm.Parameterizer<NumberVector<?>, D> {
- /**
- * k Parameter.
- */
- protected int k;
-
- /**
- * Maximum number of iterations.
- */
- protected int maxiter;
-
- /**
- * Initialization method.
- */
- protected KMeansInitialization<V> initializer;
-
+ public static class Parameterizer<V extends NumberVector<?>, D extends Distance<D>> extends AbstractKMeans.Parameterizer<V, D> {
@Override
- protected void makeOptions(Parameterization config) {
- super.makeOptions(config);
- IntParameter kP = new IntParameter(K_ID);
- kP.addConstraint(new GreaterConstraint(0));
- if (config.grab(kP)) {
- k = kP.intValue();
- }
-
- ObjectParameter<KMeansInitialization<V>> initialP = new ObjectParameter<KMeansInitialization<V>>(INIT_ID, KMeansInitialization.class, RandomlyGeneratedInitialMeans.class);
- if (config.grab(initialP)) {
- initializer = initialP.instantiateClass(config);
- }
-
- IntParameter maxiterP = new IntParameter(MAXITER_ID, 0);
- maxiterP.addConstraint(new GreaterEqualConstraint(0));
- if (config.grab(maxiterP)) {
- maxiter = maxiterP.intValue();
- }
+ protected Logging getLogger() {
+ return LOG;
}
@Override
protected KMediansLloyd<V, D> makeInstance() {
- return new KMediansLloyd<V, D>(distanceFunction, k, maxiter, initializer);
+ return new KMediansLloyd<>(distanceFunction, k, maxiter, initializer);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsEM.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsEM.java
index f4398458..87a0c7ae 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsEM.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsEM.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -46,6 +46,7 @@ import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
import de.lmu.ifi.dbs.elki.math.Mean;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
@@ -119,13 +120,13 @@ public class KMedoidsEM<V, D extends NumberDistance<D, ?>> extends AbstractDista
*/
public Clustering<MedoidModel> run(Database database, Relation<V> relation) {
if (relation.size() <= 0) {
- return new Clustering<MedoidModel>("k-Medoids Clustering", "kmedoids-clustering");
+ return new Clustering<>("k-Medoids Clustering", "kmedoids-clustering");
}
DistanceQuery<V, D> distQ = database.getDistanceQuery(relation, getDistanceFunction());
// Choose initial medoids
ArrayModifiableDBIDs medoids = DBIDUtil.newArray(initializer.chooseInitialMedoids(k, distQ));
// Setup cluster assignment store
- List<ModifiableDBIDs> clusters = new ArrayList<ModifiableDBIDs>();
+ List<ModifiableDBIDs> clusters = new ArrayList<>();
for (int i = 0; i < k; i++) {
clusters.add(DBIDUtil.newHashSet(relation.size() / k));
}
@@ -135,9 +136,13 @@ public class KMedoidsEM<V, D extends NumberDistance<D, ?>> extends AbstractDista
// TODO: reuse this information, from the build phase, when possible?
assignToNearestCluster(medoids, mdists, clusters, distQ);
+ IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Medoids iteration", LOG) : null;
// Swap phase
boolean changed = true;
while (changed) {
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
+ }
changed = false;
// Try to swap the medoid with a better cluster member:
int i = 0;
@@ -168,12 +173,15 @@ public class KMedoidsEM<V, D extends NumberDistance<D, ?>> extends AbstractDista
assignToNearestCluster(medoids, mdists, clusters, distQ);
}
}
+ if (prog != null) {
+ prog.setCompleted(LOG);
+ }
// Wrap result
- Clustering<MedoidModel> result = new Clustering<MedoidModel>("k-Medoids Clustering", "kmedoids-clustering");
+ Clustering<MedoidModel> result = new Clustering<>("k-Medoids Clustering", "kmedoids-clustering");
for (int i = 0; i < clusters.size(); i++) {
MedoidModel model = new MedoidModel(medoids.get(i));
- result.addCluster(new Cluster<MedoidModel>(clusters.get(i), model));
+ result.addToplevelCluster(new Cluster<>(clusters.get(i), model));
}
return result;
}
@@ -256,7 +264,7 @@ public class KMedoidsEM<V, D extends NumberDistance<D, ?>> extends AbstractDista
k = kP.intValue();
}
- ObjectParameter<KMedoidsInitialization<V>> initialP = new ObjectParameter<KMedoidsInitialization<V>>(KMeans.INIT_ID, KMedoidsInitialization.class, PAMInitialMeans.class);
+ ObjectParameter<KMedoidsInitialization<V>> initialP = new ObjectParameter<>(KMeans.INIT_ID, KMedoidsInitialization.class, PAMInitialMeans.class);
if (config.grab(initialP)) {
initializer = initialP.instantiateClass(config);
}
@@ -270,7 +278,7 @@ public class KMedoidsEM<V, D extends NumberDistance<D, ?>> extends AbstractDista
@Override
protected KMedoidsEM<V, D> makeInstance() {
- return new KMedoidsEM<V, D>(distanceFunction, k, maxiter, initializer);
+ return new KMedoidsEM<>(distanceFunction, k, maxiter, initializer);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsInitialization.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsInitialization.java
index 269e7e9e..136a4129 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsInitialization.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsInitialization.java
@@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsPAM.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsPAM.java
index 906501e4..1feda867 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsPAM.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsPAM.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -50,6 +50,7 @@ import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
@@ -124,14 +125,14 @@ public class KMedoidsPAM<V, D extends NumberDistance<D, ?>> extends AbstractDist
*/
public Clustering<MedoidModel> run(Database database, Relation<V> relation) {
if (relation.size() <= 0) {
- return new Clustering<MedoidModel>("k-Medoids Clustering", "kmedoids-clustering");
+ return new Clustering<>("k-Medoids Clustering", "kmedoids-clustering");
}
DistanceQuery<V, D> distQ = database.getDistanceQuery(relation, getDistanceFunction());
DBIDs ids = relation.getDBIDs();
// Choose initial medoids
ArrayModifiableDBIDs medoids = DBIDUtil.newArray(initializer.chooseInitialMedoids(k, distQ));
// Setup cluster assignment store
- List<ModifiableDBIDs> clusters = new ArrayList<ModifiableDBIDs>();
+ List<ModifiableDBIDs> clusters = new ArrayList<>();
for (int i = 0; i < k; i++) {
clusters.add(DBIDUtil.newHashSet(relation.size() / k));
}
@@ -141,9 +142,13 @@ public class KMedoidsPAM<V, D extends NumberDistance<D, ?>> extends AbstractDist
// TODO: reuse this information, from the build phase, when possible?
assignToNearestCluster(medoids, ids, second, clusters, distQ);
+ IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("PAM iteration", LOG) : null;
// Swap phase
boolean changed = true;
while (changed) {
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
+ }
changed = false;
// Try to swap the medoid with a better cluster member:
double best = 0;
@@ -189,6 +194,9 @@ public class KMedoidsPAM<V, D extends NumberDistance<D, ?>> extends AbstractDist
}
}
}
+ if (prog != null) {
+ prog.setCompleted(LOG);
+ }
if (LOG.isDebugging()) {
LOG.debug("Best cost: " + best);
}
@@ -204,10 +212,10 @@ public class KMedoidsPAM<V, D extends NumberDistance<D, ?>> extends AbstractDist
}
// Wrap result
- Clustering<MedoidModel> result = new Clustering<MedoidModel>("k-Medoids Clustering", "kmedoids-clustering");
+ Clustering<MedoidModel> result = new Clustering<>("k-Medoids Clustering", "kmedoids-clustering");
for (int i = 0; i < clusters.size(); i++) {
MedoidModel model = new MedoidModel(medoids.get(i));
- result.addCluster(new Cluster<MedoidModel>(clusters.get(i), model));
+ result.addToplevelCluster(new Cluster<>(clusters.get(i), model));
}
return result;
}
@@ -293,7 +301,7 @@ public class KMedoidsPAM<V, D extends NumberDistance<D, ?>> extends AbstractDist
k = kP.intValue();
}
- ObjectParameter<KMedoidsInitialization<V>> initialP = new ObjectParameter<KMedoidsInitialization<V>>(KMeans.INIT_ID, KMedoidsInitialization.class, PAMInitialMeans.class);
+ ObjectParameter<KMedoidsInitialization<V>> initialP = new ObjectParameter<>(KMeans.INIT_ID, KMedoidsInitialization.class, PAMInitialMeans.class);
if (config.grab(initialP)) {
initializer = initialP.instantiateClass(config);
}
@@ -307,7 +315,7 @@ public class KMedoidsPAM<V, D extends NumberDistance<D, ?>> extends AbstractDist
@Override
protected KMedoidsPAM<V, D> makeInstance() {
- return new KMedoidsPAM<V, D>(distanceFunction, k, maxiter, initializer);
+ return new KMedoidsPAM<>(distanceFunction, k, maxiter, initializer);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/PAMInitialMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/PAMInitialMeans.java
index 1fc7160e..c7e1751f 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/PAMInitialMeans.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/PAMInitialMeans.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -25,6 +25,8 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
import java.util.ArrayList;
import java.util.List;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
@@ -69,16 +71,16 @@ public class PAMInitialMeans<V, D extends NumberDistance<D, ?>> implements KMean
}
@Override
- public List<V> chooseInitialMeans(Relation<V> relation, int k, PrimitiveDistanceFunction<? super V, ?> distanceFunction) {
+ public List<V> chooseInitialMeans(Database database, Relation<V> relation, int k, PrimitiveDistanceFunction<? super NumberVector<?>, ?> distanceFunction) {
// Get a distance query
if(!(distanceFunction.getDistanceFactory() instanceof NumberDistance)) {
throw new AbortException("PAM initialization can only be used with numerical distances.");
}
@SuppressWarnings("unchecked")
final PrimitiveDistanceFunction<? super V, D> distF = (PrimitiveDistanceFunction<? super V, D>) distanceFunction;
- final DistanceQuery<V, D> distQ = relation.getDatabase().getDistanceQuery(relation, distF);
+ final DistanceQuery<V, D> distQ = database.getDistanceQuery(relation, distF);
DBIDs medids = chooseInitialMedoids(k, distQ);
- List<V> medoids = new ArrayList<V>(k);
+ List<V> medoids = new ArrayList<>(k);
for(DBIDIter iter = medids.iter(); iter.valid(); iter.advance()) {
medoids.add(relation.get(iter));
}
@@ -179,7 +181,7 @@ public class PAMInitialMeans<V, D extends NumberDistance<D, ?>> implements KMean
public static class Parameterizer<V, D extends NumberDistance<D, ?>> extends AbstractParameterizer {
@Override
protected PAMInitialMeans<V, D> makeInstance() {
- return new PAMInitialMeans<V, D>();
+ return new PAMInitialMeans<>();
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyChosenInitialMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyChosenInitialMeans.java
index 78e59be7..214f4ce6 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyChosenInitialMeans.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyChosenInitialMeans.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -25,6 +25,8 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
import java.util.ArrayList;
import java.util.List;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
@@ -52,9 +54,9 @@ public class RandomlyChosenInitialMeans<V> extends AbstractKMeansInitialization<
}
@Override
- public List<V> chooseInitialMeans(Relation<V> relation, int k, PrimitiveDistanceFunction<? super V, ?> distanceFunction) {
+ public List<V> chooseInitialMeans(Database database, Relation<V> relation, int k, PrimitiveDistanceFunction<? super NumberVector<?>, ?> distanceFunction) {
DBIDs ids = DBIDUtil.randomSample(relation.getDBIDs(), k, rnd);
- List<V> means = new ArrayList<V>(k);
+ List<V> means = new ArrayList<>(k);
for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
means.add(relation.get(iter));
}
@@ -74,10 +76,9 @@ public class RandomlyChosenInitialMeans<V> extends AbstractKMeansInitialization<
* @apiviz.exclude
*/
public static class Parameterizer<V> extends AbstractKMeansInitialization.Parameterizer<V> {
-
@Override
protected RandomlyChosenInitialMeans<V> makeInstance() {
- return new RandomlyChosenInitialMeans<V>(rnd);
+ return new RandomlyChosenInitialMeans<>(rnd);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyGeneratedInitialMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyGeneratedInitialMeans.java
index 300f5cb0..ee90e0dc 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyGeneratedInitialMeans.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyGeneratedInitialMeans.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -27,6 +27,7 @@ import java.util.List;
import java.util.Random;
import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
@@ -54,11 +55,11 @@ public class RandomlyGeneratedInitialMeans<V extends NumberVector<?>> extends Ab
}
@Override
- public List<V> chooseInitialMeans(Relation<V> relation, int k, PrimitiveDistanceFunction<? super V, ?> distanceFunction) {
+ public List<V> chooseInitialMeans(Database database, Relation<V> relation, int k, PrimitiveDistanceFunction<? super NumberVector<?>, ?> distanceFunction) {
final int dim = RelationUtil.dimensionality(relation);
NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(relation);
Pair<V, V> minmax = DatabaseUtil.computeMinMax(relation);
- List<V> means = new ArrayList<V>(k);
+ List<V> means = new ArrayList<>(k);
final Random random = rnd.getRandom();
for(int i = 0; i < k; i++) {
double[] r = MathUtil.randomDoubleArray(dim, random);
@@ -81,7 +82,7 @@ public class RandomlyGeneratedInitialMeans<V extends NumberVector<?>> extends Ab
public static class Parameterizer<V extends NumberVector<?>> extends AbstractKMeansInitialization.Parameterizer<V> {
@Override
protected RandomlyGeneratedInitialMeans<V> makeInstance() {
- return new RandomlyGeneratedInitialMeans<V>(rnd);
+ return new RandomlyGeneratedInitialMeans<>(rnd);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/SampleKMeansInitialization.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/SampleKMeansInitialization.java
new file mode 100644
index 00000000..9f0a1923
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/SampleKMeansInitialization.java
@@ -0,0 +1,160 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.MeanModel;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.ProxyDatabase;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.ProxyView;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ChainedParameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * Initialize k-means by running k-means on a sample of the data set only.
+ *
+ * @author Erich Schubert
+ *
+ * @param <V> Vector type
+ */
+public class SampleKMeansInitialization<V extends NumberVector<?>, D extends Distance<?>> extends AbstractKMeansInitialization<V> {
+ /**
+ * Variant of kMeans to run on the sample for initialization.
+ */
+ private KMeans<V, D, ?> innerkMeans;
+
+ /**
+ * Sample size.
+ */
+ private double rate;
+
+ /**
+ * Constructor.
+ *
+ * @param rnd Random generator.
+ * @param innerkMeans Inner k-means algorithm.
+ * @param rate Sampling rate.
+ */
+ public SampleKMeansInitialization(RandomFactory rnd, KMeans<V, D, ?> innerkMeans, double rate) {
+ super(rnd);
+ this.innerkMeans = innerkMeans;
+ this.rate = rate;
+ }
+
+ @Override
+ public List<V> chooseInitialMeans(Database database, Relation<V> relation, int k, PrimitiveDistanceFunction<? super NumberVector<?>, ?> distanceFunction) {
+ final int samplesize = (int) Math.ceil(rate * relation.size());
+ final DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), samplesize, rnd);
+
+ ProxyView<V> proxyv = new ProxyView<>(database, sample, relation);
+ ProxyDatabase proxydb = new ProxyDatabase(sample, proxyv);
+
+ innerkMeans.setK(k);
+ @SuppressWarnings("unchecked")
+ PrimitiveDistanceFunction<? super NumberVector<?>, D> df = (PrimitiveDistanceFunction<? super NumberVector<?>, D>) distanceFunction;
+ innerkMeans.setDistanceFunction(df);
+ Clustering<? extends MeanModel<V>> clusters = innerkMeans.run(proxydb, proxyv);
+ List<V> means = new ArrayList<>();
+ for (Cluster<? extends MeanModel<V>> cluster : clusters.getAllClusters()) {
+ means.add((V) cluster.getModel().getMean());
+ }
+
+ return means;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ *
+ * @param <V> Vector type
+ * @param <D> Distance type
+ */
+ public static class Parameterizer<V extends NumberVector<?>, D extends Distance<?>> extends AbstractKMeansInitialization.Parameterizer<V> {
+ /**
+ * Parameter to specify the kMeans variant.
+ */
+ public static final OptionID KMEANS_ID = new OptionID("kmeans.algorithm", "KMeans variant to run on the sample.");
+
+ /**
+ * Parameter to specify the sampling rate.
+ */
+ public static final OptionID SAMPLE_ID = new OptionID("kmeans.samplesize", "Sample set size (if > 1) or sampling rate (if < 1).");
+
+ /**
+ * Inner k-means algorithm to use.
+ */
+ protected KMeans<V, D, ?> innerkMeans;
+
+ /**
+ * Sampling rate.
+ */
+ protected double rate;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ ObjectParameter<KMeans<V, D, ?>> kMeansVariantP = new ObjectParameter<>(KMEANS_ID, KMeans.class);
+ if (config.grab(kMeansVariantP)) {
+ ListParameterization kMeansVariantParameters = new ListParameterization();
+
+ // Dummy value for k; chooseInitialMeans() overrides it via setK() with the requested k.
+ kMeansVariantParameters.addParameter(KMeans.K_ID, 13);
+ kMeansVariantParameters.addParameter(KMeans.DISTANCE_FUNCTION_ID, SquaredEuclideanDistanceFunction.class);
+
+ ChainedParameterization combinedConfig = new ChainedParameterization(kMeansVariantParameters, config);
+ combinedConfig.errorsTo(config);
+ innerkMeans = kMeansVariantP.instantiateClass(combinedConfig);
+ }
+
+ DoubleParameter sampleP = new DoubleParameter(SAMPLE_ID);
+ if (config.grab(sampleP)) {
+ rate = sampleP.doubleValue();
+ }
+ }
+
+ @Override
+ protected SampleKMeansInitialization<V, D> makeInstance() {
+ return new SampleKMeansInitialization<>(rnd, innerkMeans, rate);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/package-info.java
index 2ce625b0..aa4c3e24 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/package-info.java
@@ -5,7 +5,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2012
+Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/KMeansQualityMeasure.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/KMeansQualityMeasure.java
new file mode 100644
index 00000000..f2de7846
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/KMeansQualityMeasure.java
@@ -0,0 +1,54 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.quality;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.MeanModel;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+
+/**
+ * Interface for computing the quality of a K-Means clustering.
+ *
+ * @author Erich Schubert
+ *
+ * @param <O> Input Object restriction type
+ * @param <D> Distance restriction type
+ */
+public interface KMeansQualityMeasure<O extends NumberVector<?>, D extends Distance<?>> {
+ /**
+ * Calculates and returns the quality measure.
+ *
+ * @param clustering Clustering to analyze
+ * @param distanceFunction Distance function to use (usually Euclidean or
+ * squared Euclidean!)
+ * @param relation Relation for accessing objects
+ * @param <V> Actual vector type (could be a subtype of O!)
+ *
+ * @return quality measure
+ */
+ <V extends O> double calculateCost(Clustering<? extends MeanModel<V>> clustering, PrimitiveDistanceFunction<? super V, ? extends D> distanceFunction, Relation<V> relation);
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/WithinClusterMeanDistanceQualityMeasure.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/WithinClusterMeanDistanceQualityMeasure.java
new file mode 100644
index 00000000..e0ddfff0
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/WithinClusterMeanDistanceQualityMeasure.java
@@ -0,0 +1,89 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.quality;
+
+/*
+ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.MeanModel;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+
+/**
+ * Class for computing the average overall distance.
+ *
+ * The average of all average pairwise distances in a cluster.
+ *
+ * @author Stephan Baier
+ */
+public class WithinClusterMeanDistanceQualityMeasure implements KMeansQualityMeasure<NumberVector<?>, NumberDistance<?, ?>> {
+ @Override
+ public <V extends NumberVector<?>> double calculateCost(Clustering<? extends MeanModel<V>> clustering, PrimitiveDistanceFunction<? super V, ? extends NumberDistance<?, ?>> distanceFunction, Relation<V> relation) {
+ @SuppressWarnings("unchecked")
+ final List<Cluster<MeanModel<V>>> clusterList = (List<Cluster<MeanModel<V>>>) (List<?>) clustering.getAllClusters();
+
+ if (distanceFunction instanceof PrimitiveDoubleDistanceFunction) {
+ @SuppressWarnings("unchecked")
+ PrimitiveDoubleDistanceFunction<? super V> df = (PrimitiveDoubleDistanceFunction<? super V>) distanceFunction;
+ double clusterDistanceSum = 0;
+ for (Cluster<MeanModel<V>> cluster : clusterList) {
+ DBIDs ids = cluster.getIDs();
+
+ // Compute sum of pairwise distances:
+ double clusterPairwiseDistanceSum = 0;
+ for (DBIDIter iter1 = ids.iter(); iter1.valid(); iter1.advance()) {
+ V obj1 = relation.get(iter1);
+ for (DBIDIter iter2 = ids.iter(); iter2.valid(); iter2.advance()) {
+ clusterPairwiseDistanceSum += df.doubleDistance(obj1, relation.get(iter2));
+ }
+ }
+ clusterDistanceSum += clusterPairwiseDistanceSum / (ids.size() * ids.size());
+ }
+
+ return clusterDistanceSum / clusterList.size();
+ } else {
+ double clusterDistanceSum = 0;
+ for (Cluster<MeanModel<V>> cluster : clusterList) {
+ DBIDs ids = cluster.getIDs();
+
+ // Compute sum of pairwise distances:
+ double clusterPairwiseDistanceSum = 0;
+ for (DBIDIter iter1 = ids.iter(); iter1.valid(); iter1.advance()) {
+ V obj1 = relation.get(iter1);
+ for (DBIDIter iter2 = ids.iter(); iter2.valid(); iter2.advance()) {
+ clusterPairwiseDistanceSum += distanceFunction.distance(obj1, relation.get(iter2)).doubleValue();
+ }
+ }
+ clusterDistanceSum += clusterPairwiseDistanceSum / (ids.size() * ids.size());
+ }
+
+ return clusterDistanceSum / clusterList.size();
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/WithinClusterVarianceQualityMeasure.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/WithinClusterVarianceQualityMeasure.java
new file mode 100644
index 00000000..32ad5210
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/WithinClusterVarianceQualityMeasure.java
@@ -0,0 +1,83 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.quality;
+
+/*
+ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.MeanModel;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+
+/**
+ * Class for computing the variance in a clustering result (sum-of-squares).
+ *
+ * @author Stephan Baier
+ */
+public class WithinClusterVarianceQualityMeasure implements KMeansQualityMeasure<NumberVector<?>, NumberDistance<?, ?>> {
+ @Override
+ public <V extends NumberVector<?>> double calculateCost(Clustering<? extends MeanModel<V>> clustering, PrimitiveDistanceFunction<? super V, ? extends NumberDistance<?, ?>> distanceFunction, Relation<V> relation) {
+ @SuppressWarnings("unchecked")
+ final List<Cluster<MeanModel<V>>> clusterList = (List<Cluster<MeanModel<V>>>) (List<?>) clustering.getAllClusters();
+
+ boolean squared = (distanceFunction instanceof SquaredEuclideanDistanceFunction);
+ if (distanceFunction instanceof PrimitiveDoubleDistanceFunction) {
+ @SuppressWarnings("unchecked")
+ PrimitiveDoubleDistanceFunction<? super V> df = (PrimitiveDoubleDistanceFunction<? super V>) distanceFunction;
+ double variance = 0.0;
+ for (Cluster<MeanModel<V>> cluster : clusterList) {
+ DBIDs ids = cluster.getIDs();
+ V mean = cluster.getModel().getMean();
+
+ for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ double dist = df.doubleDistance(relation.get(iter), mean);
+ if (squared) {
+ variance += dist;
+ } else {
+ variance += dist * dist;
+ }
+ }
+ }
+ return variance;
+ } else {
+ double variance = 0.0;
+ for (Cluster<MeanModel<V>> cluster : clusterList) {
+ DBIDs ids = cluster.getIDs();
+ V mean = cluster.getModel().getMean();
+
+ for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ double dist = distanceFunction.distance(relation.get(iter), mean).doubleValue();
+ variance += dist * dist;
+ }
+ }
+ return variance;
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/package-info.java
new file mode 100644
index 00000000..ed9a528d
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/package-info.java
@@ -0,0 +1,4 @@
+/**
+ * Quality measures for k-Means results.
+ */
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.quality; \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/package-info.java
index 4ba1ce09..26fb3024 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/package-info.java
@@ -19,7 +19,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2012
+Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java
index 37b3eb57..db026e93 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -172,7 +172,7 @@ public class CLIQUE<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
if(LOG.isVerbose()) {
LOG.verbose("*** 1. Identification of subspaces that contain clusters ***");
}
- SortedMap<Integer, List<CLIQUESubspace<V>>> dimensionToDenseSubspaces = new TreeMap<Integer, List<CLIQUESubspace<V>>>();
+ SortedMap<Integer, List<CLIQUESubspace<V>>> dimensionToDenseSubspaces = new TreeMap<>();
List<CLIQUESubspace<V>> denseSubspaces = findOneDimensionalDenseSubspaces(relation);
dimensionToDenseSubspaces.put(Integer.valueOf(0), denseSubspaces);
if(LOG.isVerbose()) {
@@ -204,7 +204,7 @@ public class CLIQUE<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
}
// build result
int numClusters = 1;
- Clustering<SubspaceModel<V>> result = new Clustering<SubspaceModel<V>>("CLIQUE clustering", "clique-clustering");
+ Clustering<SubspaceModel<V>> result = new Clustering<>("CLIQUE clustering", "clique-clustering");
for(Integer dim : dimensionToDenseSubspaces.keySet()) {
List<CLIQUESubspace<V>> subspaces = dimensionToDenseSubspaces.get(dim);
List<Pair<Subspace, ModifiableDBIDs>> modelsAndClusters = determineClusters(subspaces);
@@ -214,10 +214,10 @@ public class CLIQUE<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
}
for(Pair<Subspace, ModifiableDBIDs> modelAndCluster : modelsAndClusters) {
- Cluster<SubspaceModel<V>> newCluster = new Cluster<SubspaceModel<V>>(modelAndCluster.second);
- newCluster.setModel(new SubspaceModel<V>(modelAndCluster.first, Centroid.make(relation, modelAndCluster.second).toVector(relation)));
+ Cluster<SubspaceModel<V>> newCluster = new Cluster<>(modelAndCluster.second);
+ newCluster.setModel(new SubspaceModel<>(modelAndCluster.first, Centroid.make(relation, modelAndCluster.second).toVector(relation)));
newCluster.setName("cluster_" + numClusters++);
- result.addCluster(newCluster);
+ result.addToplevelCluster(newCluster);
}
}
@@ -233,7 +233,7 @@ public class CLIQUE<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
* cluster models
*/
private List<Pair<Subspace, ModifiableDBIDs>> determineClusters(List<CLIQUESubspace<V>> denseSubspaces) {
- List<Pair<Subspace, ModifiableDBIDs>> clusters = new ArrayList<Pair<Subspace, ModifiableDBIDs>>();
+ List<Pair<Subspace, ModifiableDBIDs>> clusters = new ArrayList<>();
for(CLIQUESubspace<V> subspace : denseSubspaces) {
List<Pair<Subspace, ModifiableDBIDs>> clustersInSubspace = subspace.determineClusters();
@@ -339,7 +339,7 @@ public class CLIQUE<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
}
// build the 1 dimensional units
- List<CLIQUEUnit<V>> units = new ArrayList<CLIQUEUnit<V>>((xsi * dimensionality));
+ List<CLIQUEUnit<V>> units = new ArrayList<>((xsi * dimensionality));
for(int x = 0; x < xsi; x++) {
for(int d = 0; d < dimensionality; d++) {
units.add(new CLIQUEUnit<V>(new Interval(d, unit_bounds[x][d], unit_bounds[x + 1][d])));
@@ -396,8 +396,8 @@ public class CLIQUE<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
}
}
- Collection<CLIQUEUnit<V>> denseUnits = new ArrayList<CLIQUEUnit<V>>();
- Map<Integer, CLIQUESubspace<V>> denseSubspaces = new HashMap<Integer, CLIQUESubspace<V>>();
+ Collection<CLIQUEUnit<V>> denseUnits = new ArrayList<>();
+ Map<Integer, CLIQUESubspace<V>> denseSubspaces = new HashMap<>();
for(CLIQUEUnit<V> unit : units) {
// unit is a dense unit
if(unit.selectivity(total) >= tau) {
@@ -406,7 +406,7 @@ public class CLIQUE<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
int dim = unit.getIntervals().iterator().next().getDimension();
CLIQUESubspace<V> subspace_d = denseSubspaces.get(Integer.valueOf(dim));
if(subspace_d == null) {
- subspace_d = new CLIQUESubspace<V>(dim);
+ subspace_d = new CLIQUESubspace<>(dim);
denseSubspaces.put(Integer.valueOf(dim), subspace_d);
}
subspace_d.addDenseUnit(unit);
@@ -420,7 +420,7 @@ public class CLIQUE<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
LOG.debugFine(msg.toString());
}
- List<CLIQUESubspace<V>> subspaceCandidates = new ArrayList<CLIQUESubspace<V>>(denseSubspaces.values());
+ List<CLIQUESubspace<V>> subspaceCandidates = new ArrayList<>(denseSubspaces.values());
Collections.sort(subspaceCandidates, new CLIQUESubspace.CoverageComparator());
return subspaceCandidates;
}
@@ -436,12 +436,12 @@ public class CLIQUE<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
*/
private List<CLIQUESubspace<V>> findDenseSubspaceCandidates(Relation<V> database, List<CLIQUESubspace<V>> denseSubspaces) {
// sort (k-1)-dimensional dense subspace according to their dimensions
- List<CLIQUESubspace<V>> denseSubspacesByDimensions = new ArrayList<CLIQUESubspace<V>>(denseSubspaces);
+ List<CLIQUESubspace<V>> denseSubspacesByDimensions = new ArrayList<>(denseSubspaces);
Collections.sort(denseSubspacesByDimensions, new Subspace.DimensionComparator());
// determine k-dimensional dense subspace candidates
double all = database.size();
- List<CLIQUESubspace<V>> denseSubspaceCandidates = new ArrayList<CLIQUESubspace<V>>();
+ List<CLIQUESubspace<V>> denseSubspaceCandidates = new ArrayList<>();
while(!denseSubspacesByDimensions.isEmpty()) {
CLIQUESubspace<V> s1 = denseSubspacesByDimensions.remove(0);
@@ -614,7 +614,7 @@ public class CLIQUE<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
@Override
protected CLIQUE<V> makeInstance() {
- return new CLIQUE<V>(xsi, tau, prune);
+ return new CLIQUE<>(xsi, tau, prune);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java
index a3496a0e..b17ebebb 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -62,7 +62,8 @@ import de.lmu.ifi.dbs.elki.result.optics.ClusterOrderEntry;
import de.lmu.ifi.dbs.elki.result.optics.ClusterOrderResult;
import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil;
import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy.HierarchyReferenceLists;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy.Hierarchy;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy.Hierarchy.Iter;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -238,29 +239,29 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
}
// build the hierarchy
- buildHierarchy(database, distFunc, clusters, dimensionality);
+ Clustering<SubspaceModel<V>> clustering = new Clustering<>("DiSH clustering", "dish-clustering");
+ buildHierarchy(database, distFunc, clustering, clusters, dimensionality);
if (LOG.isVerbose()) {
StringBuilder msg = new StringBuilder("Step 4: build hierarchy");
for (Cluster<SubspaceModel<V>> c : clusters) {
msg.append('\n').append(FormatUtil.format(dimensionality, c.getModel().getDimensions())).append(" ids ").append(c.size());
- for (Cluster<SubspaceModel<V>> cluster : c.getParents()) {
- msg.append("\n parent ").append(cluster);
+ for (Iter<Cluster<SubspaceModel<V>>> iter = clustering.getClusterHierarchy().iterParents(c); iter.valid(); iter.advance()) {
+ msg.append("\n parent ").append(iter.get());
}
- for (Cluster<SubspaceModel<V>> cluster : c.getChildren()) {
- msg.append("\n child ").append(cluster);
+ for (Iter<Cluster<SubspaceModel<V>>> iter = clustering.getClusterHierarchy().iterChildren(c); iter.valid(); iter.advance()) {
+ msg.append("\n child ").append(iter.get());
}
}
LOG.verbose(msg.toString());
}
// build result
- Clustering<SubspaceModel<V>> result = new Clustering<SubspaceModel<V>>("DiSH clustering", "dish-clustering");
for (Cluster<SubspaceModel<V>> c : clusters) {
- if (c.getParents() == null || c.getParents().isEmpty()) {
- result.addCluster(c);
+ if (clustering.getClusterHierarchy().numParents(c) == 0) {
+ clustering.addToplevelCluster(c);
}
}
- return result;
+ return clustering;
}
/**
@@ -274,9 +275,9 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
private Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> extractClusters(Relation<V> database, DiSHDistanceFunction.Instance<V> distFunc, ClusterOrderResult<PreferenceVectorBasedCorrelationDistance> clusterOrder) {
FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Extract Clusters", database.size(), LOG) : null;
int processed = 0;
- Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> clustersMap = new HashMap<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>>();
- Map<DBID, ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance>> entryMap = new HashMap<DBID, ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance>>();
- Map<DBID, Pair<BitSet, ArrayModifiableDBIDs>> entryToClusterMap = new HashMap<DBID, Pair<BitSet, ArrayModifiableDBIDs>>();
+ Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> clustersMap = new HashMap<>();
+ Map<DBID, ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance>> entryMap = new HashMap<>();
+ Map<DBID, Pair<BitSet, ArrayModifiableDBIDs>> entryToClusterMap = new HashMap<>();
for (Iterator<ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance>> it = clusterOrder.iterator(); it.hasNext();) {
ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance> entry = it.next();
entryMap.put(entry.getID(), entry);
@@ -287,7 +288,7 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
// get the list of (parallel) clusters for the preference vector
List<Pair<BitSet, ArrayModifiableDBIDs>> parallelClusters = clustersMap.get(preferenceVector);
if (parallelClusters == null) {
- parallelClusters = new ArrayList<Pair<BitSet, ArrayModifiableDBIDs>>();
+ parallelClusters = new ArrayList<>();
clustersMap.put(preferenceVector, parallelClusters);
}
@@ -305,7 +306,7 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
}
}
if (cluster == null) {
- cluster = new Pair<BitSet, ArrayModifiableDBIDs>(preferenceVector, DBIDUtil.newArray());
+ cluster = new Pair<>(preferenceVector, DBIDUtil.newArray());
parallelClusters.add(cluster);
}
cluster.second.add(entry.getID());
@@ -373,15 +374,13 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
private List<Cluster<SubspaceModel<V>>> sortClusters(Relation<V> database, Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> clustersMap) {
final int db_dim = RelationUtil.dimensionality(database);
// int num = 1;
- List<Cluster<SubspaceModel<V>>> clusters = new ArrayList<Cluster<SubspaceModel<V>>>();
+ List<Cluster<SubspaceModel<V>>> clusters = new ArrayList<>();
for (BitSet pv : clustersMap.keySet()) {
List<Pair<BitSet, ArrayModifiableDBIDs>> parallelClusters = clustersMap.get(pv);
for (int i = 0; i < parallelClusters.size(); i++) {
Pair<BitSet, ArrayModifiableDBIDs> c = parallelClusters.get(i);
- Cluster<SubspaceModel<V>> cluster = new Cluster<SubspaceModel<V>>(c.second);
- cluster.setModel(new SubspaceModel<V>(new Subspace(c.first), Centroid.make(database, c.second).toVector(database)));
- cluster.setHierarchy(new HierarchyReferenceLists<Cluster<SubspaceModel<V>>>(cluster, new ArrayList<Cluster<SubspaceModel<V>>>(), new ArrayList<Cluster<SubspaceModel<V>>>()));
- // cluster.setName("Cluster_" + num++);
+ Cluster<SubspaceModel<V>> cluster = new Cluster<>(c.second);
+ cluster.setModel(new SubspaceModel<>(new Subspace(c.first), Centroid.make(database, c.second).toVector(database)));
String subspace = FormatUtil.format(cluster.getModel().getSubspace().getDimensions(), db_dim, "");
if (parallelClusters.size() > 1) {
cluster.setName("Cluster_" + subspace + "_" + i);
@@ -415,9 +414,9 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
private void checkClusters(Relation<V> database, DiSHDistanceFunction.Instance<V> distFunc, Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> clustersMap, int minpts) {
// check if there are clusters < minpts
// and add them to not assigned
- List<Pair<BitSet, ArrayModifiableDBIDs>> notAssigned = new ArrayList<Pair<BitSet, ArrayModifiableDBIDs>>();
- Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> newClustersMap = new HashMap<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>>();
- Pair<BitSet, ArrayModifiableDBIDs> noise = new Pair<BitSet, ArrayModifiableDBIDs>(new BitSet(), DBIDUtil.newArray());
+ List<Pair<BitSet, ArrayModifiableDBIDs>> notAssigned = new ArrayList<>();
+ Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> newClustersMap = new HashMap<>();
+ Pair<BitSet, ArrayModifiableDBIDs> noise = new Pair<>(new BitSet(), DBIDUtil.newArray());
for (BitSet pv : clustersMap.keySet()) {
// noise
if (pv.cardinality() == 0) {
@@ -429,7 +428,7 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
// clusters
else {
List<Pair<BitSet, ArrayModifiableDBIDs>> parallelClusters = clustersMap.get(pv);
- List<Pair<BitSet, ArrayModifiableDBIDs>> newParallelClusters = new ArrayList<Pair<BitSet, ArrayModifiableDBIDs>>(parallelClusters.size());
+ List<Pair<BitSet, ArrayModifiableDBIDs>> newParallelClusters = new ArrayList<>(parallelClusters.size());
for (Pair<BitSet, ArrayModifiableDBIDs> c : parallelClusters) {
if (!pv.equals(new BitSet()) && c.second.size() < minpts) {
notAssigned.add(c);
@@ -456,7 +455,7 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
}
}
- List<Pair<BitSet, ArrayModifiableDBIDs>> noiseList = new ArrayList<Pair<BitSet, ArrayModifiableDBIDs>>(1);
+ List<Pair<BitSet, ArrayModifiableDBIDs>> noiseList = new ArrayList<>(1);
noiseList.add(noise);
clustersMap.put(noise.first, noiseList);
}
@@ -510,13 +509,15 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
* Builds the cluster hierarchy.
*
* @param distFunc the distance function
+ * @param clustering Clustering we process
* @param clusters the sorted list of clusters
* @param dimensionality the dimensionality of the data
* @param database the database containing the data objects
*/
- private void buildHierarchy(Relation<V> database, DiSHDistanceFunction.Instance<V> distFunc, List<Cluster<SubspaceModel<V>>> clusters, int dimensionality) {
+ private void buildHierarchy(Relation<V> database, DiSHDistanceFunction.Instance<V> distFunc, Clustering<SubspaceModel<V>> clustering, List<Cluster<SubspaceModel<V>>> clusters, int dimensionality) {
StringBuilder msg = new StringBuilder();
final int db_dim = RelationUtil.dimensionality(database);
+ Hierarchy<Cluster<SubspaceModel<V>>> hier = clustering.getClusterHierarchy();
for (int i = 0; i < clusters.size() - 1; i++) {
Cluster<SubspaceModel<V>> c_i = clusters.get(i);
@@ -536,9 +537,8 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
// noise level reached
if (c_j.getModel().getSubspace().dimensionality() == 0) {
// no parents exists -> parent is noise
- if (c_i.getParents().isEmpty()) {
- c_j.getChildren().add(c_i);
- c_i.getParents().add(c_j);
+ if (hier.numParents(c_i) == 0) {
+ clustering.addChildCluster(c_j, c_i);
if (LOG.isDebugging()) {
msg.append("\n [").append(FormatUtil.format(db_dim, c_j.getModel().getSubspace().getDimensions()));
msg.append("] is parent of [").append(FormatUtil.format(db_dim, c_i.getModel().getSubspace().getDimensions()));
@@ -560,9 +560,8 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
if (d <= 2 * epsilon) {
// no parent exists or c_j is not a parent of the already
// existing parents
- if (c_i.getParents().isEmpty() || !isParent(database, distFunc, c_j, c_i.getParents())) {
- c_j.getChildren().add(c_i);
- c_i.getParents().add(c_j);
+ if (hier.numParents(c_i) == 0 || !isParent(database, distFunc, c_j, hier.iterParents(c_i))) {
+ clustering.addChildCluster(c_j, c_i);
if (LOG.isDebugging()) {
msg.append("\n [").append(FormatUtil.format(db_dim, c_j.getModel().getSubspace().getDimensions()));
msg.append("] is parent of [");
@@ -591,16 +590,17 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
* @param distFunc the distance function for distance computation between the
* clusters
* @param parent the parent to be tested
- * @param children the list of children to be tested
+ * @param iter the list of children to be tested
* @return true, if the specified parent cluster is a parent of one child of
* the children clusters, false otherwise
*/
- private boolean isParent(Relation<V> database, DiSHDistanceFunction.Instance<V> distFunc, Cluster<SubspaceModel<V>> parent, List<Cluster<SubspaceModel<V>>> children) {
+ private boolean isParent(Relation<V> database, DiSHDistanceFunction.Instance<V> distFunc, Cluster<SubspaceModel<V>> parent, Iter<Cluster<SubspaceModel<V>>> iter) {
V parent_centroid = ProjectedCentroid.make(parent.getModel().getDimensions(), database, parent.getIDs()).toVector(database);
int dimensionality = RelationUtil.dimensionality(database);
int subspaceDim_parent = dimensionality - parent.getModel().getSubspace().dimensionality();
- for (Cluster<SubspaceModel<V>> child : children) {
+ for (; iter.valid(); iter.advance()) {
+ Cluster<SubspaceModel<V>> child = iter.get();
V child_centroid = ProjectedCentroid.make(child.getModel().getDimensions(), database, child.getIDs()).toVector(database);
PreferenceVectorBasedCorrelationDistance distance = distFunc.correlationDistance(parent_centroid, child_centroid, parent.getModel().getSubspace().getDimensions(), child.getModel().getSubspace().getDimensions());
if (distance.getCorrelationValue() == subspaceDim_parent) {
@@ -699,7 +699,7 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
@Override
protected DiSH<V> makeInstance() {
- return new DiSH<V>(epsilon, dishDistance, opticsO);
+ return new DiSH<>(epsilon, dishDistance, opticsO);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/HiSC.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/HiSC.java
index 58f3acef..9ac7c072 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/HiSC.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/HiSC.java
@@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2012
+Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -119,7 +119,7 @@ public class HiSC<V extends NumberVector<?>> extends OPTICS<V, PreferenceVectorB
@Override
protected HiSC<V> makeInstance() {
- return new HiSC<V>(distanceFunction);
+ return new HiSC<>(distanceFunction);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PROCLUS.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PROCLUS.java
index ef49ff10..92158734 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PROCLUS.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PROCLUS.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -49,13 +49,13 @@ import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.DistanceDBIDPair;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult;
import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultUtil;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -150,13 +150,13 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
RangeQuery<V, DoubleDistance> rangeQuery = database.getRangeQuery(distFunc);
final Random random = rnd.getRandom();
- if(RelationUtil.dimensionality(relation) < l) {
+ if (RelationUtil.dimensionality(relation) < l) {
throw new IllegalStateException("Dimensionality of data < parameter l! " + "(" + RelationUtil.dimensionality(relation) + " < " + l + ")");
}
// TODO: use a StepProgress!
// initialization phase
- if(LOG.isVerbose()) {
+ if (LOG.isVerbose()) {
LOG.verbose("1. Initialization phase...");
}
int sampleSize = Math.min(relation.size(), k_i * k);
@@ -165,7 +165,7 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
int medoidSize = Math.min(relation.size(), m_i * k);
DBIDs medoids = greedy(distFunc, sampleSet, medoidSize, random);
- if(LOG.isDebugging()) {
+ if (LOG.isDebugging()) {
StringBuilder msg = new StringBuilder();
msg.append('\n');
msg.append("sampleSize ").append(sampleSize).append('\n');
@@ -176,7 +176,7 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
}
// iterative phase
- if(LOG.isVerbose()) {
+ if (LOG.isVerbose()) {
LOG.verbose("2. Iterative phase...");
}
double bestObjective = Double.POSITIVE_INFINITY;
@@ -184,7 +184,7 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
ModifiableDBIDs m_bad = null;
ModifiableDBIDs m_current = initialSet(medoids, k, random);
- if(LOG.isDebugging()) {
+ if (LOG.isDebugging()) {
StringBuilder msg = new StringBuilder();
msg.append('\n');
msg.append("m_c ").append(m_current).append('\n');
@@ -196,12 +196,12 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
// TODO: Use DataStore and Trove for performance
Map<DBID, PROCLUSCluster> clusters = null;
int loops = 0;
- while(loops < 10) {
+ while (loops < 10) {
Map<DBID, TIntSet> dimensions = findDimensions(m_current, relation, distFunc, rangeQuery);
clusters = assignPoints(dimensions, relation);
double objectiveFunction = evaluateClusters(clusters, dimensions, relation);
- if(objectiveFunction < bestObjective) {
+ if (objectiveFunction < bestObjective) {
// restart counting loops
loops = 0;
bestObjective = objectiveFunction;
@@ -211,32 +211,32 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
m_current = computeM_current(medoids, m_best, m_bad, random);
loops++;
- if(cprogress != null) {
+ if (cprogress != null) {
cprogress.setProcessed(clusters.size(), LOG);
}
}
- if(cprogress != null) {
+ if (cprogress != null) {
cprogress.setCompleted(LOG);
}
// refinement phase
- if(LOG.isVerbose()) {
+ if (LOG.isVerbose()) {
LOG.verbose("3. Refinement phase...");
}
- List<Pair<V, TIntSet>> dimensions = findDimensions(new ArrayList<PROCLUSCluster>(clusters.values()), relation);
+ List<Pair<V, TIntSet>> dimensions = findDimensions(new ArrayList<>(clusters.values()), relation);
List<PROCLUSCluster> finalClusters = finalAssignment(dimensions, relation);
// build result
int numClusters = 1;
- Clustering<SubspaceModel<V>> result = new Clustering<SubspaceModel<V>>("ProClus clustering", "proclus-clustering");
- for(PROCLUSCluster c : finalClusters) {
- Cluster<SubspaceModel<V>> cluster = new Cluster<SubspaceModel<V>>(c.objectIDs);
- cluster.setModel(new SubspaceModel<V>(new Subspace(c.getDimensions()), c.centroid));
+ Clustering<SubspaceModel<V>> result = new Clustering<>("ProClus clustering", "proclus-clustering");
+ for (PROCLUSCluster c : finalClusters) {
+ Cluster<SubspaceModel<V>> cluster = new Cluster<>(c.objectIDs);
+ cluster.setModel(new SubspaceModel<>(new Subspace(c.getDimensions()), c.centroid));
cluster.setName("cluster_" + numClusters++);
- result.addCluster(cluster);
+ result.addToplevelCluster(cluster);
}
return result;
}
@@ -257,22 +257,22 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
// m_1 is random point of S
DBID m_i = s.remove(random.nextInt(s.size()));
medoids.add(m_i);
- if(LOG.isDebugging()) {
+ if (LOG.isDebugging()) {
LOG.debugFiner("medoids " + medoids);
}
// compute distances between each point in S and m_i
// FIXME: don't use maps, so we can work with DBIDRef
- Map<DBID, DistanceDBIDPair<DoubleDistance>> distances = new HashMap<DBID, DistanceDBIDPair<DoubleDistance>>();
- for(DBIDIter iter = s.iter(); iter.valid(); iter.advance()) {
+ Map<DBID, DistanceDBIDPair<DoubleDistance>> distances = new HashMap<>();
+ for (DBIDIter iter = s.iter(); iter.valid(); iter.advance()) {
DBID id = DBIDUtil.deref(iter);
DoubleDistance dist = distFunc.distance(id, m_i);
distances.put(id, DBIDUtil.newDistancePair(dist, id));
}
- for(int i = 1; i < m; i++) {
- // choose medoid m_i to be far from prevois medoids
- List<DistanceDBIDPair<DoubleDistance>> d = new ArrayList<DistanceDBIDPair<DoubleDistance>>(distances.values());
+ for (int i = 1; i < m; i++) {
+ // choose medoid m_i to be far from previous medoids
+ List<DistanceDBIDPair<DoubleDistance>> d = new ArrayList<>(distances.values());
DistanceDBIDResultUtil.sortByDistance(d);
m_i = DBIDUtil.deref(d.get(d.size() - 1));
@@ -281,7 +281,7 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
distances.remove(m_i);
// compute distances of each point to closest medoid
- for(DBIDIter iter = s.iter(); iter.valid(); iter.advance()) {
+ for (DBIDIter iter = s.iter(); iter.valid(); iter.advance()) {
DBID id = DBIDUtil.deref(iter);
DoubleDistance dist_new = distFunc.distance(id, m_i);
DoubleDistance dist_old = distances.get(id).getDistance();
@@ -290,7 +290,7 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
distances.put(id, DBIDUtil.newDistancePair(dist, id));
}
- if(LOG.isDebugging()) {
+ if (LOG.isDebugging()) {
LOG.debugFiner("medoids " + medoids);
}
}
@@ -309,7 +309,7 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
private ModifiableDBIDs initialSet(DBIDs sampleSet, int k, Random random) {
ArrayModifiableDBIDs s = DBIDUtil.newArray(sampleSet);
ModifiableDBIDs initialSet = DBIDUtil.newHashSet();
- while(initialSet.size() < k) {
+ while (initialSet.size() < k) {
DBID next = s.remove(random.nextInt(s.size()));
initialSet.add(next);
}
@@ -330,16 +330,15 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
m_list.removeDBIDs(m_best);
ModifiableDBIDs m_current = DBIDUtil.newHashSet();
- for(DBIDIter iter = m_best.iter(); iter.valid(); iter.advance()) {
+ for (DBIDIter iter = m_best.iter(); iter.valid(); iter.advance()) {
DBID m_i = DBIDUtil.deref(iter);
- if(m_bad.contains(m_i)) {
+ if (m_bad.contains(m_i)) {
int currentSize = m_current.size();
- while(m_current.size() == currentSize) {
+ while (m_current.size() == currentSize) {
DBID next = m_list.remove(random.nextInt(m_list.size()));
m_current.add(next);
}
- }
- else {
+ } else {
m_current.add(m_i);
}
}
@@ -358,28 +357,28 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
* @param distFunc the distance function
* @return a mapping of the medoid's id to its locality
*/
- private Map<DBID, DistanceDBIDResult<DoubleDistance>> getLocalities(DBIDs medoids, Relation<V> database, DistanceQuery<V, DoubleDistance> distFunc, RangeQuery<V, DoubleDistance> rangeQuery) {
- Map<DBID, DistanceDBIDResult<DoubleDistance>> result = new HashMap<DBID, DistanceDBIDResult<DoubleDistance>>();
+ private Map<DBID, DistanceDBIDList<DoubleDistance>> getLocalities(DBIDs medoids, Relation<V> database, DistanceQuery<V, DoubleDistance> distFunc, RangeQuery<V, DoubleDistance> rangeQuery) {
+ Map<DBID, DistanceDBIDList<DoubleDistance>> result = new HashMap<>();
- for(DBIDIter iter = medoids.iter(); iter.valid(); iter.advance()) {
+ for (DBIDIter iter = medoids.iter(); iter.valid(); iter.advance()) {
DBID m = DBIDUtil.deref(iter);
// determine minimum distance between current medoid m and any other
// medoid m_i
DoubleDistance minDist = null;
- for(DBIDIter iter2 = medoids.iter(); iter2.valid(); iter2.advance()) {
+ for (DBIDIter iter2 = medoids.iter(); iter2.valid(); iter2.advance()) {
DBID m_i = DBIDUtil.deref(iter2);
- if(DBIDUtil.equal(m_i, m)) {
+ if (DBIDUtil.equal(m_i, m)) {
continue;
}
DoubleDistance currentDist = distFunc.distance(m, m_i);
- if(minDist == null || currentDist.compareTo(minDist) < 0) {
+ if (minDist == null || currentDist.compareTo(minDist) < 0) {
minDist = currentDist;
}
}
// determine points in sphere centered at m with radius minDist
assert minDist != null;
- DistanceDBIDResult<DoubleDistance> qr = rangeQuery.getRangeForDBID(m, minDist);
+ DistanceDBIDList<DoubleDistance> qr = rangeQuery.getRangeForDBID(m, minDist);
result.put(m, qr);
}
@@ -398,32 +397,32 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
*/
private Map<DBID, TIntSet> findDimensions(DBIDs medoids, Relation<V> database, DistanceQuery<V, DoubleDistance> distFunc, RangeQuery<V, DoubleDistance> rangeQuery) {
// get localities
- Map<DBID, DistanceDBIDResult<DoubleDistance>> localities = getLocalities(medoids, database, distFunc, rangeQuery);
+ Map<DBID, DistanceDBIDList<DoubleDistance>> localities = getLocalities(medoids, database, distFunc, rangeQuery);
// compute x_ij = avg distance from points in l_i to medoid m_i
int dim = RelationUtil.dimensionality(database);
- Map<DBID, double[]> averageDistances = new HashMap<DBID, double[]>();
+ Map<DBID, double[]> averageDistances = new HashMap<>();
- for(DBIDIter iter = medoids.iter(); iter.valid(); iter.advance()) {
+ for (DBIDIter iter = medoids.iter(); iter.valid(); iter.advance()) {
DBID m_i = DBIDUtil.deref(iter);
V medoid_i = database.get(m_i);
- DistanceDBIDResult<DoubleDistance> l_i = localities.get(m_i);
+ DistanceDBIDList<DoubleDistance> l_i = localities.get(m_i);
double[] x_i = new double[dim];
- for(DBIDIter qr = l_i.iter(); qr.valid(); qr.advance()) {
+ for (DBIDIter qr = l_i.iter(); qr.valid(); qr.advance()) {
V o = database.get(qr);
- for(int d = 0; d < dim; d++) {
+ for (int d = 0; d < dim; d++) {
x_i[d] += Math.abs(medoid_i.doubleValue(d) - o.doubleValue(d));
}
}
- for(int d = 0; d < dim; d++) {
+ for (int d = 0; d < dim; d++) {
x_i[d] /= l_i.size();
}
averageDistances.put(m_i, x_i);
}
- Map<DBID, TIntSet> dimensionMap = new HashMap<DBID, TIntSet>();
- List<CTriple<Double, DBID, Integer>> z_ijs = new ArrayList<CTriple<Double, DBID, Integer>>();
- for(DBIDIter iter = medoids.iter(); iter.valid(); iter.advance()) {
+ Map<DBID, TIntSet> dimensionMap = new HashMap<>();
+ List<CTriple<Double, DBID, Integer>> z_ijs = new ArrayList<>();
+ for (DBIDIter iter = medoids.iter(); iter.valid(); iter.advance()) {
DBID m_i = DBIDUtil.deref(iter);
TIntSet dims_i = new TIntHashSet();
dimensionMap.put(m_i, dims_i);
@@ -431,33 +430,33 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
double[] x_i = averageDistances.get(m_i);
// y_i
double y_i = 0;
- for(int j = 0; j < dim; j++) {
+ for (int j = 0; j < dim; j++) {
y_i += x_i[j];
}
y_i /= dim;
// sigma_i
double sigma_i = 0;
- for(int j = 0; j < dim; j++) {
+ for (int j = 0; j < dim; j++) {
double diff = x_i[j] - y_i;
sigma_i += diff * diff;
}
sigma_i /= (dim - 1);
sigma_i = Math.sqrt(sigma_i);
- for(int j = 0; j < dim; j++) {
- z_ijs.add(new CTriple<Double, DBID, Integer>((x_i[j] - y_i) / sigma_i, m_i, j));
+ for (int j = 0; j < dim; j++) {
+ z_ijs.add(new CTriple<>((x_i[j] - y_i) / sigma_i, m_i, j));
}
}
Collections.sort(z_ijs);
int max = Math.max(k * l, 2);
- for(int m = 0; m < max; m++) {
+ for (int m = 0; m < max; m++) {
CTriple<Double, DBID, Integer> z_ij = z_ijs.get(m);
TIntSet dims_i = dimensionMap.get(z_ij.getSecond());
dims_i.add(z_ij.getThird());
- if(LOG.isDebugging()) {
+ if (LOG.isDebugging()) {
StringBuilder msg = new StringBuilder();
msg.append('\n');
msg.append("z_ij ").append(z_ij).append('\n');
@@ -480,61 +479,61 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
private List<Pair<V, TIntSet>> findDimensions(List<PROCLUSCluster> clusters, Relation<V> database) {
// compute x_ij = avg distance from points in c_i to c_i.centroid
int dim = RelationUtil.dimensionality(database);
- Map<Integer, double[]> averageDistances = new HashMap<Integer, double[]>();
+ Map<Integer, double[]> averageDistances = new HashMap<>();
- for(int i = 0; i < clusters.size(); i++) {
+ for (int i = 0; i < clusters.size(); i++) {
PROCLUSCluster c_i = clusters.get(i);
double[] x_i = new double[dim];
- for(DBIDIter iter = c_i.objectIDs.iter(); iter.valid(); iter.advance()) {
+ for (DBIDIter iter = c_i.objectIDs.iter(); iter.valid(); iter.advance()) {
V o = database.get(iter);
- for(int d = 0; d < dim; d++) {
+ for (int d = 0; d < dim; d++) {
x_i[d] += Math.abs(c_i.centroid.doubleValue(d) - o.doubleValue(d));
}
}
- for(int d = 0; d < dim; d++) {
+ for (int d = 0; d < dim; d++) {
x_i[d] /= c_i.objectIDs.size();
}
averageDistances.put(i, x_i);
}
- List<CTriple<Double, Integer, Integer>> z_ijs = new ArrayList<CTriple<Double, Integer, Integer>>();
- for(int i = 0; i < clusters.size(); i++) {
+ List<CTriple<Double, Integer, Integer>> z_ijs = new ArrayList<>();
+ for (int i = 0; i < clusters.size(); i++) {
double[] x_i = averageDistances.get(i);
// y_i
double y_i = 0;
- for(int j = 0; j < dim; j++) {
+ for (int j = 0; j < dim; j++) {
y_i += x_i[j];
}
y_i /= dim;
// sigma_i
double sigma_i = 0;
- for(int j = 0; j < dim; j++) {
+ for (int j = 0; j < dim; j++) {
double diff = x_i[j] - y_i;
sigma_i += diff * diff;
}
sigma_i /= (dim - 1);
sigma_i = Math.sqrt(sigma_i);
- for(int j = 0; j < dim; j++) {
- z_ijs.add(new CTriple<Double, Integer, Integer>((x_i[j] - y_i) / sigma_i, i, j));
+ for (int j = 0; j < dim; j++) {
+ z_ijs.add(new CTriple<>((x_i[j] - y_i) / sigma_i, i, j));
}
}
Collections.sort(z_ijs);
// mapping cluster index -> dimensions
- Map<Integer, TIntSet> dimensionMap = new HashMap<Integer, TIntSet>();
+ Map<Integer, TIntSet> dimensionMap = new HashMap<>();
int max = Math.max(k * l, 2);
- for(int m = 0; m < max; m++) {
+ for (int m = 0; m < max; m++) {
CTriple<Double, Integer, Integer> z_ij = z_ijs.get(m);
TIntSet dims_i = dimensionMap.get(z_ij.getSecond());
- if(dims_i == null) {
+ if (dims_i == null) {
dims_i = new TIntHashSet();
dimensionMap.put(z_ij.getSecond(), dims_i);
}
dims_i.add(z_ij.getThird());
- if(LOG.isDebugging()) {
+ if (LOG.isDebugging()) {
StringBuilder msg = new StringBuilder();
msg.append('\n');
msg.append("z_ij ").append(z_ij).append('\n');
@@ -544,11 +543,11 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
}
// mapping cluster -> dimensions
- List<Pair<V, TIntSet>> result = new ArrayList<Pair<V, TIntSet>>();
- for(int i : dimensionMap.keySet()) {
+ List<Pair<V, TIntSet>> result = new ArrayList<>();
+ for (int i : dimensionMap.keySet()) {
TIntSet dims_i = dimensionMap.get(i);
PROCLUSCluster c_i = clusters.get(i);
- result.add(new Pair<V, TIntSet>(c_i.centroid, dims_i));
+ result.add(new Pair<>(c_i.centroid, dims_i));
}
return result;
}
@@ -562,19 +561,19 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
* @return the assignments of the object to the clusters
*/
private Map<DBID, PROCLUSCluster> assignPoints(Map<DBID, TIntSet> dimensions, Relation<V> database) {
- Map<DBID, ModifiableDBIDs> clusterIDs = new HashMap<DBID, ModifiableDBIDs>();
- for(DBID m_i : dimensions.keySet()) {
+ Map<DBID, ModifiableDBIDs> clusterIDs = new HashMap<>();
+ for (DBID m_i : dimensions.keySet()) {
clusterIDs.put(m_i, DBIDUtil.newHashSet());
}
- for(DBIDIter it = database.iterDBIDs(); it.valid(); it.advance()) {
+ for (DBIDIter it = database.iterDBIDs(); it.valid(); it.advance()) {
DBID p_id = DBIDUtil.deref(it);
V p = database.get(p_id);
DistanceDBIDPair<DoubleDistance> minDist = null;
- for(DBID m_i : dimensions.keySet()) {
+ for (DBID m_i : dimensions.keySet()) {
V m = database.get(m_i);
DistanceDBIDPair<DoubleDistance> currentDist = DBIDUtil.newDistancePair(manhattanSegmentalDistance(p, m, dimensions.get(m_i)), m_i);
- if(minDist == null || currentDist.compareByDistance(minDist) < 0) {
+ if (minDist == null || currentDist.compareByDistance(minDist) < 0) {
minDist = currentDist;
}
}
@@ -584,17 +583,17 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
ids.add(p_id);
}
- Map<DBID, PROCLUSCluster> clusters = new HashMap<DBID, PROCLUSCluster>();
- for(DBID m_i : dimensions.keySet()) {
+ Map<DBID, PROCLUSCluster> clusters = new HashMap<>();
+ for (DBID m_i : dimensions.keySet()) {
ModifiableDBIDs objectIDs = clusterIDs.get(m_i);
- if(!objectIDs.isEmpty()) {
+ if (!objectIDs.isEmpty()) {
TIntSet clusterDimensions = dimensions.get(m_i);
V centroid = Centroid.make(database, objectIDs).toVector(database);
clusters.put(m_i, new PROCLUSCluster(objectIDs, clusterDimensions, centroid));
}
}
- if(LOG.isDebugging()) {
+ if (LOG.isDebugging()) {
StringBuilder msg = new StringBuilder();
msg.append('\n');
msg.append("clusters ").append(clusters).append('\n');
@@ -612,22 +611,22 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
* @return the assignments of the object to the clusters
*/
private List<PROCLUSCluster> finalAssignment(List<Pair<V, TIntSet>> dimensions, Relation<V> database) {
- Map<Integer, ModifiableDBIDs> clusterIDs = new HashMap<Integer, ModifiableDBIDs>();
- for(int i = 0; i < dimensions.size(); i++) {
+ Map<Integer, ModifiableDBIDs> clusterIDs = new HashMap<>();
+ for (int i = 0; i < dimensions.size(); i++) {
clusterIDs.put(i, DBIDUtil.newHashSet());
}
- for(DBIDIter it = database.iterDBIDs(); it.valid(); it.advance()) {
+ for (DBIDIter it = database.iterDBIDs(); it.valid(); it.advance()) {
DBID p_id = DBIDUtil.deref(it);
V p = database.get(p_id);
Pair<DoubleDistance, Integer> minDist = null;
- for(int i = 0; i < dimensions.size(); i++) {
+ for (int i = 0; i < dimensions.size(); i++) {
Pair<V, TIntSet> pair_i = dimensions.get(i);
V c_i = pair_i.first;
TIntSet dimensions_i = pair_i.second;
DoubleDistance currentDist = manhattanSegmentalDistance(p, c_i, dimensions_i);
- if(minDist == null || currentDist.compareTo(minDist.first) < 0) {
- minDist = new Pair<DoubleDistance, Integer>(currentDist, i);
+ if (minDist == null || currentDist.compareTo(minDist.first) < 0) {
+ minDist = new Pair<>(currentDist, i);
}
}
// add p to cluster with mindist
@@ -636,17 +635,17 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
ids.add(p_id);
}
- List<PROCLUSCluster> clusters = new ArrayList<PROCLUSCluster>();
- for(int i = 0; i < dimensions.size(); i++) {
+ List<PROCLUSCluster> clusters = new ArrayList<>();
+ for (int i = 0; i < dimensions.size(); i++) {
ModifiableDBIDs objectIDs = clusterIDs.get(i);
- if(!objectIDs.isEmpty()) {
+ if (!objectIDs.isEmpty()) {
TIntSet clusterDimensions = dimensions.get(i).second;
V centroid = Centroid.make(database, objectIDs).toVector(database);
clusters.add(new PROCLUSCluster(objectIDs, clusterDimensions, centroid));
}
}
- if(LOG.isDebugging()) {
+ if (LOG.isDebugging()) {
StringBuilder msg = new StringBuilder();
msg.append('\n');
msg.append("clusters ").append(clusters).append('\n');
@@ -667,7 +666,7 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
*/
private DoubleDistance manhattanSegmentalDistance(V o1, V o2, TIntSet dimensions) {
double result = 0;
- for (TIntIterator iter = dimensions.iterator(); iter.hasNext(); ) {
+ for (TIntIterator iter = dimensions.iterator(); iter.hasNext();) {
final int d = iter.next();
result += Math.abs(o1.doubleValue(d) - o2.doubleValue(d));
}
@@ -685,13 +684,13 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
*/
private double evaluateClusters(Map<DBID, PROCLUSCluster> clusters, Map<DBID, TIntSet> dimensions, Relation<V> database) {
double result = 0;
- for(DBID m_i : clusters.keySet()) {
+ for (DBID m_i : clusters.keySet()) {
PROCLUSCluster c_i = clusters.get(m_i);
V centroid_i = c_i.centroid;
TIntSet dims_i = dimensions.get(m_i);
double w_i = 0;
- for (TIntIterator iter = dims_i.iterator(); iter.hasNext(); ) {
+ for (TIntIterator iter = dims_i.iterator(); iter.hasNext();) {
final int j = iter.next();
w_i += avgDistance(centroid_i, c_i.objectIDs, database, j);
}
@@ -716,7 +715,7 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
*/
private double avgDistance(V centroid, DBIDs objectIDs, Relation<V> database, int dimension) {
Mean avg = new Mean();
- for(DBIDIter iter = objectIDs.iter(); iter.valid(); iter.advance()) {
+ for (DBIDIter iter = objectIDs.iter(); iter.valid(); iter.advance()) {
V o = database.get(iter);
avg.put(Math.abs(centroid.doubleValue(dimension) - o.doubleValue(dimension)));
}
@@ -733,9 +732,9 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
*/
private ModifiableDBIDs computeBadMedoids(Map<DBID, PROCLUSCluster> clusters, int threshold) {
ModifiableDBIDs badMedoids = DBIDUtil.newHashSet();
- for(DBID m_i : clusters.keySet()) {
+ for (DBID m_i : clusters.keySet()) {
PROCLUSCluster c_i = clusters.get(m_i);
- if(c_i.objectIDs.size() < threshold) {
+ if (c_i.objectIDs.size() < threshold) {
badMedoids.add(m_i);
}
}
@@ -791,11 +790,10 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
StringBuilder result = new StringBuilder();
result.append("Dimensions: [");
boolean notFirst = false;
- for(TIntIterator iter = dimensions.iterator(); iter.hasNext(); ) {
- if(notFirst) {
+ for (TIntIterator iter = dimensions.iterator(); iter.hasNext();) {
+ if (notFirst) {
result.append(',');
- }
- else {
+ } else {
notFirst = true;
}
result.append(iter.next());
@@ -813,7 +811,7 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
*/
public BitSet getDimensions() {
BitSet result = new BitSet();
- for(TIntIterator iter = dimensions.iterator(); iter.hasNext(); ) {
+ for (TIntIterator iter = dimensions.iterator(); iter.hasNext();) {
result.set(iter.next());
}
return result;
@@ -847,19 +845,19 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
IntParameter m_iP = new IntParameter(M_I_ID, 10);
m_iP.addConstraint(new GreaterConstraint(0));
- if(config.grab(m_iP)) {
+ if (config.grab(m_iP)) {
m_i = m_iP.getValue();
}
RandomParameter rndP = new RandomParameter(SEED_ID);
- if(config.grab(rndP)) {
+ if (config.grab(rndP)) {
rnd = rndP.getValue();
}
}
@Override
protected PROCLUS<V> makeInstance() {
- return new PROCLUS<V>(k, k_i, l, m_i, rnd);
+ return new PROCLUS<>(k, k_i, l, m_i, rnd);
}
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PreDeCon.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PreDeCon.java
index fc3228eb..4e670974 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PreDeCon.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PreDeCon.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -111,7 +111,7 @@ public class PreDeCon<V extends NumberVector<?>> extends AbstractProjectedDBSCAN
@Override
protected PreDeCon<V> makeInstance() {
- return new PreDeCon<V>(epsilon, minpts, outerdist, lambda);
+ return new PreDeCon<>(epsilon, minpts, outerdist, lambda);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java
index 46c5f0b8..c8d0833e 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -43,7 +43,7 @@ import de.lmu.ifi.dbs.elki.database.ProxyDatabase;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.AbstractDimensionsSelectingDoubleDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.DimensionSelectingSubspaceDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -105,7 +105,7 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
/**
* Parameter to specify the maximum radius of the neighborhood to be
* considered, must be suitable to
- * {@link AbstractDimensionsSelectingDoubleDistanceFunction}.
+ * {@link DimensionSelectingSubspaceDistanceFunction}.
* <p>
* Key: {@code -subclu.epsilon}
* </p>
@@ -125,7 +125,7 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
* Holds the instance of the distance function specified by
* {@link #DISTANCE_FUNCTION_ID}.
*/
- private AbstractDimensionsSelectingDoubleDistanceFunction<V> distanceFunction;
+ private DimensionSelectingSubspaceDistanceFunction<V, DoubleDistance> distanceFunction;
/**
* Holds the value of {@link #EPSILON_ID}.
@@ -149,7 +149,7 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
* @param epsilon Epsilon value
* @param minpts Minpts value
*/
- public SUBCLU(AbstractDimensionsSelectingDoubleDistanceFunction<V> distanceFunction, DoubleDistance epsilon, int minpts) {
+ public SUBCLU(DimensionSelectingSubspaceDistanceFunction<V, DoubleDistance> distanceFunction, DoubleDistance epsilon, int minpts) {
super();
this.distanceFunction = distanceFunction;
this.epsilon = epsilon;
@@ -168,49 +168,49 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
StepProgress stepprog = LOG.isVerbose() ? new StepProgress(dimensionality) : null;
// Generate all 1-dimensional clusters
- if(stepprog != null) {
+ if (stepprog != null) {
stepprog.beginStep(1, "Generate all 1-dimensional clusters.", LOG);
}
// mapping of dimensionality to set of subspaces
- HashMap<Integer, List<Subspace>> subspaceMap = new HashMap<Integer, List<Subspace>>();
+ HashMap<Integer, List<Subspace>> subspaceMap = new HashMap<>();
// list of 1-dimensional subspaces containing clusters
- List<Subspace> s_1 = new ArrayList<Subspace>();
+ List<Subspace> s_1 = new ArrayList<>();
subspaceMap.put(0, s_1);
// mapping of subspaces to list of clusters
- TreeMap<Subspace, List<Cluster<Model>>> clusterMap = new TreeMap<Subspace, List<Cluster<Model>>>(new Subspace.DimensionComparator());
+ TreeMap<Subspace, List<Cluster<Model>>> clusterMap = new TreeMap<>(new Subspace.DimensionComparator());
- for(int d = 0; d < dimensionality; d++) {
+ for (int d = 0; d < dimensionality; d++) {
Subspace currentSubspace = new Subspace(d);
List<Cluster<Model>> clusters = runDBSCAN(relation, null, currentSubspace);
- if(LOG.isDebuggingFiner()) {
+ if (LOG.isDebuggingFiner()) {
StringBuilder msg = new StringBuilder();
msg.append('\n').append(clusters.size()).append(" clusters in subspace ").append(currentSubspace.dimensonsToString()).append(": \n");
- for(Cluster<Model> cluster : clusters) {
+ for (Cluster<Model> cluster : clusters) {
msg.append(" " + cluster.getIDs() + "\n");
}
LOG.debugFiner(msg.toString());
}
- if(!clusters.isEmpty()) {
+ if (!clusters.isEmpty()) {
s_1.add(currentSubspace);
clusterMap.put(currentSubspace, clusters);
}
}
// Generate (d+1)-dimensional clusters from d-dimensional clusters
- for(int d = 0; d < dimensionality - 1; d++) {
- if(stepprog != null) {
+ for (int d = 0; d < dimensionality - 1; d++) {
+ if (stepprog != null) {
stepprog.beginStep(d + 2, "Generate " + (d + 2) + "-dimensional clusters from " + (d + 1) + "-dimensional clusters.", LOG);
}
List<Subspace> subspaces = subspaceMap.get(d);
- if(subspaces == null || subspaces.isEmpty()) {
- if(stepprog != null) {
- for(int dim = d + 1; dim < dimensionality - 1; dim++) {
+ if (subspaces == null || subspaces.isEmpty()) {
+ if (stepprog != null) {
+ for (int dim = d + 1; dim < dimensionality - 1; dim++) {
stepprog.beginStep(dim + 2, "Generation of" + (dim + 2) + "-dimensional clusters not applicable, because no more " + (d + 2) + "-dimensional subspaces found.", LOG);
}
}
@@ -218,57 +218,57 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
}
List<Subspace> candidates = generateSubspaceCandidates(subspaces);
- List<Subspace> s_d = new ArrayList<Subspace>();
+ List<Subspace> s_d = new ArrayList<>();
- for(Subspace candidate : candidates) {
+ for (Subspace candidate : candidates) {
Subspace bestSubspace = bestSubspace(subspaces, candidate, clusterMap);
- if(LOG.isDebuggingFine()) {
+ if (LOG.isDebuggingFine()) {
LOG.debugFine("best subspace of " + candidate.dimensonsToString() + ": " + bestSubspace.dimensonsToString());
}
List<Cluster<Model>> bestSubspaceClusters = clusterMap.get(bestSubspace);
- List<Cluster<Model>> clusters = new ArrayList<Cluster<Model>>();
- for(Cluster<Model> cluster : bestSubspaceClusters) {
+ List<Cluster<Model>> clusters = new ArrayList<>();
+ for (Cluster<Model> cluster : bestSubspaceClusters) {
List<Cluster<Model>> candidateClusters = runDBSCAN(relation, cluster.getIDs(), candidate);
- if(!candidateClusters.isEmpty()) {
+ if (!candidateClusters.isEmpty()) {
clusters.addAll(candidateClusters);
}
}
- if(LOG.isDebuggingFine()) {
+ if (LOG.isDebuggingFine()) {
StringBuilder msg = new StringBuilder();
msg.append(clusters.size() + " cluster(s) in subspace " + candidate + ": \n");
- for(Cluster<Model> c : clusters) {
+ for (Cluster<Model> c : clusters) {
msg.append(" " + c.getIDs() + "\n");
}
LOG.debugFine(msg.toString());
}
- if(!clusters.isEmpty()) {
+ if (!clusters.isEmpty()) {
s_d.add(candidate);
clusterMap.put(candidate, clusters);
}
}
- if(!s_d.isEmpty()) {
+ if (!s_d.isEmpty()) {
subspaceMap.put(d + 1, s_d);
}
}
// build result
int numClusters = 1;
- result = new Clustering<SubspaceModel<V>>("SUBCLU clustering", "subclu-clustering");
- for(Subspace subspace : clusterMap.descendingKeySet()) {
+ result = new Clustering<>("SUBCLU clustering", "subclu-clustering");
+ for (Subspace subspace : clusterMap.descendingKeySet()) {
List<Cluster<Model>> clusters = clusterMap.get(subspace);
- for(Cluster<Model> cluster : clusters) {
- Cluster<SubspaceModel<V>> newCluster = new Cluster<SubspaceModel<V>>(cluster.getIDs());
- newCluster.setModel(new SubspaceModel<V>(subspace, Centroid.make(relation, cluster.getIDs()).toVector(relation)));
+ for (Cluster<Model> cluster : clusters) {
+ Cluster<SubspaceModel<V>> newCluster = new Cluster<>(cluster.getIDs());
+ newCluster.setModel(new SubspaceModel<>(subspace, Centroid.make(relation, cluster.getIDs()).toVector(relation)));
newCluster.setName("cluster_" + numClusters++);
- result.addCluster(newCluster);
+ result.addToplevelCluster(newCluster);
}
}
- if(stepprog != null) {
+ if (stepprog != null) {
stepprog.setCompleted(LOG);
}
return result;
@@ -300,7 +300,7 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
distanceFunction.setSelectedDimensions(subspace.getDimensions());
ProxyDatabase proxy;
- if(ids == null) {
+ if (ids == null) {
// TODO: in this case, we might want to use an index - the proxy below
// will prevent this!
ids = relation.getDBIDs();
@@ -308,18 +308,18 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
proxy = new ProxyDatabase(ids, relation);
- DBSCAN<V, DoubleDistance> dbscan = new DBSCAN<V, DoubleDistance>(distanceFunction, epsilon, minpts);
+ DBSCAN<V, DoubleDistance> dbscan = new DBSCAN<>(distanceFunction, epsilon, minpts);
// run DBSCAN
- if(LOG.isVerbose()) {
+ if (LOG.isVerbose()) {
LOG.verbose("\nRun DBSCAN on subspace " + subspace.dimensonsToString());
}
Clustering<Model> dbsres = dbscan.run(proxy);
// separate cluster and noise
List<Cluster<Model>> clusterAndNoise = dbsres.getAllClusters();
- List<Cluster<Model>> clusters = new ArrayList<Cluster<Model>>();
- for(Cluster<Model> c : clusterAndNoise) {
- if(!c.isNoise()) {
+ List<Cluster<Model>> clusters = new ArrayList<>();
+ for (Cluster<Model> c : clusterAndNoise) {
+ if (!c.isNoise()) {
clusters.add(c);
}
}
@@ -334,9 +334,9 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
* @return the {@code d+1}-dimensional subspace candidates
*/
private List<Subspace> generateSubspaceCandidates(List<Subspace> subspaces) {
- List<Subspace> candidates = new ArrayList<Subspace>();
+ List<Subspace> candidates = new ArrayList<>();
- if(subspaces.isEmpty()) {
+ if (subspaces.isEmpty()) {
return candidates;
}
@@ -344,46 +344,46 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
int d = subspaces.get(0).dimensionality();
StringBuilder msgFine = new StringBuilder("\n");
- if(LOG.isDebuggingFiner()) {
+ if (LOG.isDebuggingFiner()) {
msgFine.append("subspaces ").append(subspaces).append('\n');
}
- for(int i = 0; i < subspaces.size(); i++) {
+ for (int i = 0; i < subspaces.size(); i++) {
Subspace s1 = subspaces.get(i);
- for(int j = i + 1; j < subspaces.size(); j++) {
+ for (int j = i + 1; j < subspaces.size(); j++) {
Subspace s2 = subspaces.get(j);
Subspace candidate = s1.join(s2);
- if(candidate != null) {
- if(LOG.isDebuggingFiner()) {
+ if (candidate != null) {
+ if (LOG.isDebuggingFiner()) {
msgFine.append("candidate: ").append(candidate.dimensonsToString()).append('\n');
}
// prune irrelevant candidate subspaces
List<Subspace> lowerSubspaces = lowerSubspaces(candidate);
- if(LOG.isDebuggingFiner()) {
+ if (LOG.isDebuggingFiner()) {
msgFine.append("lowerSubspaces: ").append(lowerSubspaces).append('\n');
}
boolean irrelevantCandidate = false;
- for(Subspace s : lowerSubspaces) {
- if(!subspaces.contains(s)) {
+ for (Subspace s : lowerSubspaces) {
+ if (!subspaces.contains(s)) {
irrelevantCandidate = true;
break;
}
}
- if(!irrelevantCandidate) {
+ if (!irrelevantCandidate) {
candidates.add(candidate);
}
}
}
}
- if(LOG.isDebuggingFiner()) {
+ if (LOG.isDebuggingFiner()) {
LOG.debugFiner(msgFine.toString());
}
- if(LOG.isDebugging()) {
+ if (LOG.isDebugging()) {
StringBuilder msg = new StringBuilder();
msg.append(d + 1).append("-dimensional candidate subspaces: ");
- for(Subspace candidate : candidates) {
+ for (Subspace candidate : candidates) {
msg.append(candidate.dimensonsToString()).append(' ');
}
LOG.debug(msg.toString());
@@ -401,14 +401,14 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
*/
private List<Subspace> lowerSubspaces(Subspace subspace) {
int dimensionality = subspace.dimensionality();
- if(dimensionality <= 1) {
+ if (dimensionality <= 1) {
return null;
}
// order result according to the dimensions
- List<Subspace> result = new ArrayList<Subspace>();
+ List<Subspace> result = new ArrayList<>();
BitSet dimensions = subspace.getDimensions();
- for(int dim = dimensions.nextSetBit(0); dim >= 0; dim = dimensions.nextSetBit(dim + 1)) {
+ for (int dim = dimensions.nextSetBit(0); dim >= 0; dim = dimensions.nextSetBit(dim + 1)) {
BitSet newDimensions = (BitSet) dimensions.clone();
newDimensions.set(dim, false);
result.add(new Subspace(newDimensions));
@@ -432,14 +432,14 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
private Subspace bestSubspace(List<Subspace> subspaces, Subspace candidate, TreeMap<Subspace, List<Cluster<Model>>> clusterMap) {
Subspace bestSubspace = null;
- for(Subspace subspace : subspaces) {
+ for (Subspace subspace : subspaces) {
int min = Integer.MAX_VALUE;
- if(subspace.isSubspace(candidate)) {
+ if (subspace.isSubspace(candidate)) {
List<Cluster<Model>> clusters = clusterMap.get(subspace);
- for(Cluster<Model> cluster : clusters) {
+ for (Cluster<Model> cluster : clusters) {
int clusterSize = cluster.size();
- if(clusterSize < min) {
+ if (clusterSize < min) {
min = clusterSize;
bestSubspace = subspace;
}
@@ -472,31 +472,31 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
protected DoubleDistance epsilon = null;
- protected AbstractDimensionsSelectingDoubleDistanceFunction<V> distance = null;
+ protected DimensionSelectingSubspaceDistanceFunction<V, DoubleDistance> distance = null;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- ObjectParameter<AbstractDimensionsSelectingDoubleDistanceFunction<V>> param = new ObjectParameter<AbstractDimensionsSelectingDoubleDistanceFunction<V>>(DISTANCE_FUNCTION_ID, AbstractDimensionsSelectingDoubleDistanceFunction.class, SubspaceEuclideanDistanceFunction.class);
- if(config.grab(param)) {
+ ObjectParameter<DimensionSelectingSubspaceDistanceFunction<V, DoubleDistance>> param = new ObjectParameter<>(DISTANCE_FUNCTION_ID, DimensionSelectingSubspaceDistanceFunction.class, SubspaceEuclideanDistanceFunction.class);
+ if (config.grab(param)) {
distance = param.instantiateClass(config);
}
- DistanceParameter<DoubleDistance> epsilonP = new DistanceParameter<DoubleDistance>(EPSILON_ID, distance);
- if(config.grab(epsilonP)) {
+ DistanceParameter<DoubleDistance> epsilonP = new DistanceParameter<>(EPSILON_ID, distance);
+ if (config.grab(epsilonP)) {
epsilon = epsilonP.getValue();
}
IntParameter minptsP = new IntParameter(MINPTS_ID);
minptsP.addConstraint(new GreaterConstraint(0));
- if(config.grab(minptsP)) {
+ if (config.grab(minptsP)) {
minpts = minptsP.getValue();
}
}
@Override
protected SUBCLU<V> makeInstance() {
- return new SUBCLU<V>(distance, epsilon, minpts);
+ return new SUBCLU<>(distance, epsilon, minpts);
}
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SubspaceClusteringAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SubspaceClusteringAlgorithm.java
index 17eb3c19..561816bd 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SubspaceClusteringAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SubspaceClusteringAlgorithm.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUESubspace.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUESubspace.java
index 6b22b233..50e3fcd5 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUESubspace.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUESubspace.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.clique;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -65,7 +65,7 @@ public class CLIQUESubspace<V extends NumberVector<?>> extends Subspace {
*/
public CLIQUESubspace(int dimension) {
super(dimension);
- denseUnits = new ArrayList<CLIQUEUnit<V>>();
+ denseUnits = new ArrayList<>();
coverage = 0;
}
@@ -76,7 +76,7 @@ public class CLIQUESubspace<V extends NumberVector<?>> extends Subspace {
*/
public CLIQUESubspace(BitSet dimensions) {
super(dimensions);
- denseUnits = new ArrayList<CLIQUEUnit<V>>();
+ denseUnits = new ArrayList<>();
coverage = 0;
}
@@ -104,12 +104,12 @@ public class CLIQUESubspace<V extends NumberVector<?>> extends Subspace {
* @return the clusters in this subspace and the corresponding cluster models
*/
public List<Pair<Subspace, ModifiableDBIDs>> determineClusters() {
- List<Pair<Subspace, ModifiableDBIDs>> clusters = new ArrayList<Pair<Subspace, ModifiableDBIDs>>();
+ List<Pair<Subspace, ModifiableDBIDs>> clusters = new ArrayList<>();
for(CLIQUEUnit<V> unit : getDenseUnits()) {
if(!unit.isAssigned()) {
ModifiableDBIDs cluster = DBIDUtil.newHashSet();
- CLIQUESubspace<V> model = new CLIQUESubspace<V>(getDimensions());
+ CLIQUESubspace<V> model = new CLIQUESubspace<>(getDimensions());
clusters.add(new Pair<Subspace, ModifiableDBIDs>(model, cluster));
dfs(unit, cluster, model);
}
@@ -217,7 +217,7 @@ public class CLIQUESubspace<V extends NumberVector<?>> extends Subspace {
return null;
}
- CLIQUESubspace<V> s = new CLIQUESubspace<V>(dimensions);
+ CLIQUESubspace<V> s = new CLIQUESubspace<>(dimensions);
for(CLIQUEUnit<V> u1 : this.getDenseUnits()) {
for(CLIQUEUnit<V> u2 : other.getDenseUnits()) {
CLIQUEUnit<V> u = u1.join(u2, all, tau);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUEUnit.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUEUnit.java
index 70f251c9..a71b2b67 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUEUnit.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUEUnit.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.clique;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -77,7 +77,7 @@ public class CLIQUEUnit<V extends NumberVector<?>> {
public CLIQUEUnit(SortedSet<Interval> intervals, ModifiableDBIDs ids) {
this.intervals = intervals;
- dimensionToInterval = new TIntObjectHashMap<Interval>();
+ dimensionToInterval = new TIntObjectHashMap<>();
for(Interval interval : intervals) {
dimensionToInterval.put(interval.getDimension(), interval);
}
@@ -93,10 +93,10 @@ public class CLIQUEUnit<V extends NumberVector<?>> {
* @param interval the interval belonging to this unit
*/
public CLIQUEUnit(Interval interval) {
- intervals = new TreeSet<Interval>();
+ intervals = new TreeSet<>();
intervals.add(interval);
- dimensionToInterval = new TIntObjectHashMap<Interval>();
+ dimensionToInterval = new TIntObjectHashMap<>();
dimensionToInterval.put(interval.getDimension(), interval);
ids = DBIDUtil.newHashSet();
@@ -254,7 +254,7 @@ public class CLIQUEUnit<V extends NumberVector<?>> {
Iterator<Interval> it1 = this.intervals.iterator();
Iterator<Interval> it2 = other.intervals.iterator();
- SortedSet<Interval> resultIntervals = new TreeSet<Interval>();
+ SortedSet<Interval> resultIntervals = new TreeSet<>();
for(int i = 0; i < this.intervals.size() - 1; i++) {
i1 = it1.next();
i2 = it2.next();
@@ -270,7 +270,7 @@ public class CLIQUEUnit<V extends NumberVector<?>> {
resultIDs.retainAll(other.ids);
if(resultIDs.size() / all >= tau) {
- return new CLIQUEUnit<V>(resultIntervals, resultIDs);
+ return new CLIQUEUnit<>(resultIntervals, resultIDs);
}
return null;
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/package-info.java
index 7a686190..7acd7572 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/package-info.java
@@ -7,7 +7,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2012
+Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/package-info.java
index 2a1eb930..2efa038d 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/package-info.java
@@ -10,7 +10,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2012
+Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelClustering.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelClustering.java
index af8fb1ea..3b5d0ec2 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelClustering.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelClustering.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.trivial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -144,7 +144,7 @@ public class ByLabelClustering extends AbstractAlgorithm<Clustering<Model>> impl
HashMap<String, DBIDs> labelMap = multiple ? multipleAssignment(relation) : singleAssignment(relation);
ModifiableDBIDs noiseids = DBIDUtil.newArray();
- Clustering<Model> result = new Clustering<Model>("By Label Clustering", "bylabel-clustering");
+ Clustering<Model> result = new Clustering<>("By Label Clustering", "bylabel-clustering");
for(Entry<String, DBIDs> entry : labelMap.entrySet()) {
DBIDs ids = entry.getValue();
if(ids.size() <= 1) {
@@ -156,13 +156,13 @@ public class ByLabelClustering extends AbstractAlgorithm<Clustering<Model>> impl
if(noisepattern != null && noisepattern.matcher(entry.getKey()).find()) {
c.setNoise(true);
}
- result.addCluster(c);
+ result.addToplevelCluster(c);
}
// Collected noise IDs.
if(noiseids.size() > 0) {
Cluster<Model> c = new Cluster<Model>("Noise", noiseids, ClusterModel.CLUSTER);
c.setNoise(true);
- result.addCluster(c);
+ result.addToplevelCluster(c);
}
return result;
}
@@ -175,7 +175,7 @@ public class ByLabelClustering extends AbstractAlgorithm<Clustering<Model>> impl
* @return a mapping of labels to ids
*/
private HashMap<String, DBIDs> singleAssignment(Relation<?> data) {
- HashMap<String, DBIDs> labelMap = new HashMap<String, DBIDs>();
+ HashMap<String, DBIDs> labelMap = new HashMap<>();
for(DBIDIter iditer = data.iterDBIDs(); iditer.valid(); iditer.advance()) {
final Object val = data.get(iditer);
@@ -193,7 +193,7 @@ public class ByLabelClustering extends AbstractAlgorithm<Clustering<Model>> impl
* @return a mapping of labels to ids
*/
private HashMap<String, DBIDs> multipleAssignment(Relation<?> data) {
- HashMap<String, DBIDs> labelMap = new HashMap<String, DBIDs>();
+ HashMap<String, DBIDs> labelMap = new HashMap<>();
for(DBIDIter iditer = data.iterDBIDs(); iditer.valid(); iditer.advance()) {
String[] labels = data.get(iditer).toString().split(" ");
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelHierarchicalClustering.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelHierarchicalClustering.java
index dfb7d37f..33101221 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelHierarchicalClustering.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelHierarchicalClustering.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.trivial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -88,8 +88,7 @@ public class ByLabelHierarchicalClustering extends AbstractAlgorithm<Clustering<
try {
Relation<ClassLabel> relation = database.getRelation(TypeUtil.CLASSLABEL);
return run(relation);
- }
- catch(NoSupportedDataTypeException e) {
+ } catch (NoSupportedDataTypeException e) {
// Otherwise, try any labellike.
return run(database.getRelation(getInputTypeRestriction()[0]));
}
@@ -101,12 +100,13 @@ public class ByLabelHierarchicalClustering extends AbstractAlgorithm<Clustering<
* @param relation The data input to use
*/
public Clustering<Model> run(Relation<?> relation) {
- HashMap<String, DBIDs> labelmap = new HashMap<String, DBIDs>();
+ HashMap<String, DBIDs> labelmap = new HashMap<>();
ModifiableDBIDs noiseids = DBIDUtil.newArray();
+ Clustering<Model> clustering = new Clustering<>("By Label Hierarchical Clustering", "bylabel-clustering");
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
final Object val = relation.get(iditer);
- if(val == null) {
+ if (val == null) {
noiseids.add(iditer);
continue;
}
@@ -115,44 +115,41 @@ public class ByLabelHierarchicalClustering extends AbstractAlgorithm<Clustering<
assign(labelmap, label, iditer);
}
- ArrayList<Cluster<Model>> clusters = new ArrayList<Cluster<Model>>(labelmap.size());
- for(Entry<String, DBIDs> entry : labelmap.entrySet()) {
+ ArrayList<Cluster<Model>> clusters = new ArrayList<>(labelmap.size());
+ for (Entry<String, DBIDs> entry : labelmap.entrySet()) {
DBIDs ids = entry.getValue();
- if(ids instanceof DBID) {
+ if (ids instanceof DBID) {
noiseids.add((DBID) ids);
continue;
}
- Cluster<Model> clus = new Cluster<Model>(entry.getKey(), ids, ClusterModel.CLUSTER, new ArrayList<Cluster<Model>>(), new ArrayList<Cluster<Model>>());
+ Cluster<Model> clus = new Cluster<Model>(entry.getKey(), ids, ClusterModel.CLUSTER);
clusters.add(clus);
}
- for(Cluster<Model> cur : clusters) {
- for(Cluster<Model> oth : clusters) {
- if(oth != cur) {
- if(oth.getName().startsWith(cur.getName())) {
- oth.getParents().add(cur);
- cur.getChildren().add(oth);
- // System.err.println(oth.getLabel() + " is a child of " +
- // cur.getLabel());
+ for (Cluster<Model> cur : clusters) {
+ boolean isrootcluster = true;
+ for (Cluster<Model> oth : clusters) {
+ if (oth != cur) {
+ if (oth.getName().startsWith(cur.getName())) {
+ clustering.addChildCluster(oth, cur);
+ if (LOG.isDebuggingFiner()) {
+ LOG.debugFiner(oth.getName() + " is a child of " + cur.getName());
+ }
+ isrootcluster = false;
}
}
}
- }
- ArrayList<Cluster<Model>> rootclusters = new ArrayList<Cluster<Model>>();
- for(Cluster<Model> cur : clusters) {
- if(cur.getParents().size() == 0) {
- rootclusters.add(cur);
+ if (isrootcluster) {
+ clustering.addToplevelCluster(cur);
}
}
// Collected noise IDs.
- if(noiseids.size() > 0) {
+ if (noiseids.size() > 0) {
Cluster<Model> c = new Cluster<Model>("Noise", noiseids, ClusterModel.CLUSTER);
c.setNoise(true);
- rootclusters.add(c);
+ clustering.addToplevelCluster(c);
}
- assert (rootclusters.size() > 0) : "No clusters found by bylabel clustering. Empty database?";
-
- return new Clustering<Model>("By Label Hierarchical Clustering", "bylabel-clustering", rootclusters);
+ return clustering;
}
/**
@@ -163,21 +160,19 @@ public class ByLabelHierarchicalClustering extends AbstractAlgorithm<Clustering<
* @param id the id of the object to be assigned
*/
private void assign(HashMap<String, DBIDs> labelMap, String label, DBIDRef id) {
- if(labelMap.containsKey(label)) {
+ if (labelMap.containsKey(label)) {
DBIDs exist = labelMap.get(label);
- if(exist instanceof DBID) {
+ if (exist instanceof DBID) {
ModifiableDBIDs n = DBIDUtil.newHashSet();
n.add((DBID) exist);
n.add(id);
labelMap.put(label, n);
- }
- else {
+ } else {
assert (exist instanceof HashSetModifiableDBIDs);
assert (exist.size() > 1);
((ModifiableDBIDs) exist).add(id);
}
- }
- else {
+ } else {
labelMap.put(label, DBIDUtil.deref(id));
}
}
@@ -191,4 +186,4 @@ public class ByLabelHierarchicalClustering extends AbstractAlgorithm<Clustering<
protected Logging getLogger() {
return LOG;
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelOrAllInOneClustering.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelOrAllInOneClustering.java
index f082db9c..76b024a2 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelOrAllInOneClustering.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelOrAllInOneClustering.java
@@ -15,7 +15,7 @@ import de.lmu.ifi.dbs.elki.database.relation.Relation;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -66,9 +66,9 @@ public class ByLabelOrAllInOneClustering extends ByLabelClustering {
// Ignore.
}
final DBIDs ids = database.getRelation(TypeUtil.ANY).getDBIDs();
- Clustering<Model> result = new Clustering<Model>("All-in-one trivial Clustering", "allinone-clustering");
+ Clustering<Model> result = new Clustering<>("All-in-one trivial Clustering", "allinone-clustering");
Cluster<Model> c = new Cluster<Model>(ids, ClusterModel.CLUSTER);
- result.addCluster(c);
+ result.addToplevelCluster(c);
return result;
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByModelClustering.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByModelClustering.java
index 2114ac16..73ad9880 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByModelClustering.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByModelClustering.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.trivial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -101,7 +101,7 @@ public class ByModelClustering extends AbstractAlgorithm<Clustering<Model>> impl
*/
public Clustering<Model> run(Relation<Model> relation) {
// Build model mapping
- HashMap<Model, ModifiableDBIDs> modelMap = new HashMap<Model, ModifiableDBIDs>();
+ HashMap<Model, ModifiableDBIDs> modelMap = new HashMap<>();
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
Model model = relation.get(iditer);
ModifiableDBIDs modelids = modelMap.get(model);
@@ -112,16 +112,16 @@ public class ByModelClustering extends AbstractAlgorithm<Clustering<Model>> impl
modelids.add(iditer);
}
- Clustering<Model> result = new Clustering<Model>("By Model Clustering", "bymodel-clustering");
+ Clustering<Model> result = new Clustering<>("By Model Clustering", "bymodel-clustering");
for(Entry<Model, ModifiableDBIDs> entry : modelMap.entrySet()) {
final Model model = entry.getKey();
final ModifiableDBIDs ids = entry.getValue();
final String name = (model instanceof GeneratorInterface) ? ((GeneratorInterface) model).getName() : model.toString();
- Cluster<Model> c = new Cluster<Model>(name, ids, model);
+ Cluster<Model> c = new Cluster<>(name, ids, model);
if(noisepattern != null && noisepattern.matcher(name).find()) {
c.setNoise(true);
}
- result.addCluster(c);
+ result.addToplevelCluster(c);
}
return result;
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllInOne.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllInOne.java
index eaa5d2b2..dae50c25 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllInOne.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllInOne.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.trivial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -63,9 +63,9 @@ public class TrivialAllInOne extends AbstractAlgorithm<Clustering<Model>> implem
public Clustering<Model> run(Relation<?> relation) {
final DBIDs ids = relation.getDBIDs();
- Clustering<Model> result = new Clustering<Model>("All-in-one trivial Clustering", "allinone-clustering");
+ Clustering<Model> result = new Clustering<>("All-in-one trivial Clustering", "allinone-clustering");
Cluster<Model> c = new Cluster<Model>(ids, ClusterModel.CLUSTER);
- result.addCluster(c);
+ result.addToplevelCluster(c);
return result;
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllNoise.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllNoise.java
index dd0f94a5..ecc7dbec 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllNoise.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllNoise.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.trivial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -62,9 +62,9 @@ public class TrivialAllNoise extends AbstractAlgorithm<Clustering<Model>> implem
public Clustering<Model> run(Relation<?> relation) {
final DBIDs ids = relation.getDBIDs();
- Clustering<Model> result = new Clustering<Model>("All-in-noise trivial Clustering", "allinnoise-clustering");
+ Clustering<Model> result = new Clustering<>("All-in-noise trivial Clustering", "allinnoise-clustering");
Cluster<Model> c = new Cluster<Model>(ids, true, ClusterModel.CLUSTER);
- result.addCluster(c);
+ result.addToplevelCluster(c);
return result;
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/package-info.java
index 5870a736..6b7b50f5 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/package-info.java
@@ -7,7 +7,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2012
+Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java
index d52a81fd..ad0b8175 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -23,7 +23,6 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.Collections;
import java.util.HashMap;
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
@@ -34,19 +33,21 @@ import de.lmu.ifi.dbs.elki.database.QueryUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRange;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.distance.similarityfunction.PrimitiveSimilarityFunction;
import de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.KernelMatrix;
@@ -58,13 +59,13 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparableMaxHeap;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparableMinHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -139,7 +140,7 @@ public class ABOD<V extends NumberVector<?>> extends AbstractDistanceBasedAlgori
/**
* Static DBID map.
*/
- private ArrayModifiableDBIDs staticids = null;
+ private ArrayDBIDs staticids = null;
/**
* Actual constructor, with parameters. Fast mode (sampling).
@@ -178,11 +179,15 @@ public class ABOD<V extends NumberVector<?>> extends AbstractDistanceBasedAlgori
*/
public OutlierResult getRanking(Relation<V> relation) {
// Fix a static set of IDs
- staticids = DBIDUtil.newArray(relation.getDBIDs());
- staticids.sort();
+ if (relation.getDBIDs() instanceof DBIDRange) {
+ staticids = (DBIDRange) relation.getDBIDs();
+ } else {
+ staticids = DBIDUtil.newArray(relation.getDBIDs());
+ ((ArrayModifiableDBIDs) staticids).sort();
+ }
KernelMatrix kernelMatrix = new KernelMatrix(primitiveKernelFunction, relation, staticids);
- Heap<DoubleDBIDPair> pq = new Heap<DoubleDBIDPair>(relation.size(), Collections.reverseOrder());
+ ComparableMaxHeap<DoubleDBIDPair> pq = new ComparableMaxHeap<>(relation.size());
// preprocess kNN neighborhoods
KNNQuery<V, DoubleDistance> knnQuery = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k);
@@ -191,7 +196,7 @@ public class ABOD<V extends NumberVector<?>> extends AbstractDistanceBasedAlgori
for (DBIDIter objKey = relation.iterDBIDs(); objKey.valid(); objKey.advance()) {
s.reset();
- KNNResult<DoubleDistance> neighbors = knnQuery.getKNNForDBID(objKey, k);
+ KNNList<DoubleDistance> neighbors = knnQuery.getKNNForDBID(objKey, k);
for (DBIDIter key1 = neighbors.iter(); key1.valid(); key1.advance()) {
for (DBIDIter key2 = neighbors.iter(); key2.valid(); key2.advance()) {
if (DBIDUtil.equal(key2, key1) || DBIDUtil.equal(key1, objKey) || DBIDUtil.equal(key2, objKey)) {
@@ -214,12 +219,13 @@ public class ABOD<V extends NumberVector<?>> extends AbstractDistanceBasedAlgori
DoubleMinMax minmaxabod = new DoubleMinMax();
WritableDoubleDataStore abodvalues = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
- for (DoubleDBIDPair pair : pq) {
+ while (!pq.isEmpty()) {
+ DoubleDBIDPair pair = pq.poll();
abodvalues.putDouble(pair, pair.doubleValue());
minmaxabod.put(pair.doubleValue());
}
// Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<Double>("Angle-based Outlier Degree", "abod-outlier", TypeUtil.DOUBLE, abodvalues, relation.getDBIDs());
+ Relation<Double> scoreResult = new MaterializedRelation<>("Angle-based Outlier Degree", "abod-outlier", TypeUtil.DOUBLE, abodvalues, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new InvertedOutlierScoreMeta(minmaxabod.getMin(), minmaxabod.getMax(), 0.0, Double.POSITIVE_INFINITY);
return new OutlierResult(scoreMeta, scoreResult);
}
@@ -234,17 +240,21 @@ public class ABOD<V extends NumberVector<?>> extends AbstractDistanceBasedAlgori
final DBIDs ids = relation.getDBIDs();
// Fix a static set of IDs
// TODO: add a DBIDUtil.ensureSorted?
- staticids = DBIDUtil.newArray(ids);
- staticids.sort();
+ if (relation.getDBIDs() instanceof DBIDRange) {
+ staticids = (DBIDRange) relation.getDBIDs();
+ } else {
+ staticids = DBIDUtil.newArray(relation.getDBIDs());
+ ((ArrayModifiableDBIDs) staticids).sort();
+ }
KernelMatrix kernelMatrix = new KernelMatrix(primitiveKernelFunction, relation, staticids);
- Heap<DoubleDBIDPair> pq = new Heap<DoubleDBIDPair>(relation.size(), Collections.reverseOrder());
+ ComparableMaxHeap<DoubleDBIDPair> pq = new ComparableMaxHeap<>(relation.size());
// get Candidate Ranking
for (DBIDIter aKey = relation.iterDBIDs(); aKey.valid(); aKey.advance()) {
WritableDoubleDataStore dists = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
// determine kNearestNeighbors and pairwise distances
- Heap<DoubleDBIDPair> nn;
+ ComparableMinHeap<DoubleDBIDPair> nn;
if (!USE_RND_SAMPLE) {
nn = calcDistsandNN(relation, kernelMatrix, sampleSize, aKey, dists);
} else {
@@ -264,7 +274,7 @@ public class ABOD<V extends NumberVector<?>> extends AbstractDistanceBasedAlgori
pq.add(DBIDUtil.newPair(var, aKey));
}
// refine Candidates
- Heap<DoubleDBIDPair> resqueue = new Heap<DoubleDBIDPair>(k);
+ ComparableMinHeap<DoubleDBIDPair> resqueue = new ComparableMinHeap<>(k);
MeanVariance s = new MeanVariance();
while (!pq.isEmpty()) {
if (resqueue.size() == k && pq.peek().doubleValue() > resqueue.peek().doubleValue()) {
@@ -302,12 +312,13 @@ public class ABOD<V extends NumberVector<?>> extends AbstractDistanceBasedAlgori
}
DoubleMinMax minmaxabod = new DoubleMinMax();
WritableDoubleDataStore abodvalues = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
- for (DoubleDBIDPair pair : pq) {
+ while (!pq.isEmpty()) {
+ DoubleDBIDPair pair = pq.poll();
abodvalues.putDouble(pair, pair.doubleValue());
minmaxabod.put(pair.doubleValue());
}
// Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<Double>("Angle-based Outlier Detection", "abod-outlier", TypeUtil.DOUBLE, abodvalues, ids);
+ Relation<Double> scoreResult = new MaterializedRelation<>("Angle-based Outlier Detection", "abod-outlier", TypeUtil.DOUBLE, abodvalues, ids);
OutlierScoreMeta scoreMeta = new InvertedOutlierScoreMeta(minmaxabod.getMin(), minmaxabod.getMax(), 0.0, Double.POSITIVE_INFINITY);
return new OutlierResult(scoreMeta, scoreResult);
}
@@ -404,8 +415,8 @@ public class ABOD<V extends NumberVector<?>> extends AbstractDistanceBasedAlgori
return (kernelMatrix.getDistance(ai, ai) + kernelMatrix.getDistance(bi, ci) - kernelMatrix.getDistance(ai, ci) - kernelMatrix.getDistance(ai, bi));
}
- private Heap<DoubleDBIDPair> calcDistsandNN(Relation<V> data, KernelMatrix kernelMatrix, int sampleSize, DBIDRef aKey, WritableDoubleDataStore dists) {
- Heap<DoubleDBIDPair> nn = new Heap<DoubleDBIDPair>(sampleSize);
+ private ComparableMinHeap<DoubleDBIDPair> calcDistsandNN(Relation<V> data, KernelMatrix kernelMatrix, int sampleSize, DBIDRef aKey, WritableDoubleDataStore dists) {
+ ComparableMinHeap<DoubleDBIDPair> nn = new ComparableMinHeap<>(sampleSize);
for (DBIDIter bKey = data.iterDBIDs(); bKey.valid(); bKey.advance()) {
double val = calcCos(kernelMatrix, aKey, bKey);
dists.putDouble(bKey, val);
@@ -420,8 +431,8 @@ public class ABOD<V extends NumberVector<?>> extends AbstractDistanceBasedAlgori
return nn;
}
- private Heap<DoubleDBIDPair> calcDistsandRNDSample(Relation<V> data, KernelMatrix kernelMatrix, int sampleSize, DBIDRef aKey, WritableDoubleDataStore dists) {
- Heap<DoubleDBIDPair> nn = new Heap<DoubleDBIDPair>(sampleSize);
+ private ComparableMinHeap<DoubleDBIDPair> calcDistsandRNDSample(Relation<V> data, KernelMatrix kernelMatrix, int sampleSize, DBIDRef aKey, WritableDoubleDataStore dists) {
+ ComparableMinHeap<DoubleDBIDPair> nn = new ComparableMinHeap<>(sampleSize);
int step = (int) ((double) data.size() / (double) sampleSize);
int counter = 0;
for (DBIDIter bKey = data.iterDBIDs(); bKey.valid(); bKey.advance()) {
@@ -445,14 +456,14 @@ public class ABOD<V extends NumberVector<?>> extends AbstractDistanceBasedAlgori
public String getExplanations(Relation<V> data) {
KernelMatrix kernelMatrix = new KernelMatrix(primitiveKernelFunction, data, staticids);
// PQ for Outlier Ranking
- Heap<DoubleDBIDPair> pq = new Heap<DoubleDBIDPair>(data.size(), Collections.reverseOrder());
- HashMap<DBID, DBIDs> explaintab = new HashMap<DBID, DBIDs>();
+ ComparableMaxHeap<DoubleDBIDPair> pq = new ComparableMaxHeap<>(data.size());
+ HashMap<DBID, DBIDs> explaintab = new HashMap<>();
// test all objects
MeanVariance s = new MeanVariance(), s2 = new MeanVariance();
for (DBIDIter objKey = data.iterDBIDs(); objKey.valid(); objKey.advance()) {
s.reset();
// Queue for the best explanation
- Heap<DoubleDBIDPair> explain = new Heap<DoubleDBIDPair>();
+ ComparableMinHeap<DoubleDBIDPair> explain = new ComparableMinHeap<>();
// determine Object
// for each pair of other objects
for (DBIDIter key1 = data.iterDBIDs(); key1.valid(); key1.advance()) {
@@ -591,7 +602,7 @@ public class ABOD<V extends NumberVector<?>> extends AbstractDistanceBasedAlgori
if (config.grab(sampleSizeP)) {
sampleSize = sampleSizeP.getValue();
}
- final ObjectParameter<PrimitiveSimilarityFunction<V, DoubleDistance>> param = new ObjectParameter<PrimitiveSimilarityFunction<V, DoubleDistance>>(KERNEL_FUNCTION_ID, PrimitiveSimilarityFunction.class, PolynomialKernelFunction.class);
+ final ObjectParameter<PrimitiveSimilarityFunction<V, DoubleDistance>> param = new ObjectParameter<>(KERNEL_FUNCTION_ID, PrimitiveSimilarityFunction.class, PolynomialKernelFunction.class);
if (config.grab(param)) {
primitiveKernelFunction = param.instantiateClass(config);
}
@@ -599,7 +610,7 @@ public class ABOD<V extends NumberVector<?>> extends AbstractDistanceBasedAlgori
@Override
protected ABOD<V> makeInstance() {
- return new ABOD<V>(k, sampleSize, primitiveKernelFunction, distanceFunction);
+ return new ABOD<>(k, sampleSize, primitiveKernelFunction, distanceFunction);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java
index 2a4885dc..99356aef 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -109,12 +109,12 @@ public abstract class AbstractAggarwalYuOutlier<V extends NumberVector<?>> exten
final int dim = RelationUtil.dimensionality(relation);
final int size = relation.size();
final DBIDs allids = relation.getDBIDs();
- final ArrayList<ArrayList<DBIDs>> ranges = new ArrayList<ArrayList<DBIDs>>();
+ final ArrayList<ArrayList<DBIDs>> ranges = new ArrayList<>();
// Temporary projection storage of the database
- final ArrayList<ArrayList<DoubleDBIDPair>> dbAxis = new ArrayList<ArrayList<DoubleDBIDPair>>(dim);
+ final ArrayList<ArrayList<DoubleDBIDPair>> dbAxis = new ArrayList<>(dim);
for(int i = 0; i < dim; i++) {
- ArrayList<DoubleDBIDPair> axis = new ArrayList<DoubleDBIDPair>(size);
+ ArrayList<DoubleDBIDPair> axis = new ArrayList<>(size);
dbAxis.add(i, axis);
}
// Project
@@ -129,7 +129,7 @@ public abstract class AbstractAggarwalYuOutlier<V extends NumberVector<?>> exten
for(int d = 0; d < dim; d++) {
ArrayList<DoubleDBIDPair> axis = dbAxis.get(d);
Collections.sort(axis);
- ArrayList<DBIDs> dimranges = new ArrayList<DBIDs>(phi + 1);
+ ArrayList<DBIDs> dimranges = new ArrayList<>(phi + 1);
dimranges.add(allids);
int start = 0;
for(int r = 0; r < phi; r++) {
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractDBOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractDBOutlier.java
index 0e6f502a..5cafe04d 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractDBOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractDBOutlier.java
@@ -86,7 +86,7 @@ public abstract class AbstractDBOutlier<O, D extends Distance<D>> extends Abstra
DoubleDataStore dbodscore = computeOutlierScores(database, relation, d);
// Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<Double>("Density-Based Outlier Detection", "db-outlier", TypeUtil.DOUBLE, dbodscore, relation.getDBIDs());
+ Relation<Double> scoreResult = new MaterializedRelation<>("Density-Based Outlier Detection", "db-outlier", TypeUtil.DOUBLE, dbodscore, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore();
return new OutlierResult(scoreMeta, scoreResult);
}
@@ -132,7 +132,7 @@ public abstract class AbstractDBOutlier<O, D extends Distance<D>> extends Abstra
*/
protected void configD(Parameterization config, DistanceFunction<?, D> distanceFunction) {
final D distanceFactory = (distanceFunction != null) ? distanceFunction.getDistanceFactory() : null;
- final DistanceParameter<D> param = new DistanceParameter<D>(D_ID, distanceFactory);
+ final DistanceParameter<D> param = new DistanceParameter<>(D_ID, distanceFactory);
if(config.grab(param)) {
d = param.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java
index c263cdfa..89be0e66 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java
@@ -49,6 +49,7 @@ import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
@@ -131,30 +132,30 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
final int dbsize = relation.size();
ArrayList<ArrayList<DBIDs>> ranges = buildRanges(relation);
- Iterable<Individuum> individuums = (new EvolutionarySearch(relation, ranges, m, rnd.getRandom())).run();
+ Heap<Individuum>.UnorderedIter individuums = (new EvolutionarySearch(relation, ranges, m, rnd.getRandom())).run();
WritableDoubleDataStore outlierScore = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
- for(Individuum ind : individuums) {
- DBIDs ids = computeSubspaceForGene(ind.getGene(), ranges);
+ for (; individuums.valid(); individuums.advance()) {
+ DBIDs ids = computeSubspaceForGene(individuums.get().getGene(), ranges);
double sparsityC = sparsity(ids.size(), dbsize, k, phi);
for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
double prev = outlierScore.doubleValue(iter);
- if(Double.isNaN(prev) || sparsityC < prev) {
+ if (Double.isNaN(prev) || sparsityC < prev) {
outlierScore.putDouble(iter, sparsityC);
}
}
}
DoubleMinMax minmax = new DoubleMinMax();
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double val = outlierScore.doubleValue(iditer);
- if(Double.isNaN(val)) {
+ if (Double.isNaN(val)) {
outlierScore.putDouble(iditer, 0.0);
val = 0.0;
}
minmax.put(val);
}
- Relation<Double> scoreResult = new MaterializedRelation<Double>("AggarwalYuEvolutionary", "aggarwal-yu-outlier", TypeUtil.DOUBLE, outlierScore, relation.getDBIDs());
+ Relation<Double> scoreResult = new MaterializedRelation<>("AggarwalYuEvolutionary", "aggarwal-yu-outlier", TypeUtil.DOUBLE, outlierScore, relation.getDBIDs());
OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), Double.NEGATIVE_INFINITY, 0.0);
return new OutlierResult(meta, scoreResult);
}
@@ -214,16 +215,16 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
this.random = random;
}
- public Iterable<Individuum> run() {
+ public Heap<Individuum>.UnorderedIter run() {
ArrayList<Individuum> pop = initialPopulation(m);
// best Population
- TopBoundedHeap<Individuum> bestSol = new TopBoundedHeap<Individuum>(m, Collections.reverseOrder());
+ TopBoundedHeap<Individuum> bestSol = new TopBoundedHeap<>(m, Collections.reverseOrder());
for (Individuum ind : pop) {
bestSol.add(ind);
}
int iterations = 0;
- while(!checkConvergence(pop)) {
+ while (!checkConvergence(pop)) {
Collections.sort(pop);
pop = rouletteRankSelection(pop);
// Crossover
@@ -231,33 +232,33 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
// Mutation with probability 0.25 , 0.25
pop = mutation(pop, 0.5, 0.5);
// Avoid duplicates
- ind: for(Individuum ind : pop) {
- for (Individuum b : bestSol) {
- if (b.equals(ind)) {
+ ind: for (Individuum ind : pop) {
+ for (Heap<Individuum>.UnorderedIter it = bestSol.unorderedIter(); it.valid(); it.advance()) {
+ if (it.get().equals(ind)) {
continue ind;
}
}
bestSol.add(ind);
}
- if(LOG.isDebuggingFinest()) {
+ if (LOG.isDebuggingFinest()) {
StringBuilder buf = new StringBuilder();
buf.append("Top solutions:\n");
- for(Individuum ind : bestSol) {
- buf.append(ind.toString()).append('\n');
+ for (Heap<Individuum>.UnorderedIter it = bestSol.unorderedIter(); it.valid(); it.advance()) {
+ buf.append(it.get().toString()).append('\n');
}
buf.append("Population:\n");
- for(Individuum ind : pop) {
+ for (Individuum ind : pop) {
buf.append(ind.toString()).append('\n');
}
LOG.debugFinest(buf.toString());
}
iterations++;
- if(iterations > MAX_ITERATIONS) {
+ if (iterations > MAX_ITERATIONS) {
LOG.warning("Maximum iterations reached.");
break;
}
}
- return bestSol;
+ return bestSol.unorderedIter();
}
/**
@@ -267,18 +268,18 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
* @return Convergence
*/
private boolean checkConvergence(Collection<Individuum> pop) {
- if(pop.size() == 0) {
+ if (pop.size() == 0) {
return true;
}
// Gene occurrence counter
int[][] occur = new int[dim][phi + 1];
// Count gene occurrences
- for(Individuum ind : pop) {
+ for (Individuum ind : pop) {
int[] gene = ind.getGene();
- for(int d = 0; d < dim; d++) {
+ for (int d = 0; d < dim; d++) {
int val = gene[d] + DONT_CARE;
- if(val < 0 || val >= phi + 1) {
+ if (val < 0 || val >= phi + 1) {
LOG.warning("Invalid gene value encountered: " + val + " in " + ind.toString());
continue;
}
@@ -287,20 +288,20 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
}
int conv = (int) (pop.size() * 0.95);
- if(LOG.isDebuggingFine()) {
+ if (LOG.isDebuggingFine()) {
LOG.debugFine("Convergence at " + conv + " of " + pop.size() + " individuums.");
}
- for(int d = 0; d < dim; d++) {
+ for (int d = 0; d < dim; d++) {
boolean converged = false;
- for(int val = 0; val < phi + 1; val++) {
- if(occur[d][val] >= conv) {
+ for (int val = 0; val < phi + 1; val++) {
+ if (occur[d][val] >= conv) {
converged = true;
break;
}
}
// A single failure to converge is sufficient to continue.
- if(!converged) {
+ if (!converged) {
return false;
}
}
@@ -315,21 +316,21 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
*/
private ArrayList<Individuum> initialPopulation(int popsize) {
// Initial Population
- ArrayList<Individuum> population = new ArrayList<Individuum>(popsize);
+ ArrayList<Individuum> population = new ArrayList<>(popsize);
// fill population
- for(int i = 0; i < popsize; i++) {
+ for (int i = 0; i < popsize; i++) {
// Random Individual
int[] gene = new int[dim];
// fill don't care ( any dimension == don't care)
- for(int j = 0; j < dim; j++) {
+ for (int j = 0; j < dim; j++) {
gene[j] = DONT_CARE;
}
// count of don't care positions
int countDim = k;
// fill non don't care positions of the Individual
- while(countDim > 0) {
+ while (countDim > 0) {
int z = random.nextInt(dim);
- if(gene[z] == DONT_CARE) {
+ if (gene[z] == DONT_CARE) {
gene[z] = random.nextInt(phi) + 1;
countDim--;
}
@@ -357,24 +358,23 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
// Relative weight := popsize - position => sum(1..popsize)
int totalweight = (popsize * (popsize + 1)) >> 1;
// Survivors
- ArrayList<Individuum> survivors = new ArrayList<Individuum>(popsize);
+ ArrayList<Individuum> survivors = new ArrayList<>(popsize);
// position of selection
- for(int i = 0; i < popsize; i++) {
+ for (int i = 0; i < popsize; i++) {
int z = random.nextInt(totalweight);
- for(int j = 0; j < popsize; j++) {
- if(z < popsize - j) {
+ for (int j = 0; j < popsize; j++) {
+ if (z < popsize - j) {
// TODO: need clone?
survivors.add(population.get(j));
break;
- }
- else {
+ } else {
// decrement
z -= (popsize - j);
}
}
}
- if(survivors.size() != popsize) {
+ if (survivors.size() != popsize) {
throw new AbortException("Selection step failed - implementation error?");
}
// Don't sort, to avoid biasing the crossover!
@@ -387,31 +387,30 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
*/
private ArrayList<Individuum> mutation(ArrayList<Individuum> population, double perc1, double perc2) {
// the Mutations
- ArrayList<Individuum> mutations = new ArrayList<Individuum>();
+ ArrayList<Individuum> mutations = new ArrayList<>();
// Set of Positions which are don't care in the String
- TreeSet<Integer> Q = new TreeSet<Integer>();
+ TreeSet<Integer> Q = new TreeSet<>();
// Set of Positions which are not don't care in the String
- TreeSet<Integer> R = new TreeSet<Integer>();
+ TreeSet<Integer> R = new TreeSet<>();
// for each individuum
- for(int j = 0; j < population.size(); j++) {
+ for (int j = 0; j < population.size(); j++) {
// clear the Sets
Q.clear();
R.clear();
// Fill the Sets with the Positions
- for(int i = 0; i < dim; i++) {
- if(population.get(j).getGene()[i] == DONT_CARE) {
+ for (int i = 0; i < dim; i++) {
+ if (population.get(j).getGene()[i] == DONT_CARE) {
Q.add(i);
- }
- else {
+ } else {
R.add(i);
}
}
//
double r1 = random.nextDouble();
- if(Q.size() != 0) {
+ if (Q.size() != 0) {
// Mutation Variant 1
- if(r1 <= perc1) {
+ if (r1 <= perc1) {
// calc Mutation Spot
Integer[] pos = new Integer[Q.size()];
pos = Q.toArray(pos);
@@ -436,7 +435,7 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
}
r1 = random.nextDouble();
// Mutation Variant 2
- if(r1 <= perc2) {
+ if (r1 <= perc2) {
// calc Mutation Spot
Integer[] pos = new Integer[R.size()];
pos = R.toArray(pos);
@@ -470,16 +469,16 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
*/
private ArrayList<Individuum> crossoverOptimized(ArrayList<Individuum> population) {
// Crossover Set of population Set
- ArrayList<Individuum> crossover = new ArrayList<Individuum>();
+ ArrayList<Individuum> crossover = new ArrayList<>();
- for(int i = 0; i < population.size() - 1; i += 2) {
+ for (int i = 0; i < population.size() - 1; i += 2) {
Pair<Individuum, Individuum> recombine = recombineOptimized(population.get(i), population.get(i + 1));
// add the Solutions to the new Set
crossover.add(recombine.getFirst());
crossover.add(recombine.getSecond());
}
// if the set contains an odd number of Subspaces, retain the last one
- if(population.size() % 2 == 1) {
+ if (population.size() % 2 == 1) {
crossover.add(population.get(population.size() - 1));
}
// Collections.sort(crossover);
@@ -496,18 +495,18 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
private Pair<Individuum, Individuum> recombineOptimized(Individuum parent1, Individuum parent2) {
Pair<Individuum, Individuum> recombinePair;
// Set of Positions in which either s1 or s2 are don't care
- ArrayList<Integer> Q = new ArrayList<Integer>(dim);
+ ArrayList<Integer> Q = new ArrayList<>(dim);
// Set of Positions in which neither s1 or s2 is don't care
- ArrayList<Integer> R = new ArrayList<Integer>(dim);
+ ArrayList<Integer> R = new ArrayList<>(dim);
- for(int i = 0; i < dim; i++) {
- if((parent1.getGene()[i] == DONT_CARE) && (parent2.getGene()[i] != DONT_CARE)) {
+ for (int i = 0; i < dim; i++) {
+ if ((parent1.getGene()[i] == DONT_CARE) && (parent2.getGene()[i] != DONT_CARE)) {
Q.add(i);
}
- if((parent1.getGene()[i] != DONT_CARE) && (parent2.getGene()[i] == DONT_CARE)) {
+ if ((parent1.getGene()[i] != DONT_CARE) && (parent2.getGene()[i] == DONT_CARE)) {
Q.add(i);
}
- if((parent1.getGene()[i] != DONT_CARE) && (parent2.getGene()[i] != DONT_CARE)) {
+ if ((parent1.getGene()[i] != DONT_CARE) && (parent2.getGene()[i] != DONT_CARE)) {
R.add(i);
}
}
@@ -519,11 +518,11 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
int count = k - R.size();
Iterator<Integer> q = Q.iterator();
- while(count > 0) {
+ while (count > 0) {
int[] l1 = b.clone();
int[] l2 = b.clone();
- while(q.hasNext()) {
+ while (q.hasNext()) {
int next = q.next();
// pos = next;
@@ -537,15 +536,14 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
final double sparsityL1 = sparsity(computeSubspaceForGene(l1, ranges).size(), dbsize, k, phi);
final double sparsityL2 = sparsity(computeSubspaceForGene(l2, ranges).size(), dbsize, k, phi);
- if(sparsityL1 <= sparsityL2) {
+ if (sparsityL1 <= sparsityL2) {
b = l1.clone();
- if(s1Null) {
+ if (s1Null) {
count--;
}
- }
- else {
+ } else {
b = l2.clone();
- if(s2Null) {
+ if (s2Null) {
count--;
}
}
@@ -557,17 +555,16 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
// create the complementary String
int[] comp = new int[dim];
- for(int i = 0; i < dim; i++) {
- if(b[i] == parent1.getGene()[i]) {
+ for (int i = 0; i < dim; i++) {
+ if (b[i] == parent1.getGene()[i]) {
comp[i] = parent2.getGene()[i];
- }
- else {
+ } else {
comp[i] = parent2.getGene()[i];
}
}
final Individuum i1 = makeIndividuum(b);
final Individuum i2 = makeIndividuum(comp);
- recombinePair = new Pair<Individuum, Individuum>(i1, i2);
+ recombinePair = new Pair<>(i1, i2);
return recombinePair;
}
@@ -584,7 +581,7 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
* @return best gene combination
*/
private Individuum combineRecursive(ArrayList<Integer> r, int i, int[] current, Individuum parent1, Individuum parent2) {
- if(i == r.size()) {
+ if (i == r.size()) {
return makeIndividuum(current);
}
// Position to modify
@@ -597,10 +594,9 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
Individuum i1 = combineRecursive(r, i + 1, gene1, parent1, parent2);
Individuum i2 = combineRecursive(r, i + 1, gene2, parent1, parent2);
// Return the better result.
- if(i1.getFitness() < i2.getFitness()) {
+ if (i1.getFitness() < i2.getFitness()) {
return i1;
- }
- else {
+ } else {
return i2;
}
}
@@ -610,8 +606,8 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
* Individuum for the evolutionary search.
*
* @author Erich Schubert
- *
- * @apiviz.exclude de.lmu.ifi.dbs.elki.utilities.pairs.FCPair
+ *
+ * @apiviz.exclude de.lmu.ifi.dbs.elki.utilities.pairs.FCPair
*/
private static class Individuum extends FCPair<Double, int[]> {
/**
@@ -661,15 +657,15 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
@Override
public boolean equals(Object obj) {
- if(!(obj instanceof Individuum)) {
+ if (!(obj instanceof Individuum)) {
return false;
}
Individuum other = (Individuum) obj;
- if(other.second.length != this.second.length) {
+ if (other.second.length != this.second.length) {
return false;
}
- for(int i = 0; i < this.second.length; i++) {
- if(other.second[i] != this.second[i]) {
+ for (int i = 0; i < this.second.length; i++) {
+ if (other.second[i] != this.second[i]) {
return false;
}
}
@@ -708,18 +704,18 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
super.makeOptions(config);
final IntParameter mP = new IntParameter(M_ID);
mP.addConstraint(new GreaterEqualConstraint(2));
- if(config.grab(mP)) {
+ if (config.grab(mP)) {
m = mP.getValue();
}
final RandomParameter rndP = new RandomParameter(SEED_ID);
- if(config.grab(rndP)) {
+ if (config.grab(rndP)) {
rnd = rndP.getValue();
}
}
@Override
protected AggarwalYuEvolutionary<V> makeInstance() {
- return new AggarwalYuEvolutionary<V>(k, phi, m, rnd);
+ return new AggarwalYuEvolutionary<>(k, phi, m, rnd);
}
}
-}
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuNaive.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuNaive.java
index 9cd7d79f..1816c3a3 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuNaive.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuNaive.java
@@ -102,15 +102,15 @@ public class AggarwalYuNaive<V extends NumberVector<?>> extends AbstractAggarwal
// Build a list of all subspaces
{
// R1 initial one-dimensional subspaces.
- Rk = new ArrayList<ArrayList<IntIntPair>>();
+ Rk = new ArrayList<>();
// Set of all dim*phi ranges
- ArrayList<IntIntPair> q = new ArrayList<IntIntPair>();
+ ArrayList<IntIntPair> q = new ArrayList<>();
for(int i = 0; i < dimensionality; i++) {
for(int j = 1; j <= phi; j++) {
IntIntPair s = new IntIntPair(i, j);
q.add(s);
// Add to first Rk
- ArrayList<IntIntPair> v = new ArrayList<IntIntPair>();
+ ArrayList<IntIntPair> v = new ArrayList<>();
v.add(s);
Rk.add(v);
}
@@ -118,7 +118,7 @@ public class AggarwalYuNaive<V extends NumberVector<?>> extends AbstractAggarwal
// build Ri
for(int i = 2; i <= k; i++) {
- ArrayList<ArrayList<IntIntPair>> Rnew = new ArrayList<ArrayList<IntIntPair>>();
+ ArrayList<ArrayList<IntIntPair>> Rnew = new ArrayList<>();
for(int j = 0; j < Rk.size(); j++) {
ArrayList<IntIntPair> c = Rk.get(j);
@@ -131,7 +131,7 @@ public class AggarwalYuNaive<V extends NumberVector<?>> extends AbstractAggarwal
}
}
if(!invalid) {
- ArrayList<IntIntPair> neu = new ArrayList<IntIntPair>(c);
+ ArrayList<IntIntPair> neu = new ArrayList<>(c);
neu.add(pair);
Rnew.add(neu);
}
@@ -165,7 +165,7 @@ public class AggarwalYuNaive<V extends NumberVector<?>> extends AbstractAggarwal
}
minmax.put(val);
}
- Relation<Double> scoreResult = new MaterializedRelation<Double>("AggarwalYuNaive", "aggarwal-yu-outlier", TypeUtil.DOUBLE, sparsity, relation.getDBIDs());
+ Relation<Double> scoreResult = new MaterializedRelation<>("AggarwalYuNaive", "aggarwal-yu-outlier", TypeUtil.DOUBLE, sparsity, relation.getDBIDs());
OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), Double.NEGATIVE_INFINITY, 0.0);
return new OutlierResult(meta, scoreResult);
}
@@ -185,7 +185,7 @@ public class AggarwalYuNaive<V extends NumberVector<?>> extends AbstractAggarwal
public static class Parameterizer<V extends NumberVector<?>> extends AbstractAggarwalYuOutlier.Parameterizer {
@Override
protected AggarwalYuNaive<V> makeInstance() {
- return new AggarwalYuNaive<V>(k, phi);
+ return new AggarwalYuNaive<>(k, phi);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/COP.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/COP.java
index ac544b7f..06168c5a 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/COP.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/COP.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -39,12 +39,12 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
@@ -54,10 +54,11 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAResult;
import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCARunner;
import de.lmu.ifi.dbs.elki.math.statistics.distribution.ChiSquaredDistribution;
-import de.lmu.ifi.dbs.elki.math.statistics.distribution.GammaDistribution;
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.GammaChoiWetteEstimator;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
@@ -108,6 +109,53 @@ public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
public static final String COP_ERRORVEC = "cop-errorvec";
/**
+ * A clone of
+ * {@link de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.DoubleArrayAdapter}
+ * that only uses the first 85% of the array!
+ */
+ private static final NumberArrayAdapter<Double, double[]> SHORTENED_ARRAY = new NumberArrayAdapter<Double, double[]>() {
+ @Override
+ public int size(double[] array) {
+ return (int) (.85 * array.length);
+ }
+
+ @Override
+ public Double get(double[] array, int off) throws IndexOutOfBoundsException {
+ return Double.valueOf(array[off]);
+ }
+
+ @Override
+ public double getDouble(double[] array, int off) throws IndexOutOfBoundsException {
+ return array[off];
+ }
+
+ @Override
+ public float getFloat(double[] array, int off) throws IndexOutOfBoundsException {
+ return (float) array[off];
+ }
+
+ @Override
+ public int getInteger(double[] array, int off) throws IndexOutOfBoundsException {
+ return (int) array[off];
+ }
+
+ @Override
+ public short getShort(double[] array, int off) throws IndexOutOfBoundsException {
+ return (short) array[off];
+ }
+
+ @Override
+ public long getLong(double[] array, int off) throws IndexOutOfBoundsException {
+ return (long) array[off];
+ }
+
+ @Override
+ public byte getByte(double[] array, int off) throws IndexOutOfBoundsException {
+ return (byte) array[off];
+ }
+ };
+
+ /**
* Number of neighbors to be considered.
*/
int k;
@@ -184,7 +232,7 @@ public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Correlation Outlier Probabilities", relation.size(), LOG) : null;
for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
- KNNResult<D> neighbors = knnQuery.getKNNForDBID(id, k + 1);
+ KNNList<D> neighbors = knnQuery.getKNNForDBID(id, k + 1);
ModifiableDBIDs nids = DBIDUtil.newHashSet(neighbors);
nids.remove(id); // Do not use query object
@@ -241,7 +289,7 @@ public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
// Sort, so we can trim the top 15% below.
Arrays.sort(dists[d]);
// Evaluate
- double score = 1 - GammaDistribution.estimate(dists[d], (int) (.85 * dists[d].length)).cdf(sqdevs);
+ double score = 1 - GammaChoiWetteEstimator.STATIC.estimate(dists[d], SHORTENED_ARRAY).cdf(sqdevs);
if (score < min) {
min = score;
vdim = d + 1;
@@ -271,11 +319,11 @@ public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
}
// combine results.
- Relation<Double> scoreResult = new MaterializedRelation<Double>("Correlation Outlier Probabilities", COP_SCORES, TypeUtil.DOUBLE, cop_score, ids);
+ Relation<Double> scoreResult = new MaterializedRelation<>("Correlation Outlier Probabilities", COP_SCORES, TypeUtil.DOUBLE, cop_score, ids);
OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore();
OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
- result.addChildResult(new MaterializedRelation<Integer>("Local Dimensionality", COP_DIM, TypeUtil.INTEGER, cop_dim, ids));
- result.addChildResult(new MaterializedRelation<Vector>("Error vectors", COP_ERRORVEC, TypeUtil.VECTOR, cop_err_v, ids));
+ result.addChildResult(new MaterializedRelation<>("Local Dimensionality", COP_DIM, TypeUtil.INTEGER, cop_dim, ids));
+ result.addChildResult(new MaterializedRelation<>("Error vectors", COP_ERRORVEC, TypeUtil.VECTOR, cop_err_v, ids));
return result;
}
@@ -361,7 +409,7 @@ public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
if (config.grab(kP)) {
k = kP.intValue();
}
- EnumParameter<DistanceDist> distP = new EnumParameter<DistanceDist>(DIST_ID, DistanceDist.class, DistanceDist.GAMMA);
+ EnumParameter<DistanceDist> distP = new EnumParameter<>(DIST_ID, DistanceDist.class, DistanceDist.GAMMA);
if (config.grab(distP)) {
dist = distP.getValue();
}
@@ -371,7 +419,7 @@ public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
if (config.grab(expectP)) {
expect = expectP.doubleValue();
}
- ObjectParameter<PCARunner<V>> pcaP = new ObjectParameter<PCARunner<V>>(PCARUNNER_ID, PCARunner.class, PCARunner.class);
+ ObjectParameter<PCARunner<V>> pcaP = new ObjectParameter<>(PCARUNNER_ID, PCARunner.class, PCARunner.class);
if (config.grab(pcaP)) {
pca = pcaP.instantiateClass(config);
}
@@ -379,7 +427,7 @@ public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
@Override
protected COP<V, D> makeInstance() {
- return new COP<V, D>(distanceFunction, k, pca, expect, dist);
+ return new COP<>(distanceFunction, k, pca, expect, dist);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierDetection.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierDetection.java
index ba1fd841..4f4d12bf 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierDetection.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierDetection.java
@@ -29,12 +29,12 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DoubleDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
@@ -117,7 +117,7 @@ public class DBOutlierDetection<O, D extends Distance<D>> extends AbstractDBOutl
if(knnQuery != null) {
for(DBIDIter iditer = distFunc.getRelation().iterDBIDs(); iditer.valid(); iditer.advance()) {
counter++;
- final KNNResult<D> knns = knnQuery.getKNNForDBID(iditer, m);
+ final KNNList<D> knns = knnQuery.getKNNForDBID(iditer, m);
if(LOG.isDebugging()) {
LOG.debugFine("distance to mth nearest neighbour" + knns.toString());
}
@@ -184,7 +184,7 @@ public class DBOutlierDetection<O, D extends Distance<D>> extends AbstractDBOutl
@Override
protected DBOutlierDetection<O, D> makeInstance() {
- return new DBOutlierDetection<O, D>(distanceFunction, d, p);
+ return new DBOutlierDetection<>(distanceFunction, d, p);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierScore.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierScore.java
index a2d39130..d6528682 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierScore.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierScore.java
@@ -103,7 +103,7 @@ public class DBOutlierScore<O, D extends Distance<D>> extends AbstractDBOutlier<
public static class Parameterizer<O, D extends Distance<D>> extends AbstractDBOutlier.Parameterizer<O, D> {
@Override
protected DBOutlierScore<O, D> makeInstance() {
- return new DBOutlierScore<O, D>(distanceFunction, d);
+ return new DBOutlierScore<>(distanceFunction, d);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java
index 2d2a4466..f8fd686f 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java
@@ -104,7 +104,7 @@ public class EMOutlier<V extends NumberVector<?>> extends AbstractAlgorithm<Outl
emo_score.putDouble(iditer, maxProb);
globmax = Math.max(maxProb, globmax);
}
- Relation<Double> scoreres = new MaterializedRelation<Double>("EM outlier scores", "em-outlier", TypeUtil.DOUBLE, emo_score, relation.getDBIDs());
+ Relation<Double> scoreres = new MaterializedRelation<>("EM outlier scores", "em-outlier", TypeUtil.DOUBLE, emo_score, relation.getDBIDs());
OutlierScoreMeta meta = new ProbabilisticOutlierScore(0.0, globmax);
// combine results.
OutlierResult result = new OutlierResult(meta, scoreres);
@@ -142,7 +142,7 @@ public class EMOutlier<V extends NumberVector<?>> extends AbstractAlgorithm<Outl
@Override
protected EMOutlier<V> makeInstance() {
- return new EMOutlier<V>(em);
+ return new EMOutlier<>(em);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java
index 6aed60fe..c9e6a634 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java
@@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2012
+Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -136,7 +136,7 @@ public class GaussianModel<V extends NumberVector<?>> extends AbstractAlgorithm<
else {
meta = new InvertedOutlierScoreMeta(mm.getMin(), mm.getMax(), 0.0, Double.POSITIVE_INFINITY);
}
- Relation<Double> res = new MaterializedRelation<Double>("Gaussian Model Outlier Score", "gaussian-model-outlier", TypeUtil.DOUBLE, oscores, relation.getDBIDs());
+ Relation<Double> res = new MaterializedRelation<>("Gaussian Model Outlier Score", "gaussian-model-outlier", TypeUtil.DOUBLE, oscores, relation.getDBIDs());
return new OutlierResult(meta, res);
}
@@ -171,7 +171,7 @@ public class GaussianModel<V extends NumberVector<?>> extends AbstractAlgorithm<
@Override
protected GaussianModel<V> makeInstance() {
- return new GaussianModel<V>(invert);
+ return new GaussianModel<>(invert);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java
index db53a3ef..294592e8 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java
@@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2012
+Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -184,7 +184,7 @@ public class GaussianUniformMixture<V extends NumberVector<?>> extends AbstractA
}
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 0.0);
- Relation<Double> res = new MaterializedRelation<Double>("Gaussian Mixture Outlier Score", "gaussian-mixture-outlier", TypeUtil.DOUBLE, oscores, relation.getDBIDs());
+ Relation<Double> res = new MaterializedRelation<>("Gaussian Mixture Outlier Score", "gaussian-mixture-outlier", TypeUtil.DOUBLE, oscores, relation.getDBIDs());
return new OutlierResult(meta, res);
}
@@ -267,7 +267,7 @@ public class GaussianUniformMixture<V extends NumberVector<?>> extends AbstractA
@Override
protected GaussianUniformMixture<V> makeInstance() {
- return new GaussianUniformMixture<V>(l, c);
+ return new GaussianUniformMixture<>(l, c);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/HilOut.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/HilOut.java
index 15f6cbf3..e0cdd0c5 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/HilOut.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/HilOut.java
@@ -1,28 +1,28 @@
package de.lmu.ifi.dbs.elki.algorithm.outlier;
-/*
- This file is part of ELKI:
+
+/*
+ This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-
- Copyright (C) 2012
- Ludwig-Maximilians-Universität München
- Lehr- und Forschungseinheit für Datenbanksysteme
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-import java.util.Collections;
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
import java.util.Comparator;
import java.util.HashSet;
import java.util.Set;
@@ -39,14 +39,16 @@ import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDFactory;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
-import de.lmu.ifi.dbs.elki.database.ids.DoubleDistanceDBIDPair;
import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDPair;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.LPNormDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.LPNormDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultUtil;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
@@ -57,7 +59,9 @@ import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.utilities.BitsUtil;
import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparatorMaxHeap;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparatorMinHeap;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ObjectHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -139,6 +143,13 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
*/
private double omega_star;
+ // public int distcomp = 1;
+
+ /**
+ * Comparator for sorting the heaps.
+ */
+ private static final Comparator<? super DistanceDBIDPair<?>> COMPARATOR = DistanceDBIDResultUtil.distanceComparator();
+
/**
* Type of output: all scores (upper bounds) or top n only
*
@@ -182,18 +193,18 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
Pair<O, O> hbbs = DatabaseUtil.computeMinMax(relation);
min = new double[d];
double[] max = new double[d];
- for(int i = 0; i < d; i++) {
+ for (int i = 0; i < d; i++) {
min[i] = hbbs.first.doubleValue(i);
max[i] = hbbs.second.doubleValue(i);
diameter = Math.max(diameter, max[i] - min[i]);
}
// Enlarge bounding box to have equal lengths.
- for(int i = 0; i < d; i++) {
+ for (int i = 0; i < d; i++) {
double diff = (diameter - (max[i] - min[i])) * .5;
min[i] -= diff;
max[i] += diff;
}
- if(LOG.isVerbose()) {
+ if (LOG.isVerbose()) {
LOG.verbose("Rescaling dataset by " + (1 / diameter) + " to fit the unit cube.");
}
}
@@ -205,7 +216,7 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
FiniteProgress progressHilOut = LOG.isVerbose() ? new FiniteProgress("HilOut iterations", d + 1, LOG) : null;
FiniteProgress progressTrueOut = LOG.isVerbose() ? new FiniteProgress("True outliers found", n, LOG) : null;
// Main part: 1. Phase max. d+1 loops
- for(int j = 0; j <= d && n_star < n; j++) {
+ for (int j = 0; j <= d && n_star < n; j++) {
// initialize (clear) out and wlb - not 100% clear in the paper
h.out.clear();
h.wlb.clear();
@@ -215,61 +226,64 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
scan(h, (int) (k * capital_n / (double) capital_n_star));
// determine the true outliers (n_star)
trueOutliers(h);
- if(progressTrueOut != null) {
+ if (progressTrueOut != null) {
progressTrueOut.setProcessed(n_star, LOG);
}
// Build the top Set as out + wlb
h.top.clear();
HashSetModifiableDBIDs top_keys = DBIDUtil.newHashSet(h.out.size());
- for(HilFeature entry : h.out) {
+ for (ObjectHeap.UnsortedIter<HilFeature> iter = h.out.unsortedIter(); iter.valid(); iter.advance()) {
+ HilFeature entry = iter.get();
top_keys.add(entry.id);
h.top.add(entry);
}
- for(HilFeature entry : h.wlb) {
- if(!top_keys.contains(entry.id)) {
+ for (ObjectHeap.UnsortedIter<HilFeature> iter = h.wlb.unsortedIter(); iter.valid(); iter.advance()) {
+ HilFeature entry = iter.get();
+ if (!top_keys.contains(entry.id)) {
// No need to update top_keys - discarded
h.top.add(entry);
}
}
- if(progressHilOut != null) {
+ if (progressHilOut != null) {
progressHilOut.incrementProcessed(LOG);
}
}
// 2. Phase: Additional Scan if less than n true outliers determined
- if(n_star < n) {
+ if (n_star < n) {
h.out.clear();
h.wlb.clear();
// TODO: reinitialize shift to 0?
scan(h, capital_n);
}
- if(progressHilOut != null) {
+ if (progressHilOut != null) {
progressHilOut.setProcessed(d, LOG);
progressHilOut.ensureCompleted(LOG);
}
- if(progressTrueOut != null) {
+ if (progressTrueOut != null) {
progressTrueOut.setProcessed(n, LOG);
progressTrueOut.ensureCompleted(LOG);
}
DoubleMinMax minmax = new DoubleMinMax();
// Return weights in out
- if(tn == ScoreType.TopN) {
+ if (tn == ScoreType.TopN) {
minmax.put(0.0);
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
hilout_weight.putDouble(iditer, 0.0);
}
- for(HilFeature ent : h.out) {
+ for (ObjectHeap.UnsortedIter<HilFeature> iter = h.out.unsortedIter(); iter.valid(); iter.advance()) {
+ HilFeature ent = iter.get();
minmax.put(ent.ubound);
hilout_weight.putDouble(ent.id, ent.ubound);
}
}
// Return all weights in pf
else {
- for(HilFeature ent : h.pf) {
+ for (HilFeature ent : h.pf) {
minmax.put(ent.ubound);
hilout_weight.putDouble(ent.id, ent.ubound);
}
}
- Relation<Double> scoreResult = new MaterializedRelation<Double>("HilOut weight", "hilout-weight", TypeUtil.DOUBLE, hilout_weight, relation.getDBIDs());
+ Relation<Double> scoreResult = new MaterializedRelation<>("HilOut weight", "hilout-weight", TypeUtil.DOUBLE, hilout_weight, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY);
OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
return result;
@@ -283,37 +297,35 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
*/
private void scan(HilbertFeatures hf, int k0) {
final int mink0 = Math.min(2 * k0, capital_n - 1);
- if(LOG.isDebuggingFine()) {
+ if (LOG.isDebuggingFine()) {
LOG.debugFine("Scanning with k0=" + k0 + " (" + mink0 + ")" + " N*=" + capital_n_star);
}
- for(int i = 0; i < hf.pf.length; i++) {
- if(hf.pf[i].ubound < omega_star) {
+ for (int i = 0; i < hf.pf.length; i++) {
+ if (hf.pf[i].ubound < omega_star) {
continue;
}
- if(hf.pf[i].lbound < hf.pf[i].ubound) {
+ if (hf.pf[i].lbound < hf.pf[i].ubound) {
double omega = hf.fastUpperBound(i);
- if(omega < omega_star) {
+ if (omega < omega_star) {
hf.pf[i].ubound = omega;
- }
- else {
+ } else {
int maxcount;
// capital_n-1 instead of capital_n: all, except self
- if(hf.top.contains(hf.pf[i])) {
+ if (hf.top.contains(hf.pf[i])) {
maxcount = capital_n - 1;
- }
- else {
+ } else {
maxcount = mink0;
}
innerScan(hf, i, maxcount);
}
}
- if(hf.pf[i].ubound > 0) {
+ if (hf.pf[i].ubound > 0) {
hf.updateOUT(i);
}
- if(hf.pf[i].lbound > 0) {
+ if (hf.pf[i].lbound > 0) {
hf.updateWLB(i);
}
- if(hf.wlb.size() >= n) {
+ if (hf.wlb.size() >= n) {
omega_star = Math.max(omega_star, hf.wlb.peek().lbound);
}
}
@@ -332,43 +344,40 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
int a = i, b = i;
int level = h, levela = h, levelb = h;
// Explore up to "maxcount" neighbors in this pass
- for(int count = 0; count < maxcount; count++) {
+ for (int count = 0; count < maxcount; count++) {
final int c; // Neighbor to explore
- if(a == 0) { // At left end, explore right
+ if (a == 0) { // At left end, explore right
// assert (b < capital_n - 1);
levelb = Math.min(levelb, hf.pf[b].level);
b++;
c = b;
- }
- else if(b >= capital_n - 1) { // At right end, explore left
+ } else if (b >= capital_n - 1) { // At right end, explore left
// assert (a > 0);
a--;
levela = Math.min(levela, hf.pf[a].level);
c = a;
- }
- else if(hf.pf[a - 1].level >= hf.pf[b].level) { // Prefer higher level
+ } else if (hf.pf[a - 1].level >= hf.pf[b].level) { // Prefer higher level
a--;
levela = Math.min(levela, hf.pf[a].level);
c = a;
- }
- else {
+ } else {
// assert (b < capital_n - 1);
levelb = Math.min(levelb, hf.pf[b].level);
b++;
c = b;
}
- if(!hf.pf[i].nn_keys.contains(hf.pf[c].id)) {
+ if (!hf.pf[i].nn_keys.contains(hf.pf[c].id)) {
// hf.distcomp ++;
hf.pf[i].insert(hf.pf[c].id, distq.distance(p, hf.pf[c].id).doubleValue(), k);
- if(hf.pf[i].nn.size() == k) {
- if(hf.pf[i].sum_nn < omega_star) {
+ if (hf.pf[i].nn.size() == k) {
+ if (hf.pf[i].sum_nn < omega_star) {
break; // stop = true
}
final int mlevel = Math.max(levela, levelb);
- if(mlevel < level) {
+ if (mlevel < level) {
level = mlevel;
final double delta = hf.minDistLevel(hf.pf[i].id, level);
- if(delta >= hf.pf[i].nn.peek().doubleDistance()) {
+ if (delta >= hf.pf[i].nn.peek().doubleDistance()) {
break; // stop = true
}
}
@@ -378,16 +387,17 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
double br = hf.boxRadius(i, a - 1, b + 1);
double newlb = 0.0;
double newub = 0.0;
- for(DoubleDistanceDBIDPair entry : hf.pf[i].nn) {
+ for (ObjectHeap.UnsortedIter<DoubleDistanceDBIDPair> iter = hf.pf[i].nn.unsortedIter(); iter.valid(); iter.advance()) {
+ DoubleDistanceDBIDPair entry = iter.get();
newub += entry.doubleDistance();
- if(entry.doubleDistance() <= br) {
+ if (entry.doubleDistance() <= br) {
newlb += entry.doubleDistance();
}
}
- if(newlb > hf.pf[i].lbound) {
+ if (newlb > hf.pf[i].lbound) {
hf.pf[i].lbound = newlb;
}
- if(newub < hf.pf[i].ubound) {
+ if (newub < hf.pf[i].ubound) {
hf.pf[i].ubound = newub;
}
}
@@ -401,8 +411,9 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
private void trueOutliers(HilbertFeatures h) {
n_star = 0;
- for(HilFeature entry : h.out) {
- if(entry.ubound >= omega_star && (entry.ubound - entry.lbound < 1E-10)) {
+ for (ObjectHeap.UnsortedIter<HilFeature> iter = h.out.unsortedIter(); iter.valid(); iter.advance()) {
+ HilFeature entry = iter.get();
+ if (entry.ubound >= omega_star && (entry.ubound - entry.lbound < 1E-10)) {
n_star++;
}
}
@@ -461,12 +472,12 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
/**
* "OUT"
*/
- private Heap<HilFeature> out;
+ private ObjectHeap<HilFeature> out;
/**
* "WLB"
*/
- private Heap<HilFeature> wlb;
+ private ObjectHeap<HilFeature> wlb;
/**
* Constructor.
@@ -483,22 +494,22 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
this.pf = new HilFeature[relation.size()];
int pos = 0;
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- pf[pos++] = new HilFeature(DBIDUtil.deref(iditer), new Heap<DoubleDistanceDBIDPair>(k, Collections.reverseOrder()));
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ pf[pos++] = new HilFeature(DBIDUtil.deref(iditer), new ComparatorMaxHeap<DoubleDistanceDBIDPair>(k, COMPARATOR));
}
- this.out = new Heap<HilFeature>(n, new Comparator<HilFeature>() {
+ this.out = new ComparatorMinHeap<>(n, new Comparator<HilFeature>() {
@Override
public int compare(HilFeature o1, HilFeature o2) {
return Double.compare(o1.ubound, o2.ubound);
}
});
- this.wlb = new Heap<HilFeature>(n, new Comparator<HilFeature>() {
+ this.wlb = new ComparatorMinHeap<>(n, new Comparator<HilFeature>() {
@Override
public int compare(HilFeature o1, HilFeature o2) {
return Double.compare(o1.lbound, o2.lbound);
}
});
- this.top = new HashSet<HilFeature>(2 * n);
+ this.top = new HashSet<>(2 * n);
}
/**
@@ -512,45 +523,42 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
// FIXME: 64 bit mode untested - sign bit is tricky to handle correctly
// with the rescaling. 63 bit should be fine. The sign bit probably needs
// to be handled differently, or at least needs careful testing of the API
- if(h >= 32) { // 32 to 63 bit
+ if (h >= 32) { // 32 to 63 bit
final long scale = Long.MAX_VALUE; // = 63 bits
- for(int i = 0; i < pf.length; i++) {
+ for (int i = 0; i < pf.length; i++) {
NumberVector<?> obj = relation.get(pf[i].id);
long[] coord = new long[d];
- for(int dim = 0; dim < d; dim++) {
+ for (int dim = 0; dim < d; dim++) {
coord[dim] = (long) (getDimForObject(obj, dim) * .5 * scale);
}
pf[i].hilbert = HilbertSpatialSorter.coordinatesToHilbert(coord, h, 1);
}
- }
- else if(h >= 16) { // 16-31 bit
+ } else if (h >= 16) { // 16-31 bit
final int scale = ~1 >>> 1;
- for(int i = 0; i < pf.length; i++) {
+ for (int i = 0; i < pf.length; i++) {
NumberVector<?> obj = relation.get(pf[i].id);
int[] coord = new int[d];
- for(int dim = 0; dim < d; dim++) {
+ for (int dim = 0; dim < d; dim++) {
coord[dim] = (int) (getDimForObject(obj, dim) * .5 * scale);
}
pf[i].hilbert = HilbertSpatialSorter.coordinatesToHilbert(coord, h, 1);
}
- }
- else if(h >= 8) { // 8-15 bit
+ } else if (h >= 8) { // 8-15 bit
final int scale = ~1 >>> 16;
- for(int i = 0; i < pf.length; i++) {
+ for (int i = 0; i < pf.length; i++) {
NumberVector<?> obj = relation.get(pf[i].id);
short[] coord = new short[d];
- for(int dim = 0; dim < d; dim++) {
+ for (int dim = 0; dim < d; dim++) {
coord[dim] = (short) (getDimForObject(obj, dim) * .5 * scale);
}
pf[i].hilbert = HilbertSpatialSorter.coordinatesToHilbert(coord, h, 16);
}
- }
- else { // 1-7 bit
+ } else { // 1-7 bit
final int scale = ~1 >>> 8;
- for(int i = 0; i < pf.length; i++) {
+ for (int i = 0; i < pf.length; i++) {
NumberVector<?> obj = relation.get(pf[i].id);
byte[] coord = new byte[d];
- for(int dim = 0; dim < d; dim++) {
+ for (int dim = 0; dim < d; dim++) {
coord[dim] = (byte) (getDimForObject(obj, dim) * .5 * scale);
}
pf[i].hilbert = HilbertSpatialSorter.coordinatesToHilbert(coord, h, 24);
@@ -558,13 +566,13 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
}
java.util.Arrays.sort(pf);
// Update levels
- for(int i = 0; i < pf.length - 1; i++) {
+ for (int i = 0; i < pf.length - 1; i++) {
pf[i].level = minRegLevel(i, i + 1);
}
// Count candidates
capital_n_star = 0;
- for(int i = 0; i < pf.length; i++) {
- if(pf[i].ubound >= omega_star) {
+ for (int i = 0; i < pf.length; i++) {
+ if (pf[i].ubound >= omega_star) {
capital_n_star++;
}
}
@@ -576,12 +584,11 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
* @param i position in pf of the feature to be inserted
*/
private void updateOUT(int i) {
- if(out.size() < n) {
+ if (out.size() < n) {
out.add(pf[i]);
- }
- else {
+ } else {
HilFeature head = out.peek();
- if(pf[i].ubound > head.ubound) {
+ if (pf[i].ubound > head.ubound) {
// replace smallest
out.replaceTopElement(pf[i]);
}
@@ -594,12 +601,11 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
* @param i position in pf of the feature to be inserted
*/
private void updateWLB(int i) {
- if(wlb.size() < n) {
+ if (wlb.size() < n) {
wlb.add(pf[i]);
- }
- else {
+ } else {
HilFeature head = wlb.peek();
- if(pf[i].lbound > head.lbound) {
+ if (pf[i].lbound > head.lbound) {
// replace smallest
wlb.replaceTopElement(pf[i]);
}
@@ -616,13 +622,12 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
private double fastUpperBound(int i) {
int pre = i;
int post = i;
- while(post - pre < k) {
+ while (post - pre < k) {
int pre_level = (pre - 1 >= 0) ? pf[pre - 1].level : -2;
int post_level = (post < capital_n - 1) ? pf[post].level : -2;
- if(post_level >= pre_level) {
+ if (post_level >= pre_level) {
post++;
- }
- else {
+ } else {
pre--;
}
}
@@ -642,7 +647,7 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
// 2 ^ - (level - 1)
final double r = 1.0 / (1 << (level - 1));
double dist = Double.POSITIVE_INFINITY;
- for(int dim = 0; dim < d; dim++) {
+ for (int dim = 0; dim < d; dim++) {
final double p_m_r = getDimForObject(obj, dim) % r;
dist = Math.min(dist, Math.min(p_m_r, r - p_m_r));
}
@@ -661,35 +666,32 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
// level 1 is supposed to have r=1 as in the original publication
final double r = 1.0 / (1 << (level - 1));
double dist;
- if(t == 1.0) {
+ if (t == 1.0) {
dist = 0.0;
- for(int dim = 0; dim < d; dim++) {
+ for (int dim = 0; dim < d; dim++) {
final double p_m_r = getDimForObject(obj, dim) % r;
// assert (p_m_r >= 0);
dist += Math.max(p_m_r, r - p_m_r);
}
- }
- else if(t == 2.0) {
+ } else if (t == 2.0) {
dist = 0.0;
- for(int dim = 0; dim < d; dim++) {
+ for (int dim = 0; dim < d; dim++) {
final double p_m_r = getDimForObject(obj, dim) % r;
// assert (p_m_r >= 0);
double a = Math.max(p_m_r, r - p_m_r);
dist += a * a;
}
dist = Math.sqrt(dist);
- }
- else if(!Double.isInfinite(t)) {
+ } else if (!Double.isInfinite(t)) {
dist = 0.0;
- for(int dim = 0; dim < d; dim++) {
+ for (int dim = 0; dim < d; dim++) {
final double p_m_r = getDimForObject(obj, dim) % r;
dist += Math.pow(Math.max(p_m_r, r - p_m_r), t);
}
dist = Math.pow(dist, 1.0 / t);
- }
- else {
+ } else {
dist = Double.NEGATIVE_INFINITY;
- for(int dim = 0; dim < d; dim++) {
+ for (int dim = 0; dim < d; dim++) {
final double p_m_r = getDimForObject(obj, dim) % r;
dist = Math.max(dist, Math.max(p_m_r, r - p_m_r));
}
@@ -705,9 +707,9 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
* @return Number of level shared
*/
private int numberSharedLevels(long[] a, long[] b) {
- for(int i = 0, j = a.length - 1; i < a.length; i++, j--) {
+ for (int i = 0, j = a.length - 1; i < a.length; i++, j--) {
final long diff = a[j] ^ b[j];
- if(diff != 0) {
+ if (diff != 0) {
// expected unused = available - used
final int expected = (a.length * Long.SIZE) - (d * h);
return ((BitsUtil.numberOfLeadingZeros(diff) + i * Long.SIZE) - expected) / d;
@@ -756,16 +758,14 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
private double boxRadius(int i, int a, int b) {
// level are inversely ordered to box sizes. min -> max
final int level;
- if(a < 0) {
- if(b >= pf.length) {
+ if (a < 0) {
+ if (b >= pf.length) {
return Double.POSITIVE_INFINITY;
}
level = maxRegLevel(i, b);
- }
- else if(b >= pf.length) {
+ } else if (b >= pf.length) {
level = maxRegLevel(i, a);
- }
- else {
+ } else {
level = Math.max(maxRegLevel(i, a), maxRegLevel(i, b));
}
return minDistLevel(pf[i].id, level);
@@ -822,7 +822,7 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
/**
* Heap with the nearest known neighbors
*/
- public Heap<DoubleDistanceDBIDPair> nn;
+ public ObjectHeap<DoubleDistanceDBIDPair> nn;
/**
* Set representation of the nearest neighbors for faster lookups
@@ -840,7 +840,7 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
* @param id Object ID
* @param nn Heap for neighbors
*/
- public HilFeature(DBID id, Heap<DoubleDistanceDBIDPair> nn) {
+ public HilFeature(DBID id, ObjectHeap<DoubleDistanceDBIDPair> nn) {
super();
this.id = id;
this.nn = nn;
@@ -861,15 +861,14 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
*/
protected void insert(DBID id, double dt, int k) {
// assert (!nn_keys.contains(id));
- if(nn.size() < k) {
+ if (nn.size() < k) {
DoubleDistanceDBIDPair entry = DBIDFactory.FACTORY.newDistancePair(dt, id);
nn.add(entry);
nn_keys.add(id);
sum_nn += dt;
- }
- else {
+ } else {
DoubleDistanceDBIDPair head = nn.peek();
- if(dt < head.doubleDistance()) {
+ if (dt < head.doubleDistance()) {
head = nn.poll(); // Remove worst
sum_nn -= head.doubleDistance();
nn_keys.remove(head);
@@ -891,7 +890,7 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
* @author Jonathan von Brünken
*
* @apiviz.exclude
- *
+ *
* @param <O> Vector type
*/
public static class Parameterizer<O extends NumberVector<?>> extends AbstractParameterizer {
@@ -952,34 +951,34 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
super.makeOptions(config);
final IntParameter kP = new IntParameter(K_ID, 5);
- if(config.grab(kP)) {
+ if (config.grab(kP)) {
k = kP.getValue();
}
final IntParameter nP = new IntParameter(N_ID, 10);
- if(config.grab(nP)) {
+ if (config.grab(nP)) {
n = nP.getValue();
}
final IntParameter hP = new IntParameter(H_ID, 32);
- if(config.grab(hP)) {
+ if (config.grab(hP)) {
h = hP.getValue();
}
-
+
ObjectParameter<LPNormDistanceFunction> distP = AbstractDistanceBasedAlgorithm.makeParameterDistanceFunction(EuclideanDistanceFunction.class, LPNormDistanceFunction.class);
if (config.grab(distP)) {
distfunc = distP.instantiateClass(config);
}
- final EnumParameter<ScoreType> tnP = new EnumParameter<ScoreType>(TN_ID, ScoreType.class, ScoreType.TopN);
- if(config.grab(tnP)) {
+ final EnumParameter<ScoreType> tnP = new EnumParameter<>(TN_ID, ScoreType.class, ScoreType.TopN);
+ if (config.grab(tnP)) {
tn = tnP.getValue();
}
}
@Override
protected HilOut<O> makeInstance() {
- return new HilOut<O>(distfunc, k, n, h, tn);
+ return new HilOut<>(distfunc, k, n, h, tn);
}
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNOutlier.java
index 4c4873dd..503487c8 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNOutlier.java
@@ -31,13 +31,13 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNList;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
@@ -53,10 +53,8 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
/**
- * <p>
* Outlier Detection based on the distance of an object to its k nearest
* neighbor.
- * </p>
*
* <p>
* Reference:<br>
@@ -119,7 +117,7 @@ public class KNNOutlier<O, D extends NumberDistance<D, ?>> extends AbstractDista
// compute distance to the k nearest neighbor.
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
// distance to the kth nearest neighbor
- final KNNResult<D> knns = knnQuery.getKNNForDBID(iditer, k);
+ final KNNList<D> knns = knnQuery.getKNNForDBID(iditer, k);
final double dkn;
if(knns instanceof DoubleDistanceKNNList) {
dkn = ((DoubleDistanceKNNList) knns).doubleKNNDistance();
@@ -138,7 +136,7 @@ public class KNNOutlier<O, D extends NumberDistance<D, ?>> extends AbstractDista
if(progressKNNDistance != null) {
progressKNNDistance.ensureCompleted(LOG);
}
- Relation<Double> scoreres = new MaterializedRelation<Double>("kNN Outlier Score", "knn-outlier", TypeUtil.DOUBLE, knno_score, relation.getDBIDs());
+ Relation<Double> scoreres = new MaterializedRelation<>("kNN Outlier Score", "knn-outlier", TypeUtil.DOUBLE, knno_score, relation.getDBIDs());
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
return new OutlierResult(meta, scoreres);
}
@@ -174,7 +172,7 @@ public class KNNOutlier<O, D extends NumberDistance<D, ?>> extends AbstractDista
@Override
protected KNNOutlier<O, D> makeInstance() {
- return new KNNOutlier<O, D>(distanceFunction, k);
+ return new KNNOutlier<>(distanceFunction, k);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNWeightOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNWeightOutlier.java
index e7eeeb9c..88603f09 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNWeightOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNWeightOutlier.java
@@ -31,15 +31,15 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDResultIter;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNList;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
@@ -124,15 +124,15 @@ public class KNNWeightOutlier<O, D extends NumberDistance<D, ?>> extends Abstrac
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
// compute sum of the distances to the k nearest neighbors
- final KNNResult<D> knn = knnQuery.getKNNForDBID(iditer, k);
+ final KNNList<D> knn = knnQuery.getKNNForDBID(iditer, k);
double skn = 0;
if(knn instanceof DoubleDistanceKNNList) {
- for(DoubleDistanceDBIDResultIter neighbor = ((DoubleDistanceKNNList) knn).iter(); neighbor.valid(); neighbor.advance()) {
+ for(DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) knn).iter(); neighbor.valid(); neighbor.advance()) {
skn += neighbor.doubleDistance();
}
}
else {
- for(DistanceDBIDResultIter<D> neighbor = knn.iter(); neighbor.valid(); neighbor.advance()) {
+ for(DistanceDBIDListIter<D> neighbor = knn.iter(); neighbor.valid(); neighbor.advance()) {
skn += neighbor.getDistance().doubleValue();
}
}
@@ -147,7 +147,7 @@ public class KNNWeightOutlier<O, D extends NumberDistance<D, ?>> extends Abstrac
progressKNNWeight.ensureCompleted(LOG);
}
- Relation<Double> res = new MaterializedRelation<Double>("Weighted kNN Outlier Score", "knnw-outlier", TypeUtil.DOUBLE, knnw_score, relation.getDBIDs());
+ Relation<Double> res = new MaterializedRelation<>("Weighted kNN Outlier Score", "knnw-outlier", TypeUtil.DOUBLE, knnw_score, relation.getDBIDs());
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
return new OutlierResult(meta, res);
}
@@ -183,7 +183,7 @@ public class KNNWeightOutlier<O, D extends NumberDistance<D, ?>> extends Abstrac
@Override
protected KNNWeightOutlier<O, D> makeInstance() {
- return new KNNWeightOutlier<O, D>(distanceFunction, k);
+ return new KNNWeightOutlier<>(distanceFunction, k);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ODIN.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ODIN.java
new file mode 100644
index 00000000..f22cdeb7
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ODIN.java
@@ -0,0 +1,192 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+
+/**
+ * Outlier detection based on the in-degree of the kNN graph.
+ *
+ * This is an adapted version: instead of using a threshold T to obtain a binary
+ * decision, we use the computed in-degree as the outlier score, normalized by
+ * k - 1 (the neighbor count excluding the query point) for comparability.
+ *
+ * Reference:
+ * <p>
+ * V. Hautamäki, I. Kärkkäinen and P. Fränti<br />
+ * Outlier detection using k-nearest neighbour graph<br />
+ * Proc. 17th Int. Conf. Pattern Recognition, ICPR 2004<br />
+ * </p>
+ *
+ * @author Erich Schubert
+ *
+ * @param <O> Object type
+ * @param <D> Distance type
+ */
+@Reference(authors = "V. Hautamäki and I. Kärkkäinen and P Fränti", title = "Outlier detection using k-nearest neighbour graph", booktitle = "Proc. 17th Int. Conf. Pattern Recognition, ICPR 2004", url = "http://dx.doi.org/10.1109/ICPR.2004.1334558")
+public class ODIN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
+ /**
+ * Class logger.
+ */
+ private static final Logging LOG = Logging.getLogger(ODIN.class);
+
+ /**
+ * Number of neighbors for kNN graph.
+ */
+ int k;
+
+ /**
+ * Constructor.
+ *
+ * @param distanceFunction Distance function
+ * @param k k parameter
+ */
+ public ODIN(DistanceFunction<? super O, D> distanceFunction, int k) {
+ super(distanceFunction);
+ this.k = k;
+ }
+
+ /**
+ * Run the ODIN algorithm
+ *
+ * @param database Database to run on.
+ * @param relation Relation to process.
+ * @return ODIN outlier result.
+ */
+ public OutlierResult run(Database database, Relation<O> relation) {
+ // Get the query functions:
+ DistanceQuery<O, D> dq = database.getDistanceQuery(relation, getDistanceFunction());
+ KNNQuery<O, D> knnq = database.getKNNQuery(dq, k);
+
+ // Get the objects to process, and a data storage for counting and output:
+ DBIDs ids = relation.getDBIDs();
+ WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB, 0.);
+
+ double inc = 1. / (k - 1);
+ double min = Double.POSITIVE_INFINITY, max = 0.0;
+ // Process all objects
+ for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ // Find the nearest neighbors (using an index, if available!)
+ KNNList<D> neighbors = knnq.getKNNForDBID(iter, k);
+ // For each neighbor, except ourselves, increase the in-degree:
+ for (DBIDIter nei = neighbors.iter(); nei.valid(); nei.advance()) {
+ if (DBIDUtil.equal(iter, nei)) {
+ continue;
+ }
+ final double value = scores.doubleValue(nei) + inc;
+ if (value < min) {
+ min = value;
+ }
+ if (value > max) {
+ max = value;
+ }
+ scores.put(nei, value);
+ }
+ }
+
+ // Wrap the result and add metadata.
+ OutlierScoreMeta meta = new InvertedOutlierScoreMeta(min, max, 0., inc * (ids.size() - 1), 1);
+ Relation<Double> rel = new MaterializedRelation<>("ODIN In-Degree", "odin", TypeUtil.DOUBLE, scores, ids);
+ return new OutlierResult(meta, rel);
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(getDistanceFunction().getInputTypeRestriction());
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ *
+ * @param <O> Object type
+ * @param <D> Distance type
+ */
+ public static class Parameterizer<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ /**
+ * Parameter for the number of nearest neighbors:
+ *
+ * <pre>
+ * -odin.k &lt;int&gt;
+ * </pre>
+ */
+ public static final OptionID K_ID = new OptionID("odin.k", "Number of neighbors to use for kNN graph.");
+
+ /**
+ * Number of nearest neighbors to use.
+ */
+ int k;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ IntParameter param = new IntParameter(K_ID);
+ // Since in a database context, the first nearest neighbor
+ // will usually be the query object itself, we require
+ // this value to be at least 2.
+ param.addConstraint(new GreaterConstraint(1));
+ if (config.grab(param)) {
+ k = param.intValue();
+ }
+ }
+
+ @Override
+ protected ODIN<O, D> makeInstance() {
+ return new ODIN<>(distanceFunction, k);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java
index bed27a33..f6d46f57 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java
@@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2012
+Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -37,14 +37,14 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
@@ -115,7 +115,7 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc
DBIDs ids = relation.getDBIDs();
// FIXME: implicit preprocessor.
- WritableDataStore<KNNResult<D>> nMinPts = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, KNNResult.class);
+ WritableDataStore<KNNList<D>> nMinPts = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, KNNList.class);
WritableDoubleDataStore coreDistance = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
WritableIntegerDataStore minPtsNeighborhoodSize = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, -1);
@@ -123,7 +123,7 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc
// N_minpts(id) and core-distance(id)
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- KNNResult<D> minptsNeighbours = knnQuery.getKNNForDBID(iditer, minpts);
+ KNNList<D> minptsNeighbours = knnQuery.getKNNForDBID(iditer, minpts);
D d = minptsNeighbours.getKNNDistance();
nMinPts.put(iditer, minptsNeighbours);
coreDistance.putDouble(iditer, d.doubleValue());
@@ -134,10 +134,10 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc
WritableDataStore<List<Double>> reachDistance = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, List.class);
WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- List<Double> core = new ArrayList<Double>();
+ List<Double> core = new ArrayList<>();
double lrd = 0;
// TODO: optimize for double distances
- for (DistanceDBIDResultIter<D> neighbor = nMinPts.get(iditer).iter(); neighbor.valid(); neighbor.advance()) {
+ for (DistanceDBIDListIter<D> neighbor = nMinPts.get(iditer).iter(); neighbor.valid(); neighbor.advance()) {
double coreDist = coreDistance.doubleValue(neighbor);
double dist = distQuery.distance(iditer, neighbor).doubleValue();
double rd = Math.max(coreDist, dist);
@@ -165,7 +165,7 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc
ofminmax.put(of);
}
// Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<Double>("OPTICS Outlier Scores", "optics-outlier", TypeUtil.DOUBLE, ofs, relation.getDBIDs());
+ Relation<Double> scoreResult = new MaterializedRelation<>("OPTICS Outlier Scores", "optics-outlier", TypeUtil.DOUBLE, ofs, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(ofminmax.getMin(), ofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
return new OutlierResult(scoreMeta, scoreResult);
}
@@ -202,7 +202,7 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc
@Override
protected OPTICSOF<O, D> makeInstance() {
- return new OPTICSOF<O, D>(distanceFunction, minpts);
+ return new OPTICSOF<>(distanceFunction, minpts);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java
index 00c4a8ec..f3ef5ab5 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -33,7 +33,7 @@ import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
* @author Erich Schubert
*
* @apiviz.landmark
- *
+ * @apiviz.excludeSubtypes
* @apiviz.has OutlierResult
*/
public interface OutlierAlgorithm extends Algorithm {
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java
index 93eca7db..092bbc45 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java
@@ -31,18 +31,19 @@ import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.DistanceDBIDPair;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDPair;
+import de.lmu.ifi.dbs.elki.database.ids.generic.GenericDistanceDBIDList;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.GenericDistanceDBIDList;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.Mean;
@@ -136,11 +137,12 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?>, D extends
/**
* Run the algorithm on the given relation.
*
+ * @param database Database
* @param relation Relation to process
* @return Outlier result
*/
- public OutlierResult run(Relation<V> relation) {
- DistanceQuery<V, D> distFunc = relation.getDatabase().getDistanceQuery(relation, distanceFunction);
+ public OutlierResult run(Database database, Relation<V> relation) {
+ DistanceQuery<V, D> distFunc = database.getDistanceQuery(relation, distanceFunction);
Collection<V> refPoints = refp.getReferencePoints(relation);
DBIDs ids = relation.getDBIDs();
@@ -158,7 +160,7 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?>, D extends
}
V firstRef = iter.next();
// compute distance vector for the first reference point
- DistanceDBIDResult<D> firstReferenceDists = computeDistanceVector(firstRef, relation, distFunc);
+ DistanceDBIDList<D> firstReferenceDists = computeDistanceVector(firstRef, relation, distFunc);
for(int l = 0; l < firstReferenceDists.size(); l++) {
double density = computeDensity(firstReferenceDists, l);
// Initial value
@@ -167,7 +169,7 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?>, D extends
// compute density values for all remaining reference points
while(iter.hasNext()) {
V refPoint = iter.next();
- DistanceDBIDResult<D> referenceDists = computeDistanceVector(refPoint, relation, distFunc);
+ DistanceDBIDList<D> referenceDists = computeDistanceVector(refPoint, relation, distFunc);
// compute density value for each object
for(int l = 0; l < referenceDists.size(); l++) {
double density = computeDensity(referenceDists, l);
@@ -194,9 +196,9 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?>, D extends
// adds reference points to the result. header information for the
// visualizer to find the reference points in the result
- ReferencePointsResult<V> refp = new ReferencePointsResult<V>("Reference points", "reference-points", refPoints);
+ ReferencePointsResult<V> refp = new ReferencePointsResult<>("Reference points", "reference-points", refPoints);
- Relation<Double> scoreResult = new MaterializedRelation<Double>("Reference-points Outlier Scores", "reference-outlier", TypeUtil.DOUBLE, rbod_score, relation.getDBIDs());
+ Relation<Double> scoreResult = new MaterializedRelation<>("Reference-points Outlier Scores", "reference-outlier", TypeUtil.DOUBLE, rbod_score, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(0.0, 1.0, 0.0, 1.0, 0.0);
OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
result.addChildResult(refp);
@@ -213,9 +215,9 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?>, D extends
* @return array containing the distance to one reference point for each
* database object and the object id
*/
- protected DistanceDBIDResult<D> computeDistanceVector(V refPoint, Relation<V> database, DistanceQuery<V, D> distFunc) {
+ protected DistanceDBIDList<D> computeDistanceVector(V refPoint, Relation<V> database, DistanceQuery<V, D> distFunc) {
// TODO: optimize for double distances?
- GenericDistanceDBIDList<D> referenceDists = new GenericDistanceDBIDList<D>(database.size());
+ GenericDistanceDBIDList<D> referenceDists = new GenericDistanceDBIDList<>(database.size());
for(DBIDIter iditer = database.iterDBIDs(); iditer.valid(); iditer.advance()) {
referenceDists.add(distFunc.distance(iditer, refPoint), iditer);
}
@@ -235,7 +237,7 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?>, D extends
* @param index index of the current object
* @return density for one object and reference point
*/
- protected double computeDensity(DistanceDBIDResult<D> referenceDists, int index) {
+ protected double computeDensity(DistanceDBIDList<D> referenceDists, int index) {
final DistanceDBIDPair<D> x = referenceDists.get(index);
final double xDist = x.getDistance().doubleValue();
@@ -321,7 +323,7 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?>, D extends
if(config.grab(pK)) {
k = pK.getValue();
}
- final ObjectParameter<ReferencePointsHeuristic<V>> refpP = new ObjectParameter<ReferencePointsHeuristic<V>>(REFP_ID, ReferencePointsHeuristic.class, GridBasedReferencePoints.class);
+ final ObjectParameter<ReferencePointsHeuristic<V>> refpP = new ObjectParameter<>(REFP_ID, ReferencePointsHeuristic.class, GridBasedReferencePoints.class);
if(config.grab(refpP)) {
refp = refpP.instantiateClass(config);
}
@@ -329,7 +331,7 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?>, D extends
@Override
protected ReferenceBasedOutlierDetection<V, D> makeInstance() {
- return new ReferenceBasedOutlierDetection<V, D>(k, distanceFunction, refp);
+ return new ReferenceBasedOutlierDetection<>(k, distanceFunction, refp);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleCOP.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleCOP.java
index e8077819..38820ab7 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleCOP.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleCOP.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -41,11 +41,11 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
@@ -107,7 +107,7 @@ public class SimpleCOP<V extends NumberVector<?>, D extends NumberDistance<D, ?>
public SimpleCOP(DistanceFunction<? super V, D> distanceFunction, int k, PCAFilteredRunner<V> pca) {
super(distanceFunction);
this.k = k;
- this.dependencyDerivator = new DependencyDerivator<V, D>(null, FormatUtil.NF8, pca, 0, false);
+ this.dependencyDerivator = new DependencyDerivator<>(null, FormatUtil.NF, pca, 0, false);
}
public OutlierResult run(Database database, Relation<V> data) throws IllegalStateException {
@@ -124,7 +124,7 @@ public class SimpleCOP<V extends NumberVector<?>, D extends NumberDistance<D, ?>
FiniteProgress progressLocalPCA = LOG.isVerbose() ? new FiniteProgress("Correlation Outlier Probabilities", data.size(), LOG) : null;
double sqrt2 = Math.sqrt(2.0);
for (DBIDIter id = data.iterDBIDs(); id.valid(); id.advance()) {
- KNNResult<D> neighbors = knnQuery.getKNNForDBID(id, k + 1);
+ KNNList<D> neighbors = knnQuery.getKNNForDBID(id, k + 1);
ModifiableDBIDs nids = DBIDUtil.newArray(neighbors);
nids.remove(id);
@@ -156,14 +156,14 @@ public class SimpleCOP<V extends NumberVector<?>, D extends NumberDistance<D, ?>
}
}
// combine results.
- Relation<Double> scoreResult = new MaterializedRelation<Double>("Original Correlation Outlier Probabilities", "origcop-outlier", TypeUtil.DOUBLE, cop_score, ids);
+ Relation<Double> scoreResult = new MaterializedRelation<>("Original Correlation Outlier Probabilities", "origcop-outlier", TypeUtil.DOUBLE, cop_score, ids);
OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore();
OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
// extra results
- result.addChildResult(new MaterializedRelation<Integer>("Local Dimensionality", COP.COP_DIM, TypeUtil.INTEGER, cop_dim, ids));
- result.addChildResult(new MaterializedRelation<Vector>("Error vectors", COP.COP_ERRORVEC, TypeUtil.VECTOR, cop_err_v, ids));
- result.addChildResult(new MaterializedRelation<Matrix>("Data vectors", "cop-datavec", TypeUtil.MATRIX, cop_datav, ids));
- result.addChildResult(new MaterializedRelation<CorrelationAnalysisSolution<?>>("Correlation analysis", "cop-sol", new SimpleTypeInformation<CorrelationAnalysisSolution<?>>(CorrelationAnalysisSolution.class), cop_sol, ids));
+ result.addChildResult(new MaterializedRelation<>("Local Dimensionality", COP.COP_DIM, TypeUtil.INTEGER, cop_dim, ids));
+ result.addChildResult(new MaterializedRelation<>("Error vectors", COP.COP_ERRORVEC, TypeUtil.VECTOR, cop_err_v, ids));
+ result.addChildResult(new MaterializedRelation<>("Data vectors", "cop-datavec", TypeUtil.MATRIX, cop_datav, ids));
+ result.addChildResult(new MaterializedRelation<>("Correlation analysis", "cop-sol", new SimpleTypeInformation<CorrelationAnalysisSolution<?>>(CorrelationAnalysisSolution.class), cop_sol, ids));
return result;
}
@@ -222,7 +222,7 @@ public class SimpleCOP<V extends NumberVector<?>, D extends NumberDistance<D, ?>
if (config.grab(kP)) {
k = kP.intValue();
}
- ObjectParameter<PCAFilteredRunner<V>> pcaP = new ObjectParameter<PCAFilteredRunner<V>>(PCARUNNER_ID, PCAFilteredRunner.class, PCAFilteredRunner.class);
+ ObjectParameter<PCAFilteredRunner<V>> pcaP = new ObjectParameter<>(PCARUNNER_ID, PCAFilteredRunner.class, PCAFilteredRunner.class);
if (config.grab(pcaP)) {
pca = pcaP.instantiateClass(config);
}
@@ -230,7 +230,7 @@ public class SimpleCOP<V extends NumberVector<?>, D extends NumberDistance<D, ?>
@Override
protected SimpleCOP<V, D> makeInstance() {
- return new SimpleCOP<V, D>(distanceFunction, k, pca);
+ return new SimpleCOP<>(distanceFunction, k, pca);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ALOCI.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/ALOCI.java
index 41da687f..d48679a9 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ALOCI.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/ALOCI.java
@@ -1,4 +1,4 @@
-package de.lmu.ifi.dbs.elki.algorithm.outlier;
+package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
/*
This file is part of ELKI:
@@ -28,6 +28,7 @@ import java.util.List;
import java.util.Random;
import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
@@ -42,8 +43,8 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
@@ -163,7 +164,7 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex
}
}
- List<ALOCIQuadTree> qts = new ArrayList<ALOCIQuadTree>(g);
+ List<ALOCIQuadTree> qts = new ArrayList<>(g);
double[] nshift = new double[dim];
ALOCIQuadTree qt = new ALOCIQuadTree(min, max, nshift, nmin, relation);
@@ -251,7 +252,7 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex
if(progressLOCI != null) {
progressLOCI.ensureCompleted(LOG);
}
- Relation<Double> scoreResult = new MaterializedRelation<Double>("aLOCI normalized MDEF", "aloci-mdef-outlier", TypeUtil.DOUBLE, mdef_norm, relation.getDBIDs());
+ Relation<Double> scoreResult = new MaterializedRelation<>("aLOCI normalized MDEF", "aloci-mdef-outlier", TypeUtil.DOUBLE, mdef_norm, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY);
OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
return result;
@@ -370,7 +371,7 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex
}
this.relation = relation;
ArrayModifiableDBIDs ids = DBIDUtil.newArray(relation.getDBIDs());
- List<Node> children = new ArrayList<Node>();
+ List<Node> children = new ArrayList<>();
bulkLoad(min.clone(), max.clone(), children, ids, 0, ids.size(), 0, 0, 0);
this.root = new Node(0, new Vector(center), ids.size(), -1, children);
}
@@ -432,7 +433,7 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex
return;
}
else {
- List<Node> newchildren = new ArrayList<Node>();
+ List<Node> newchildren = new ArrayList<>();
bulkLoad(lmin, lmax, newchildren, ids, start, end, 0, level + 1, 0);
children.add(new Node(code, new Vector(center), end - start, level, newchildren));
return;
@@ -730,7 +731,7 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex
@Override
protected ALOCI<O, D> makeInstance() {
- return new ALOCI<O, D>(distanceFunction, nmin, alpha, g, rnd);
+ return new ALOCI<>(distanceFunction, nmin, alpha, g, rnd);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/FlexibleLOF.java
index 66bed47a..80f60e8b 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/FlexibleLOF.java
@@ -1,10 +1,10 @@
-package de.lmu.ifi.dbs.elki.algorithm.outlier;
+package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -25,9 +25,11 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.QueryUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
@@ -36,6 +38,10 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
@@ -44,10 +50,6 @@ import de.lmu.ifi.dbs.elki.database.query.rknn.RKNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDResultIter;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNList;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -70,8 +72,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
/**
* <p>
- * Algorithm to compute density-based local outlier factors in a database based
- * on a specified parameter {@link #K_ID} ({@code -lof.k}).
+ * Flexible variant of the "Local Outlier Factor" algorithm.
* </p>
*
* <p>
@@ -85,14 +86,15 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
* The k nearest neighbors are determined using the parameter
* {@link de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm#DISTANCE_FUNCTION_ID}
* , while the reference set used in reachability distance computation is
- * configured using {@link #REACHABILITY_DISTANCE_FUNCTION_ID}.
+ * configured using {@link Parameterizer#REACHABILITY_DISTANCE_FUNCTION_ID}.
* </p>
*
* <p>
- * The original LOF parameter was called &quot;minPts&quot;. Since kNN queries
- * in ELKI have slightly different semantics - exactly k neighbors are returned
- * - we chose to rename the parameter to {@link #K_ID} ({@code -lof.k}) to
- * reflect this difference.
+ * The original LOF parameter was called &quot;minPts&quot;. For consistency
+ * with the name "kNN query", we chose to rename the parameter to {@code k}.
+ * Flexible LOF allows you to set the two values different, which yields the
+ * parameters {@link Parameterizer#KREF_ID} ({@code -lof.krefer}) and
+ * {@link Parameterizer#KREACH_ID} ({@code -lof.kreach})
* </p>
*
* <p>
@@ -116,33 +118,26 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
@Title("LOF: Local Outlier Factor")
@Description("Algorithm to compute density-based local outlier factors in a database based on the neighborhood size parameter 'k'")
@Reference(authors = "M. M. Breunig, H.-P. Kriegel, R. Ng, and J. Sander", title = "LOF: Identifying Density-Based Local Outliers", booktitle = "Proc. 2nd ACM SIGMOD Int. Conf. on Management of Data (SIGMOD '00), Dallas, TX, 2000", url = "http://dx.doi.org/10.1145/342009.335388")
-public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+public class FlexibleLOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
- private static final Logging LOG = Logging.getLogger(LOF.class);
+ private static final Logging LOG = Logging.getLogger(FlexibleLOF.class);
/**
- * The distance function to determine the reachability distance between
- * database objects.
+ * Number of neighbors in comparison set.
*/
- public static final OptionID REACHABILITY_DISTANCE_FUNCTION_ID = new OptionID("lof.reachdistfunction", "Distance function to determine the reachability distance between database objects.");
+ protected int krefer = 2;
/**
- * Parameter to specify the number of nearest neighbors of an object to be
- * considered for computing its LOF_SCORE, must be an integer greater than 1.
+ * Number of neighbors used for reachability distance.
*/
- public static final OptionID K_ID = new OptionID("lof.k", "The number of nearest neighbors of an object to be considered for computing its LOF_SCORE.");
-
- /**
- * Holds the value of {@link #K_ID}.
- */
- protected int k = 2;
+ protected int kreach = 2;
/**
* Neighborhood distance function.
*/
- protected DistanceFunction<? super O, D> neighborhoodDistanceFunction;
+ protected DistanceFunction<? super O, D> referenceDistanceFunction;
/**
* Reachability distance function.
@@ -160,42 +155,30 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
/**
* Constructor.
*
- * @param k the value of k
+ * @param krefer The number of neighbors for reference
+ * @param kreach The number of neighbors for reachability distance
* @param neighborhoodDistanceFunction the neighborhood distance function
* @param reachabilityDistanceFunction the reachability distance function
*/
- public LOF(int k, DistanceFunction<? super O, D> neighborhoodDistanceFunction, DistanceFunction<? super O, D> reachabilityDistanceFunction) {
+ public FlexibleLOF(int krefer, int kreach, DistanceFunction<? super O, D> neighborhoodDistanceFunction, DistanceFunction<? super O, D> reachabilityDistanceFunction) {
super();
- this.k = k + (objectIsInKNN ? 0 : 1);
- this.neighborhoodDistanceFunction = neighborhoodDistanceFunction;
+ this.krefer = krefer + (objectIsInKNN ? 0 : 1);
+ this.kreach = kreach + (objectIsInKNN ? 0 : 1);
+ this.referenceDistanceFunction = neighborhoodDistanceFunction;
this.reachabilityDistanceFunction = reachabilityDistanceFunction;
}
/**
- * Constructor.
- *
- * @param k the value of k
- * @param distanceFunction the distance function
- *
- * Uses the same distance function for neighborhood computation and
- * reachability distance (standard as in the original publication),
- * same as {@link #LOF(int, DistanceFunction, DistanceFunction)
- * LOF(int, distanceFunction, distanceFunction)}.
- */
- public LOF(int k, DistanceFunction<? super O, D> distanceFunction) {
- this(k, distanceFunction, distanceFunction);
- }
-
- /**
- * Performs the Generalized LOF_SCORE algorithm on the given database by
- * calling {@link #doRunInTime}.
+ * Performs the Generalized LOF algorithm on the given database by calling
+ * {@link #doRunInTime}.
*
+ * @param database Database to query
* @param relation Data to process
* @return LOF outlier result
*/
- public OutlierResult run(Relation<O> relation) {
+ public OutlierResult run(Database database, Relation<O> relation) {
StepProgress stepprog = LOG.isVerbose() ? new StepProgress("LOF", 3) : null;
- Pair<KNNQuery<O, D>, KNNQuery<O, D>> pair = getKNNQueries(relation, stepprog);
+ Pair<KNNQuery<O, D>, KNNQuery<O, D>> pair = getKNNQueries(database, relation, stepprog);
KNNQuery<O, D> kNNRefer = pair.getFirst();
KNNQuery<O, D> kNNReach = pair.getSecond();
return doRunInTime(relation.getDBIDs(), kNNRefer, kNNReach, stepprog).getResult();
@@ -208,40 +191,41 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
* @param stepprog the progress logger
* @return the kNN queries for the algorithm
*/
- private Pair<KNNQuery<O, D>, KNNQuery<O, D>> getKNNQueries(Relation<O> relation, StepProgress stepprog) {
+ private Pair<KNNQuery<O, D>, KNNQuery<O, D>> getKNNQueries(Database database, Relation<O> relation, StepProgress stepprog) {
// "HEAVY" flag for knnReach since it is used more than once
- KNNQuery<O, D> knnReach = QueryUtil.getKNNQuery(relation, reachabilityDistanceFunction, k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
+ KNNQuery<O, D> knnReach = QueryUtil.getKNNQuery(relation, reachabilityDistanceFunction, kreach, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
// No optimized kNN query - use a preprocessor!
if (!(knnReach instanceof PreprocessorKNNQuery)) {
if (stepprog != null) {
- if (neighborhoodDistanceFunction.equals(reachabilityDistanceFunction)) {
+ if (referenceDistanceFunction.equals(reachabilityDistanceFunction)) {
stepprog.beginStep(1, "Materializing neighborhoods w.r.t. reference neighborhood distance function.", LOG);
} else {
stepprog.beginStep(1, "Not materializing neighborhoods w.r.t. reference neighborhood distance function, but materializing neighborhoods w.r.t. reachability distance function.", LOG);
}
}
- MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<O, D>(relation, reachabilityDistanceFunction, k);
- relation.getDatabase().addIndex(preproc);
- DistanceQuery<O, D> rdq = relation.getDatabase().getDistanceQuery(relation, reachabilityDistanceFunction);
- knnReach = preproc.getKNNQuery(rdq, k);
+ int kpreproc = (referenceDistanceFunction.equals(reachabilityDistanceFunction)) ? Math.max(kreach, krefer) : kreach;
+ MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<>(relation, reachabilityDistanceFunction, kpreproc);
+ database.addIndex(preproc);
+ DistanceQuery<O, D> rdq = database.getDistanceQuery(relation, reachabilityDistanceFunction);
+ knnReach = preproc.getKNNQuery(rdq, kreach);
}
// knnReach is only used once
KNNQuery<O, D> knnRefer;
- if (neighborhoodDistanceFunction == reachabilityDistanceFunction || neighborhoodDistanceFunction.equals(reachabilityDistanceFunction)) {
+ if (referenceDistanceFunction == reachabilityDistanceFunction || referenceDistanceFunction.equals(reachabilityDistanceFunction)) {
knnRefer = knnReach;
} else {
// do not materialize the first neighborhood, since it is used only once
- knnRefer = QueryUtil.getKNNQuery(relation, neighborhoodDistanceFunction, k);
+ knnRefer = QueryUtil.getKNNQuery(relation, referenceDistanceFunction, krefer);
}
- return new Pair<KNNQuery<O, D>, KNNQuery<O, D>>(knnRefer, knnReach);
+ return new Pair<>(knnRefer, knnReach);
}
/**
* Performs the Generalized LOF_SCORE algorithm on the given database and
- * returns a {@link LOF.LOFResult} encapsulating information that may be
- * needed by an OnlineLOF algorithm.
+ * returns a {@link FlexibleLOF.LOFResult} encapsulating information that may
+ * be needed by an OnlineLOF algorithm.
*
* @param ids Object ids
* @param kNNRefer the kNN query w.r.t. reference neighborhood distance
@@ -279,11 +263,11 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
}
// Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<Double>("Local Outlier Factor", "lof-outlier", TypeUtil.DOUBLE, lofs, ids);
+ Relation<Double> scoreResult = new MaterializedRelation<>("Local Outlier Factor", "lof-outlier", TypeUtil.DOUBLE, lofs, ids);
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
- return new LOFResult<O, D>(result, kNNRefer, kNNReach, lrds, lofs);
+ return new LOFResult<>(result, kNNRefer, kNNReach, lrds, lofs);
}
/**
@@ -298,14 +282,14 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
FiniteProgress lrdsProgress = LOG.isVerbose() ? new FiniteProgress("LRD", ids.size(), LOG) : null;
for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
- final KNNResult<D> neighbors = knnReach.getKNNForDBID(iter, k);
+ final KNNList<D> neighbors = knnReach.getKNNForDBID(iter, kreach);
double sum = 0.0;
int count = 0;
if (neighbors instanceof DoubleDistanceKNNList) {
// Fast version for double distances
- for (DoubleDistanceDBIDResultIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
+ for (DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
if (objectIsInKNN || !DBIDUtil.equal(neighbor, iter)) {
- KNNResult<D> neighborsNeighbors = knnReach.getKNNForDBID(neighbor, k);
+ KNNList<D> neighborsNeighbors = knnReach.getKNNForDBID(neighbor, kreach);
final double nkdist;
if (neighborsNeighbors instanceof DoubleDistanceKNNList) {
nkdist = ((DoubleDistanceKNNList) neighborsNeighbors).doubleKNNDistance();
@@ -317,16 +301,16 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
}
}
} else {
- for (DistanceDBIDResultIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ for (DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
if (objectIsInKNN || !DBIDUtil.equal(neighbor, iter)) {
- KNNResult<D> neighborsNeighbors = knnReach.getKNNForDBID(neighbor, k);
+ KNNList<D> neighborsNeighbors = knnReach.getKNNForDBID(neighbor, kreach);
sum += Math.max(neighbor.getDistance().doubleValue(), neighborsNeighbors.getKNNDistance().doubleValue());
count++;
}
}
}
// Avoid division by 0
- final double lrd = (sum > 0) ? (count / sum) : 0;
+ final double lrd = (sum > 0) ? (count / sum) : Double.POSITIVE_INFINITY;
lrds.putDouble(iter, lrd);
if (lrdsProgress != null) {
lrdsProgress.incrementProcessed(LOG);
@@ -356,8 +340,8 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
final double lrdp = lrds.doubleValue(iter);
final double lof;
- if (lrdp > 0) {
- final KNNResult<D> neighbors = knnRefer.getKNNForDBID(iter, k);
+ if (lrdp > 0 && !Double.isInfinite(lrdp)) {
+ final KNNList<D> neighbors = knnRefer.getKNNForDBID(iter, krefer);
double sum = 0.0;
int count = 0;
for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
@@ -373,7 +357,9 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
}
lofs.putDouble(iter, lof);
// update minimum and maximum
- lofminmax.put(lof);
+ if (!Double.isInfinite(lof)) {
+ lofminmax.put(lof);
+ }
if (progressLOFs != null) {
progressLOFs.incrementProcessed(LOG);
@@ -382,16 +368,16 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
if (progressLOFs != null) {
progressLOFs.ensureCompleted(LOG);
}
- return new Pair<WritableDoubleDataStore, DoubleMinMax>(lofs, lofminmax);
+ return new Pair<>(lofs, lofminmax);
}
@Override
public TypeInformation[] getInputTypeRestriction() {
final TypeInformation type;
- if (reachabilityDistanceFunction.equals(neighborhoodDistanceFunction)) {
+ if (reachabilityDistanceFunction.equals(referenceDistanceFunction)) {
type = reachabilityDistanceFunction.getInputTypeRestriction();
} else {
- type = new CombinedTypeInformation(neighborhoodDistanceFunction.getInputTypeRestriction(), reachabilityDistanceFunction.getInputTypeRestriction());
+ type = new CombinedTypeInformation(referenceDistanceFunction.getInputTypeRestriction(), reachabilityDistanceFunction.getInputTypeRestriction());
}
return TypeUtil.array(type);
}
@@ -403,11 +389,13 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
/**
* Encapsulates information like the neighborhood, the LRD and LOF values of
- * the objects during a run of the {@link LOF} algorithm.
+ * the objects during a run of the {@link FlexibleLOF} algorithm.
+ *
+ * @author Elke Achtert
*/
public static class LOFResult<O, D extends NumberDistance<D, ?>> {
/**
- * The result of the run of the {@link LOF} algorithm.
+ * The result of the run of the {@link FlexibleLOF} algorithm.
*/
private OutlierResult result;
@@ -442,10 +430,10 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
private final WritableDoubleDataStore lofs;
/**
- * Encapsulates information generated during a run of the {@link LOF}
- * algorithm.
+ * Encapsulates information generated during a run of the
+ * {@link FlexibleLOF} algorithm.
*
- * @param result the result of the run of the {@link LOF} algorithm
+ * @param result the result of the run of the {@link FlexibleLOF} algorithm
* @param kNNRefer the kNN query w.r.t. the reference neighborhood distance
* @param kNNReach the kNN query w.r.t. the reachability distance
* @param lrds the LRD values of the objects
@@ -498,7 +486,7 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
/**
* Get the outlier result.
*
- * @return the result of the run of the {@link LOF} algorithm
+ * @return the result of the run of the {@link FlexibleLOF} algorithm
*/
public OutlierResult getResult() {
return result;
@@ -550,9 +538,33 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
*/
public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
/**
- * The neighborhood size to use.
+ * The distance function to determine the reachability distance between
+ * database objects.
+ */
+ public static final OptionID REACHABILITY_DISTANCE_FUNCTION_ID = new OptionID("lof.reachdistfunction", "Distance function to determine the reachability distance between database objects.");
+
+ /**
+ * Parameter to specify the number of nearest neighbors of an object to be
+ * considered for computing its LOF_SCORE, must be an integer greater than
+ * 1.
+ */
+ public static final OptionID KREF_ID = new OptionID("lof.krefer", "The number of nearest neighbors of an object to be considered for computing its LOF_SCORE.");
+
+ /**
+ * Parameter to specify the number of nearest neighbors of an object to be
+ * considered for computing its reachability distance.
+ */
+ public static final OptionID KREACH_ID = new OptionID("lof.kreach", "The number of nearest neighbors of an object to be considered for computing its reachability distance.");
+
+ /**
+ * The reference set size to use.
*/
- protected int k = 2;
+ protected int krefer = 2;
+
+ /**
+ * The set size to use for reachability distance.
+ */
+ protected int kreach = 2;
/**
* Neighborhood distance function.
@@ -568,23 +580,33 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter pK = new IntParameter(K_ID);
+ final IntParameter pK = new IntParameter(KREF_ID);
pK.addConstraint(new GreaterConstraint(1));
if (config.grab(pK)) {
- k = pK.getValue();
+ krefer = pK.intValue();
}
- final ObjectParameter<DistanceFunction<O, D>> reachDistP = new ObjectParameter<DistanceFunction<O, D>>(REACHABILITY_DISTANCE_FUNCTION_ID, DistanceFunction.class, true);
+ final IntParameter pK2 = new IntParameter(KREACH_ID);
+ pK2.setOptional(true);
+ pK2.addConstraint(new GreaterConstraint(1));
+ if (config.grab(pK2)) {
+ kreach = pK2.intValue();
+ } else {
+ kreach = krefer;
+ }
+
+ final ObjectParameter<DistanceFunction<O, D>> reachDistP = new ObjectParameter<>(REACHABILITY_DISTANCE_FUNCTION_ID, DistanceFunction.class);
+ reachDistP.setOptional(true);
if (config.grab(reachDistP)) {
reachabilityDistanceFunction = reachDistP.instantiateClass(config);
+ } else {
+ reachabilityDistanceFunction = distanceFunction;
}
}
@Override
- protected LOF<O, D> makeInstance() {
- // Default is to re-use the same distance
- DistanceFunction<O, D> rdist = (reachabilityDistanceFunction != null) ? reachabilityDistanceFunction : distanceFunction;
- return new LOF<O, D>(k, distanceFunction, rdist);
+ protected FlexibleLOF<O, D> makeInstance() {
+ return new FlexibleLOF<>(kreach, krefer, distanceFunction, reachabilityDistanceFunction);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/INFLO.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/INFLO.java
index 655a0910..ae297a3c 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/INFLO.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/INFLO.java
@@ -1,4 +1,4 @@
-package de.lmu.ifi.dbs.elki.algorithm.outlier;
+package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
/*
This file is part of ELKI:
@@ -24,6 +24,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
*/
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
@@ -34,13 +35,13 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
@@ -154,7 +155,7 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
int count = rnns.get(id).size();
if (!processedIDs.contains(id)) {
// TODO: use exactly k neighbors?
- KNNResult<D> list = knnQuery.getKNNForDBID(id, k);
+ KNNList<D> list = knnQuery.getKNNForDBID(id, k);
knns.get(id).addDBIDs(list);
processedIDs.add(id);
density.putDouble(id, 1 / list.getKNNDistance().doubleValue());
@@ -164,7 +165,7 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
for (DBIDIter q = knns.get(id).iter(); q.valid(); q.advance()) {
if (!processedIDs.contains(q)) {
// TODO: use exactly k neighbors?
- KNNResult<D> listQ = knnQuery.getKNNForDBID(q, k);
+ KNNList<D> listQ = knnQuery.getKNNForDBID(q, k);
knns.get(q).addDBIDs(listQ);
density.putDouble(q, 1 / listQ.getKNNDistance().doubleValue());
processedIDs.add(q);
@@ -209,7 +210,7 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
}
// Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<Double>("Influence Outlier Score", "inflo-outlier", TypeUtil.DOUBLE, inflos, relation.getDBIDs());
+ Relation<Double> scoreResult = new MaterializedRelation<>("Influence Outlier Score", "inflo-outlier", TypeUtil.DOUBLE, inflos, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(inflominmax.getMin(), inflominmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
return new OutlierResult(scoreMeta, scoreResult);
}
@@ -254,7 +255,7 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
@Override
protected INFLO<O, D> makeInstance() {
- return new INFLO<O, D>(distanceFunction, m, k);
+ return new INFLO<>(distanceFunction, m, k);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDF.java
index 4ce0313e..4a86e93d 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDF.java
@@ -1,10 +1,10 @@
-package de.lmu.ifi.dbs.elki.algorithm.outlier;
+package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -24,10 +24,12 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
*/
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.QueryUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
@@ -35,6 +37,10 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
@@ -43,18 +49,14 @@ import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDResultIter;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNList;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
-import de.lmu.ifi.dbs.elki.math.statistics.GaussianKernelDensityFunction;
-import de.lmu.ifi.dbs.elki.math.statistics.KernelDensityFunction;
+import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.GaussianKernelDensityFunction;
+import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.KernelDensityFunction;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
@@ -133,10 +135,11 @@ public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
/**
* Run the naive kernel density LOF algorithm.
*
+ * @param database Database to query
* @param relation Data to process
* @return LOF outlier result
*/
- public OutlierResult run(Relation<O> relation) {
+ public OutlierResult run(Database database, Relation<O> relation) {
StepProgress stepprog = LOG.isVerbose() ? new StepProgress("LDF", 3) : null;
final int dim = RelationUtil.dimensionality(relation);
@@ -150,43 +153,54 @@ public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
if (stepprog != null) {
stepprog.beginStep(1, "Materializing neighborhoods w.r.t. distance function.", LOG);
}
- MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<O, D>(relation, getDistanceFunction(), k);
- relation.getDatabase().addIndex(preproc);
- DistanceQuery<O, D> rdq = relation.getDatabase().getDistanceQuery(relation, getDistanceFunction());
+ MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<>(relation, getDistanceFunction(), k);
+ database.addIndex(preproc);
+ DistanceQuery<O, D> rdq = database.getDistanceQuery(relation, getDistanceFunction());
knnq = preproc.getKNNQuery(rdq, k);
}
- // Compute LRDs
+ // Compute LDEs
if (stepprog != null) {
stepprog.beginStep(2, "Computing LDEs.", LOG);
}
WritableDoubleDataStore ldes = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
FiniteProgress densProgress = LOG.isVerbose() ? new FiniteProgress("Densities", ids.size(), LOG) : null;
for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
- final KNNResult<D> neighbors = knnq.getKNNForDBID(it, k);
+ final KNNList<D> neighbors = knnq.getKNNForDBID(it, k);
double sum = 0.0;
int count = 0;
if (neighbors instanceof DoubleDistanceKNNList) {
// Fast version for double distances
- for (DoubleDistanceDBIDResultIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
+ for (DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
if (DBIDUtil.equal(neighbor, it)) {
continue;
}
- double nkdist = ((DoubleDistanceKNNList) knnq.getKNNForDBID(neighbor, k)).doubleKNNDistance();
-
- final double v = Math.max(nkdist, neighbor.doubleDistance()) / (h * nkdist);
- sum += kernel.density(v) / Math.pow(h * nkdist, dim);
- count++;
+ final double nkdist = ((DoubleDistanceKNNList) knnq.getKNNForDBID(neighbor, k)).doubleKNNDistance();
+ if (nkdist > 0.) {
+ final double v = Math.max(nkdist, neighbor.doubleDistance()) / (h * nkdist);
+ sum += kernel.density(v) / Math.pow(h * nkdist, dim);
+ count++;
+ } else {
+ sum = Double.POSITIVE_INFINITY;
+ count++;
+ break;
+ }
}
} else {
- for (DistanceDBIDResultIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ for (DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
if (DBIDUtil.equal(neighbor, it)) {
continue;
}
- double nkdist = knnq.getKNNForDBID(neighbor, k).getKNNDistance().doubleValue();
- final double v = Math.max(nkdist, neighbor.getDistance().doubleValue()) / (h * nkdist);
- sum += kernel.density(v) / Math.pow(h * nkdist, dim);
- count++;
+ final double nkdist = knnq.getKNNForDBID(neighbor, k).getKNNDistance().doubleValue();
+ if (nkdist > 0.) {
+ final double v = Math.max(nkdist, neighbor.getDistance().doubleValue()) / (h * nkdist);
+ sum += kernel.density(v) / Math.pow(h * nkdist, dim);
+ count++;
+ } else {
+ sum = Double.POSITIVE_INFINITY;
+ count++;
+ break;
+ }
}
}
ldes.putDouble(it, sum / count);
@@ -209,7 +223,7 @@ public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("Local Density Factors", ids.size(), LOG) : null;
for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
final double lrdp = ldes.doubleValue(it);
- final KNNResult<D> neighbors = knnq.getKNNForDBID(it, k);
+ final KNNList<D> neighbors = knnq.getKNNForDBID(it, k);
double sum = 0.0;
int count = 0;
for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
@@ -240,7 +254,7 @@ public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
}
// Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<Double>("Local Density Factor", "ldf-outlier", TypeUtil.DOUBLE, ldfs, ids);
+ Relation<Double> scoreResult = new MaterializedRelation<>("Local Density Factor", "ldf-outlier", TypeUtil.DOUBLE, ldfs, ids);
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, 1. / c, 1 / (1 + c));
OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
@@ -318,7 +332,7 @@ public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
k = pK.getValue();
}
- ObjectParameter<KernelDensityFunction> kernelP = new ObjectParameter<KernelDensityFunction>(KERNEL_ID, KernelDensityFunction.class, GaussianKernelDensityFunction.class);
+ ObjectParameter<KernelDensityFunction> kernelP = new ObjectParameter<>(KERNEL_ID, KernelDensityFunction.class, GaussianKernelDensityFunction.class);
if (config.grab(kernelP)) {
kernel = kernelP.instantiateClass(config);
}
@@ -336,7 +350,7 @@ public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
@Override
protected LDF<O, D> makeInstance() {
- return new LDF<O, D>(k, distanceFunction, kernel, h, c);
+ return new LDF<>(k, distanceFunction, kernel, h, c);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDOF.java
index fbbfe484..80ed3f68 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDOF.java
@@ -1,4 +1,4 @@
-package de.lmu.ifi.dbs.elki.algorithm.outlier;
+package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
/*
This file is part of ELKI:
@@ -24,6 +24,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
*/
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
@@ -32,13 +33,13 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
@@ -47,6 +48,7 @@ import de.lmu.ifi.dbs.elki.math.Mean;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.Alias;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -78,6 +80,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@Title("LDOF: Local Distance-Based Outlier Factor")
@Description("Local outlier detection appraoch suitable for scattered data by averaging the kNN distance over all k nearest neighbors")
@Reference(authors = "K. Zhang, M. Hutter, H. Jin", title = "A New Local Distance-Based Outlier Detection Approach for Scattered Real-World Data", booktitle = "Proc. 13th Pacific-Asia Conference on Advances in Knowledge Discovery and Data Mining (PAKDD 2009), Bangkok, Thailand, 2009", url = "http://dx.doi.org/10.1007/978-3-642-01307-2_84")
+@Alias({"de.lmu.ifi.dbs.elki.algorithm.outlier.LDOF"})
public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
@@ -136,14 +139,14 @@ public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
Mean dxp = new Mean(), Dxp = new Mean();
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- KNNResult<D> neighbors = knnQuery.getKNNForDBID(iditer, k);
+ KNNList<D> neighbors = knnQuery.getKNNForDBID(iditer, k);
// skip the point itself
dxp.reset(); Dxp.reset();
// TODO: optimize for double distances
- for (DistanceDBIDResultIter<D> neighbor1 = neighbors.iter(); neighbor1.valid(); neighbor1.advance()) {
+ for (DistanceDBIDListIter<D> neighbor1 = neighbors.iter(); neighbor1.valid(); neighbor1.advance()) {
if(!DBIDUtil.equal(neighbor1, iditer)) {
dxp.put(neighbor1.getDistance().doubleValue());
- for (DistanceDBIDResultIter<D> neighbor2 = neighbors.iter(); neighbor2.valid(); neighbor2.advance()) {
+ for (DistanceDBIDListIter<D> neighbor2 = neighbors.iter(); neighbor2.valid(); neighbor2.advance()) {
if(!DBIDUtil.equal(neighbor1, neighbor2) && !DBIDUtil.equal(neighbor2, iditer)) {
Dxp.put(distFunc.distance(neighbor1, neighbor2).doubleValue());
}
@@ -167,7 +170,7 @@ public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
}
// Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<Double>("LDOF Outlier Score", "ldof-outlier", TypeUtil.DOUBLE, ldofs, relation.getDBIDs());
+ Relation<Double> scoreResult = new MaterializedRelation<>("LDOF Outlier Score", "ldof-outlier", TypeUtil.DOUBLE, ldofs, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(ldofminmax.getMin(), ldofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, LDOF_BASELINE);
return new OutlierResult(scoreMeta, scoreResult);
}
@@ -204,7 +207,7 @@ public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
@Override
protected LDOF<O, D> makeInstance() {
- return new LDOF<O, D>(distanceFunction, k);
+ return new LDOF<>(distanceFunction, k);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOCI.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOCI.java
index ba9ad20e..e76c6034 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOCI.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOCI.java
@@ -1,10 +1,10 @@
-package de.lmu.ifi.dbs.elki.algorithm.outlier;
+package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -28,6 +28,7 @@ import java.util.Collections;
import java.util.List;
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
@@ -36,14 +37,14 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
-import de.lmu.ifi.dbs.elki.database.ids.DistanceDBIDPair;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDPair;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
@@ -52,6 +53,7 @@ import de.lmu.ifi.dbs.elki.math.MeanVariance;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.Alias;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -83,6 +85,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleIntPair;
@Title("LOCI: Fast Outlier Detection Using the Local Correlation Integral")
@Description("Algorithm to compute outliers based on the Local Correlation Integral")
@Reference(authors = "S. Papadimitriou, H. Kitagawa, P. B. Gibbons, C. Faloutsos", title = "LOCI: Fast Outlier Detection Using the Local Correlation Integral", booktitle = "Proc. 19th IEEE Int. Conf. on Data Engineering (ICDE '03), Bangalore, India, 2003", url = "http://dx.doi.org/10.1109/ICDE.2003.1260802")
+@Alias({"de.lmu.ifi.dbs.elki.algorithm.outlier.LOCI"})
public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
@@ -150,9 +153,9 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
// LOCI preprocessing step
WritableDataStore<ArrayList<DoubleIntPair>> interestingDistances = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_SORTED, ArrayList.class);
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DistanceDBIDResult<D> neighbors = rangeQuery.getRangeForDBID(iditer, rmax);
+ DistanceDBIDList<D> neighbors = rangeQuery.getRangeForDBID(iditer, rmax);
// build list of critical distances
- ArrayList<DoubleIntPair> cdist = new ArrayList<DoubleIntPair>(neighbors.size() << 1);
+ ArrayList<DoubleIntPair> cdist = new ArrayList<>(neighbors.size() << 1);
{
for(int i = 0; i < neighbors.size(); i++) {
DistanceDBIDPair<D> r = neighbors.get(i);
@@ -203,7 +206,7 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
if(maxneig >= nmin) {
D range = distFunc.getDistanceFactory().fromDouble(maxdist);
// Compute the largest neighborhood we will need.
- DistanceDBIDResult<D> maxneighbors = rangeQuery.getRangeForDBID(iditer, range);
+ DistanceDBIDList<D> maxneighbors = rangeQuery.getRangeForDBID(iditer, range);
// TODO: Ensure the set is sorted. Should be a no-op with most indexes.
// For any critical distance, compute the normalized MDEF score.
for(DoubleIntPair c : cdist) {
@@ -218,7 +221,7 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
// compute \hat{n}(p_i, r, \alpha) and the corresponding \simga_{MDEF}
MeanVariance mv_n_r_alpha = new MeanVariance();
// TODO: optimize for double distances
- for (DistanceDBIDResultIter<D> neighbor = maxneighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ for (DistanceDBIDListIter<D> neighbor = maxneighbors.iter(); neighbor.valid(); neighbor.advance()) {
// Stop at radius r
if(neighbor.getDistance().doubleValue() > r) {
break;
@@ -256,10 +259,10 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
if(progressLOCI != null) {
progressLOCI.ensureCompleted(LOG);
}
- Relation<Double> scoreResult = new MaterializedRelation<Double>("LOCI normalized MDEF", "loci-mdef-outlier", TypeUtil.DOUBLE, mdef_norm, relation.getDBIDs());
+ Relation<Double> scoreResult = new MaterializedRelation<>("LOCI normalized MDEF", "loci-mdef-outlier", TypeUtil.DOUBLE, mdef_norm, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
- result.addChildResult(new MaterializedRelation<Double>("LOCI MDEF Radius", "loci-critical-radius", TypeUtil.DOUBLE, mdef_radius, relation.getDBIDs()));
+ result.addChildResult(new MaterializedRelation<>("LOCI MDEF Radius", "loci-critical-radius", TypeUtil.DOUBLE, mdef_radius, relation.getDBIDs()));
return result;
}
@@ -313,7 +316,7 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
final D distanceFactory = (distanceFunction != null) ? distanceFunction.getDistanceFactory() : null;
- final DistanceParameter<D> rmaxP = new DistanceParameter<D>(RMAX_ID, distanceFactory);
+ final DistanceParameter<D> rmaxP = new DistanceParameter<>(RMAX_ID, distanceFactory);
if(config.grab(rmaxP)) {
rmax = rmaxP.getValue();
}
@@ -331,7 +334,7 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
@Override
protected LOCI<O, D> makeInstance() {
- return new LOCI<O, D>(distanceFunction, rmax, nmin, alpha);
+ return new LOCI<>(distanceFunction, rmax, nmin, alpha);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOF.java
new file mode 100644
index 00000000..302dafe6
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOF.java
@@ -0,0 +1,293 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
+import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+
+/**
+ * <p>
+ * Algorithm to compute density-based local outlier factors in a database based
+ * on a specified parameter {@link Parameterizer#K_ID} ({@code -lof.k}).
+ * </p>
+ *
+ * <p>
+ * The original LOF parameter was called &quot;minPts&quot;, but for consistency
+ * within ELKI we have renamed this parameter to &quot;k&quot;.
+ * </p>
+ *
+ * <p>
+ * Reference: <br>
+ * M. M. Breunig, H.-P. Kriegel, R. Ng, J. Sander: LOF: Identifying
+ * Density-Based Local Outliers. <br>
+ * In: Proc. 2nd ACM SIGMOD Int. Conf. on Management of Data (SIGMOD'00),
+ * Dallas, TX, 2000.
+ * </p>
+ *
+ * @author Erich Schubert
+ * @author Elke Achtert
+ *
+ * @apiviz.has KNNQuery
+ *
+ * @param <O> the type of DatabaseObjects handled by this Algorithm
+ * @param <D> Distance type
+ */
+@Title("LOF: Local Outlier Factor")
+@Description("Algorithm to compute density-based local outlier factors in a database based on the neighborhood size parameter 'k'")
+@Reference(authors = "M. M. Breunig, H.-P. Kriegel, R. Ng, and J. Sander", title = "LOF: Identifying Density-Based Local Outliers", booktitle = "Proc. 2nd ACM SIGMOD Int. Conf. on Management of Data (SIGMOD '00), Dallas, TX, 2000", url = "http://dx.doi.org/10.1145/342009.335388")
+@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.LOF", "outlier.LOF", "LOF" })
+public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(LOF.class);
+
+ /**
+ * Holds the value of {@link Parameterizer#K_ID}.
+ */
+ protected int k = 2;
+
+ /**
+ * Constructor.
+ *
+ * @param k the value of k
+ * @param distanceFunction the neighborhood distance function
+ */
+ public LOF(int k, DistanceFunction<? super O, D> distanceFunction) {
+ super(distanceFunction);
+ this.k = k + 1;
+ }
+
+ /**
+ * Performs the Generalized LOF_SCORE algorithm on the given database.
+ *
+ * @param database Database to query
+ * @param relation Data to process
+ * @return LOF outlier result
+ */
+ public OutlierResult run(Database database, Relation<O> relation) {
+ StepProgress stepprog = LOG.isVerbose() ? new StepProgress("LOF", 3) : null;
+ DistanceQuery<O, D> dq = database.getDistanceQuery(relation, getDistanceFunction());
+ // "HEAVY" flag for knn query since it is used more than once
+ KNNQuery<O, D> knnq = database.getKNNQuery(dq, k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
+ // No optimized kNN query - use a preprocessor!
+ if (!(knnq instanceof PreprocessorKNNQuery)) {
+ if (stepprog != null) {
+ stepprog.beginStep(1, "Materializing LOF neighborhoods.", LOG);
+ }
+ MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<>(relation, getDistanceFunction(), k);
+ knnq = preproc.getKNNQuery(dq, k);
+ }
+ DBIDs ids = relation.getDBIDs();
+
+ // Compute LRDs
+ if (stepprog != null) {
+ stepprog.beginStep(2, "Computing LRDs.", LOG);
+ }
+ WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
+ {
+ FiniteProgress lrdsProgress = LOG.isVerbose() ? new FiniteProgress("LRD", ids.size(), LOG) : null;
+ for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ final KNNList<D> neighbors = knnq.getKNNForDBID(iter, k);
+ double sum = 0.0;
+ int count = 0;
+ if (neighbors instanceof DoubleDistanceKNNList) {
+ // Fast version for double distances
+ for (DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
+ if (DBIDUtil.equal(neighbor, iter)) {
+ continue;
+ }
+ KNNList<D> neighborsNeighbors = knnq.getKNNForDBID(neighbor, k);
+ final double nkdist;
+ if (neighborsNeighbors instanceof DoubleDistanceKNNList) {
+ nkdist = ((DoubleDistanceKNNList) neighborsNeighbors).doubleKNNDistance();
+ } else {
+ nkdist = neighborsNeighbors.getKNNDistance().doubleValue();
+ }
+ sum += Math.max(neighbor.doubleDistance(), nkdist);
+ count++;
+ }
+ } else {
+ for (DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ if (DBIDUtil.equal(neighbor, iter)) {
+ continue;
+ }
+ KNNList<D> neighborsNeighbors = knnq.getKNNForDBID(neighbor, k);
+ sum += Math.max(neighbor.getDistance().doubleValue(), neighborsNeighbors.getKNNDistance().doubleValue());
+ count++;
+ }
+ }
+ // Avoid division by 0
+ final double lrd = (sum > 0) ? (count / sum) : Double.POSITIVE_INFINITY;
+ lrds.putDouble(iter, lrd);
+ if (lrdsProgress != null) {
+ lrdsProgress.incrementProcessed(LOG);
+ }
+ }
+ if (lrdsProgress != null) {
+ lrdsProgress.ensureCompleted(LOG);
+ }
+ }
+
+ // compute LOF_SCORE of each db object
+ if (stepprog != null) {
+ stepprog.beginStep(3, "Computing LOFs.", LOG);
+ }
+ WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
+ // track the maximum value for normalization.
+ DoubleMinMax lofminmax = new DoubleMinMax();
+ {
+ FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("LOF_SCORE for objects", ids.size(), LOG) : null;
+ for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ final double lof;
+ final double lrdp = lrds.doubleValue(iter);
+ final KNNList<D> neighbors = knnq.getKNNForDBID(iter, k);
+ if (!Double.isInfinite(lrdp)) {
+ double sum = 0.0;
+ int count = 0;
+ for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ // skip the point itself
+ if (DBIDUtil.equal(neighbor, iter)) {
+ continue;
+ }
+ final double val = lrds.doubleValue(neighbor);
+ sum += val;
+ count++;
+ if (Double.isInfinite(val)) {
+ break;
+ }
+ }
+ lof = sum / (lrdp * count);
+ } else {
+ lof = 1.0;
+ }
+ lofs.putDouble(iter, lof);
+ // update minimum and maximum
+ lofminmax.put(lof);
+
+ if (progressLOFs != null) {
+ progressLOFs.incrementProcessed(LOG);
+ }
+ }
+ if (progressLOFs != null) {
+ progressLOFs.ensureCompleted(LOG);
+ }
+ }
+
+ if (stepprog != null) {
+ stepprog.setCompleted(LOG);
+ }
+
+ // Build result representation.
+ Relation<Double> scoreResult = new MaterializedRelation<>("Local Outlier Factor", "lof-outlier", TypeUtil.DOUBLE, lofs, ids);
+ OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
+ OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
+
+ return result;
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(getDistanceFunction().getInputTypeRestriction());
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ /**
+ * Parameter to specify the number of nearest neighbors of an object to be
+ * considered for computing its LOF_SCORE, must be an integer greater than
+ * 1.
+ */
+ public static final OptionID K_ID = new OptionID("lof.k", "The number of nearest neighbors of an object to be considered for computing its LOF_SCORE.");
+
+ /**
+ * The neighborhood size to use.
+ */
+ protected int k = 2;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ final IntParameter pK = new IntParameter(K_ID);
+ pK.addConstraint(new GreaterConstraint(1));
+ if (config.grab(pK)) {
+ k = pK.getValue();
+ }
+ }
+
+ @Override
+ protected LOF<O, D> makeInstance() {
+ return new LOF<>(k, distanceFunction);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LoOP.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LoOP.java
index 5da06983..15ff690a 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LoOP.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LoOP.java
@@ -1,10 +1,10 @@
-package de.lmu.ifi.dbs.elki.algorithm.outlier;
+package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -24,6 +24,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
*/
import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
@@ -34,17 +35,17 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDResultIter;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNList;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -56,6 +57,7 @@ import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore;
+import de.lmu.ifi.dbs.elki.utilities.Alias;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -85,6 +87,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
@Title("LoOP: Local Outlier Probabilities")
@Description("Variant of the LOF algorithm normalized using statistical values.")
@Reference(authors = "H.-P. Kriegel, P. Kröger, E. Schubert, A. Zimek", title = "LoOP: Local Outlier Probabilities", booktitle = "Proceedings of the 18th International Conference on Information and Knowledge Management (CIKM), Hong Kong, China, 2009", url = "http://dx.doi.org/10.1145/1645953.1646195")
+@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.LoOP", "LoOP", "outlier.LoOP" })
public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
@@ -188,7 +191,7 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
if (stepprog != null) {
stepprog.beginStep(1, "Materializing neighborhoods with respect to reference neighborhood distance function.", LOG);
}
- MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<O, D>(relation, comparisonDistanceFunction, kcomp);
+ MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<>(relation, comparisonDistanceFunction, kcomp);
database.addIndex(preproc);
DistanceQuery<O, D> cdq = database.getDistanceQuery(relation, comparisonDistanceFunction);
knnComp = preproc.getKNNQuery(cdq, kreach, DatabaseQuery.HINT_HEAVY_USE);
@@ -205,7 +208,7 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
knnComp = QueryUtil.getKNNQuery(relation, comparisonDistanceFunction, kreach);
knnReach = QueryUtil.getKNNQuery(relation, reachabilityDistanceFunction, kcomp);
}
- return new Pair<KNNQuery<O, D>, KNNQuery<O, D>>(knnComp, knnReach);
+ return new Pair<>(knnComp, knnReach);
}
/**
@@ -241,13 +244,13 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
}
FiniteProgress prdsProgress = LOG.isVerbose() ? new FiniteProgress("pdists", relation.size(), LOG) : null;
for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- final KNNResult<D> neighbors = knnReach.getKNNForDBID(iditer, kreach);
+ final KNNList<D> neighbors = knnReach.getKNNForDBID(iditer, kreach);
mean.reset();
// use first kref neighbors as reference set
int ks = 0;
// TODO: optimize for double distances
if (neighbors instanceof DoubleDistanceKNNList) {
- for (DoubleDistanceDBIDResultIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
+ for (DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
if (objectIsInKNN || !DBIDUtil.equal(neighbor, iditer)) {
final double d = neighbor.doubleDistance();
mean.put(d * d);
@@ -258,7 +261,7 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
}
}
} else {
- for (DistanceDBIDResultIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ for (DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
if (objectIsInKNN || !DBIDUtil.equal(neighbor, iditer)) {
double d = neighbor.getDistance().doubleValue();
mean.put(d * d);
@@ -287,7 +290,7 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
FiniteProgress progressPLOFs = LOG.isVerbose() ? new FiniteProgress("PLOFs for objects", relation.size(), LOG) : null;
MeanVariance mv = new MeanVariance();
for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- final KNNResult<D> neighbors = knnComp.getKNNForDBID(iditer, kcomp);
+ final KNNList<D> neighbors = knnComp.getKNNForDBID(iditer, kcomp);
mv.reset();
// use first kref neighbors as comparison set.
int ks = 0;
@@ -340,7 +343,7 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
}
// Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<Double>("Local Outlier Probabilities", "loop-outlier", TypeUtil.DOUBLE, loops, relation.getDBIDs());
+ Relation<Double> scoreResult = new MaterializedRelation<>("Local Outlier Probabilities", "loop-outlier", TypeUtil.DOUBLE, loops, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore();
return new OutlierResult(scoreMeta, scoreResult);
}
@@ -403,7 +406,7 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
kcomp = kcompP.intValue();
}
- final ObjectParameter<DistanceFunction<O, D>> compDistP = new ObjectParameter<DistanceFunction<O, D>>(COMPARISON_DISTANCE_FUNCTION_ID, DistanceFunction.class, EuclideanDistanceFunction.class);
+ final ObjectParameter<DistanceFunction<O, D>> compDistP = new ObjectParameter<>(COMPARISON_DISTANCE_FUNCTION_ID, DistanceFunction.class, EuclideanDistanceFunction.class);
if (config.grab(compDistP)) {
comparisonDistanceFunction = compDistP.instantiateClass(config);
}
@@ -417,7 +420,7 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
kreach = kcomp;
}
- final ObjectParameter<DistanceFunction<O, D>> reachDistP = new ObjectParameter<DistanceFunction<O, D>>(REACHABILITY_DISTANCE_FUNCTION_ID, DistanceFunction.class, true);
+ final ObjectParameter<DistanceFunction<O, D>> reachDistP = new ObjectParameter<>(REACHABILITY_DISTANCE_FUNCTION_ID, DistanceFunction.class, true);
if (config.grab(reachDistP)) {
reachabilityDistanceFunction = reachDistP.instantiateClass(config);
}
@@ -433,7 +436,7 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
@Override
protected LoOP<O, D> makeInstance() {
DistanceFunction<O, D> realreach = (reachabilityDistanceFunction != null) ? reachabilityDistanceFunction : comparisonDistanceFunction;
- return new LoOP<O, D>(kreach, kcomp, realreach, comparisonDistanceFunction, lambda);
+ return new LoOP<>(kreach, kcomp, realreach, comparisonDistanceFunction, lambda);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OnlineLOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/OnlineLOF.java
index bac5db36..c01c914f 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OnlineLOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/OnlineLOF.java
@@ -1,30 +1,31 @@
-package de.lmu.ifi.dbs.elki.algorithm.outlier;
-/*
-This file is part of ELKI:
-Environment for Developing KDD-Applications Supported by Index-Structures
-
-Copyright (C) 2012
-Ludwig-Maximilians-Universität München
-Lehr- und Forschungseinheit für Datenbanksysteme
-ELKI Development Team
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
+package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
import java.util.List;
-import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.QueryUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
@@ -33,6 +34,8 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
@@ -40,8 +43,6 @@ import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery;
import de.lmu.ifi.dbs.elki.database.query.rknn.RKNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.index.preprocessed.knn.AbstractMaterializeKNNPreprocessor;
import de.lmu.ifi.dbs.elki.index.preprocessed.knn.KNNChangeEvent;
@@ -53,11 +54,7 @@ import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
/**
@@ -66,10 +63,10 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
*
* @author Elke Achtert
*
- * @apiviz.has LOF.LOFResult oneway - - updates
+ * @apiviz.has FlexibleLOF.LOFResult oneway - - updates
*/
-// TODO: related to publication?
-public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
+// TODO: related to publication?
+public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends FlexibleLOF<O, D> {
/**
* The logger for this class.
*/
@@ -78,12 +75,13 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
/**
* Constructor.
*
- * @param k the value of k
+ * @param krefer The number of neighbors for reference
+ * @param kreach The number of neighbors for reachability distance
* @param neighborhoodDistanceFunction the neighborhood distance function
* @param reachabilityDistanceFunction the reachability distance function
*/
- public OnlineLOF(int k, DistanceFunction<? super O, D> neighborhoodDistanceFunction, DistanceFunction<? super O, D> reachabilityDistanceFunction) {
- super(k, neighborhoodDistanceFunction, reachabilityDistanceFunction);
+ public OnlineLOF(int krefer, int kreach, DistanceFunction<? super O, D> neighborhoodDistanceFunction, DistanceFunction<? super O, D> reachabilityDistanceFunction) {
+ super(krefer, kreach, neighborhoodDistanceFunction, reachabilityDistanceFunction);
}
/**
@@ -92,10 +90,10 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
* the preprocessors.
*/
@Override
- public OutlierResult run(Relation<O> relation) {
+ public OutlierResult run(Database database, Relation<O> relation) {
StepProgress stepprog = LOG.isVerbose() ? new StepProgress("OnlineLOF", 3) : null;
- Pair<Pair<KNNQuery<O, D>, KNNQuery<O, D>>, Pair<RKNNQuery<O, D>, RKNNQuery<O, D>>> queries = getKNNAndRkNNQueries(relation, stepprog);
+ Pair<Pair<KNNQuery<O, D>, KNNQuery<O, D>>, Pair<RKNNQuery<O, D>, RKNNQuery<O, D>>> queries = getKNNAndRkNNQueries(database, relation, stepprog);
KNNQuery<O, D> kNNRefer = queries.getFirst().getFirst();
KNNQuery<O, D> kNNReach = queries.getFirst().getSecond();
RKNNQuery<O, D> rkNNRefer = queries.getSecond().getFirst();
@@ -107,8 +105,8 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
// add listener
KNNListener l = new LOFKNNListener(lofResult);
- ((MaterializeKNNPreprocessor<O, D>)((PreprocessorKNNQuery<O, D, ? extends KNNResult<D>>) lofResult.getKNNRefer()).getPreprocessor()).addKNNListener(l);
- ((MaterializeKNNPreprocessor<O, D>)((PreprocessorKNNQuery<O, D, ? extends KNNResult<D>>) lofResult.getKNNReach()).getPreprocessor()).addKNNListener(l);
+ ((MaterializeKNNPreprocessor<O, D>) ((PreprocessorKNNQuery<O, D, ? extends KNNList<D>>) lofResult.getKNNRefer()).getPreprocessor()).addKNNListener(l);
+ ((MaterializeKNNPreprocessor<O, D>) ((PreprocessorKNNQuery<O, D, ? extends KNNList<D>>) lofResult.getKNNReach()).getPreprocessor()).addKNNListener(l);
return lofResult.getResult();
}
@@ -120,50 +118,49 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
* @param stepprog Progress logger
* @return the kNN and rkNN queries
*/
- private Pair<Pair<KNNQuery<O, D>, KNNQuery<O, D>>, Pair<RKNNQuery<O, D>, RKNNQuery<O, D>>> getKNNAndRkNNQueries(Relation<O> relation, StepProgress stepprog) {
+ private Pair<Pair<KNNQuery<O, D>, KNNQuery<O, D>>, Pair<RKNNQuery<O, D>, RKNNQuery<O, D>>> getKNNAndRkNNQueries(Database database, Relation<O> relation, StepProgress stepprog) {
// Use "HEAVY" flag, since this is an online algorithm
- KNNQuery<O, D> kNNRefer = QueryUtil.getKNNQuery(relation, neighborhoodDistanceFunction, k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
- RKNNQuery<O, D> rkNNRefer = QueryUtil.getRKNNQuery(relation, neighborhoodDistanceFunction, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
+ KNNQuery<O, D> kNNRefer = QueryUtil.getKNNQuery(relation, referenceDistanceFunction, krefer, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
+ RKNNQuery<O, D> rkNNRefer = QueryUtil.getRKNNQuery(relation, referenceDistanceFunction, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
// No optimized kNN query or RkNN query - use a preprocessor!
- if(kNNRefer == null || rkNNRefer == null) {
- if(stepprog != null) {
+ if (kNNRefer == null || rkNNRefer == null) {
+ if (stepprog != null) {
stepprog.beginStep(1, "Materializing neighborhood w.r.t. reference neighborhood distance function.", LOG);
}
- MaterializeKNNAndRKNNPreprocessor<O, D> preproc = new MaterializeKNNAndRKNNPreprocessor<O, D>(relation, neighborhoodDistanceFunction, k);
- DistanceQuery<O, D> ndq = relation.getDatabase().getDistanceQuery(relation, neighborhoodDistanceFunction);
- kNNRefer = preproc.getKNNQuery(ndq, k, DatabaseQuery.HINT_HEAVY_USE);
- rkNNRefer = preproc.getRKNNQuery(ndq, k, DatabaseQuery.HINT_HEAVY_USE);
+ MaterializeKNNAndRKNNPreprocessor<O, D> preproc = new MaterializeKNNAndRKNNPreprocessor<>(relation, referenceDistanceFunction, krefer);
+ DistanceQuery<O, D> ndq = database.getDistanceQuery(relation, referenceDistanceFunction);
+ kNNRefer = preproc.getKNNQuery(ndq, krefer, DatabaseQuery.HINT_HEAVY_USE);
+ rkNNRefer = preproc.getRKNNQuery(ndq, krefer, DatabaseQuery.HINT_HEAVY_USE);
// add as index
relation.getDatabase().addIndex(preproc);
- }
- else {
- if(stepprog != null) {
+ } else {
+ if (stepprog != null) {
stepprog.beginStep(1, "Optimized neighborhood w.r.t. reference neighborhood distance function provided by database.", LOG);
}
}
- KNNQuery<O, D> kNNReach = QueryUtil.getKNNQuery(relation, reachabilityDistanceFunction, k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
+ KNNQuery<O, D> kNNReach = QueryUtil.getKNNQuery(relation, reachabilityDistanceFunction, kreach, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
RKNNQuery<O, D> rkNNReach = QueryUtil.getRKNNQuery(relation, reachabilityDistanceFunction, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
- if(kNNReach == null || rkNNReach == null) {
- if(stepprog != null) {
+ if (kNNReach == null || rkNNReach == null) {
+ if (stepprog != null) {
stepprog.beginStep(2, "Materializing neighborhood w.r.t. reachability distance function.", LOG);
}
ListParameterization config = new ListParameterization();
config.addParameter(AbstractMaterializeKNNPreprocessor.Factory.DISTANCE_FUNCTION_ID, reachabilityDistanceFunction);
- config.addParameter(AbstractMaterializeKNNPreprocessor.Factory.K_ID, k);
- MaterializeKNNAndRKNNPreprocessor<O, D> preproc = new MaterializeKNNAndRKNNPreprocessor<O, D>(relation, reachabilityDistanceFunction, k);
- DistanceQuery<O, D> rdq = relation.getDatabase().getDistanceQuery(relation, reachabilityDistanceFunction);
- kNNReach = preproc.getKNNQuery(rdq, k, DatabaseQuery.HINT_HEAVY_USE);
- rkNNReach = preproc.getRKNNQuery(rdq, k, DatabaseQuery.HINT_HEAVY_USE);
+ config.addParameter(AbstractMaterializeKNNPreprocessor.Factory.K_ID, kreach);
+ MaterializeKNNAndRKNNPreprocessor<O, D> preproc = new MaterializeKNNAndRKNNPreprocessor<>(relation, reachabilityDistanceFunction, kreach);
+ DistanceQuery<O, D> rdq = database.getDistanceQuery(relation, reachabilityDistanceFunction);
+ kNNReach = preproc.getKNNQuery(rdq, kreach, DatabaseQuery.HINT_HEAVY_USE);
+ rkNNReach = preproc.getRKNNQuery(rdq, kreach, DatabaseQuery.HINT_HEAVY_USE);
// add as index
relation.getDatabase().addIndex(preproc);
}
- Pair<KNNQuery<O, D>, KNNQuery<O, D>> kNNPair = new Pair<KNNQuery<O, D>, KNNQuery<O, D>>(kNNRefer, kNNReach);
- Pair<RKNNQuery<O, D>, RKNNQuery<O, D>> rkNNPair = new Pair<RKNNQuery<O, D>, RKNNQuery<O, D>>(rkNNRefer, rkNNReach);
+ Pair<KNNQuery<O, D>, KNNQuery<O, D>> kNNPair = new Pair<>(kNNRefer, kNNReach);
+ Pair<RKNNQuery<O, D>, RKNNQuery<O, D>> rkNNPair = new Pair<>(rkNNRefer, rkNNReach);
- return new Pair<Pair<KNNQuery<O, D>, KNNQuery<O, D>>, Pair<RKNNQuery<O, D>, RKNNQuery<O, D>>>(kNNPair, rkNNPair);
+ return new Pair<>(kNNPair, rkNNPair);
}
/**
@@ -201,24 +198,20 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
AbstractMaterializeKNNPreprocessor<O, D, ?> p1 = ((PreprocessorKNNQuery<O, D, ?>) lofResult.getKNNRefer()).getPreprocessor();
AbstractMaterializeKNNPreprocessor<O, D, ?> p2 = ((PreprocessorKNNQuery<O, D, ?>) lofResult.getKNNReach()).getPreprocessor();
- if(firstEventReceived == null) {
- if(e.getSource().equals(p1) && e.getSource().equals(p2)) {
+ if (firstEventReceived == null) {
+ if (e.getSource().equals(p1) && e.getSource().equals(p2)) {
kNNsChanged(e, e);
- }
- else {
+ } else {
firstEventReceived = e;
}
- }
- else {
- if(e.getSource().equals(p1) && firstEventReceived.getSource().equals(p2)) {
+ } else {
+ if (e.getSource().equals(p1) && firstEventReceived.getSource().equals(p2)) {
kNNsChanged(e, firstEventReceived);
firstEventReceived = null;
- }
- else if(e.getSource().equals(p2) && firstEventReceived.getSource().equals(p1)) {
+ } else if (e.getSource().equals(p2) && firstEventReceived.getSource().equals(p1)) {
kNNsChanged(firstEventReceived, e);
firstEventReceived = null;
- }
- else {
+ } else {
throw new UnsupportedOperationException("Event sources do not fit!");
}
}
@@ -232,20 +225,18 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
* @param e2 the change event of the second preprocessor
*/
private void kNNsChanged(KNNChangeEvent e1, KNNChangeEvent e2) {
- if(!e1.getType().equals(e2.getType())) {
+ if (!e1.getType().equals(e2.getType())) {
throw new UnsupportedOperationException("Event types do not fit: " + e1.getType() + " != " + e2.getType());
}
- if(!e1.getObjects().equals(e2.getObjects())) {
+ if (!e1.getObjects().equals(e2.getObjects())) {
throw new UnsupportedOperationException("Objects do not fit: " + e1.getObjects() + " != " + e2.getObjects());
}
- if(e1.getType().equals(KNNChangeEvent.Type.DELETE)) {
+ if (e1.getType().equals(KNNChangeEvent.Type.DELETE)) {
kNNsRemoved(e1.getObjects(), e1.getUpdates(), e2.getUpdates(), lofResult);
- }
- else if(e1.getType().equals(KNNChangeEvent.Type.INSERT)) {
+ } else if (e1.getType().equals(KNNChangeEvent.Type.INSERT)) {
kNNsInserted(e1.getObjects(), e1.getUpdates(), e2.getUpdates(), lofResult);
- }
- else {
+ } else {
throw new UnsupportedOperationException("Unsupported event type: " + e1.getType());
}
}
@@ -264,38 +255,38 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
StepProgress stepprog = LOG.isVerbose() ? new StepProgress(3) : null;
// recompute lrds
- if(stepprog != null) {
+ if (stepprog != null) {
stepprog.beginStep(1, "Recompute LRDs.", LOG);
}
ArrayDBIDs lrd_ids = DBIDUtil.ensureArray(DBIDUtil.union(insertions, updates2));
- List<? extends DistanceDBIDResult<D>> reachDistRKNNs = lofResult.getRkNNReach().getRKNNForBulkDBIDs(lrd_ids, k);
+ List<? extends DistanceDBIDList<D>> reachDistRKNNs = lofResult.getRkNNReach().getRKNNForBulkDBIDs(lrd_ids, kreach);
ArrayDBIDs affected_lrd_id_candidates = mergeIDs(reachDistRKNNs, lrd_ids);
ArrayModifiableDBIDs affected_lrd_ids = DBIDUtil.newArray(affected_lrd_id_candidates.size());
WritableDoubleDataStore new_lrds = computeLRDs(affected_lrd_id_candidates, lofResult.getKNNReach());
for (DBIDIter iter = affected_lrd_id_candidates.iter(); iter.valid(); iter.advance()) {
double new_lrd = new_lrds.doubleValue(iter);
double old_lrd = lofResult.getLrds().doubleValue(iter);
- if(Double.isNaN(old_lrd) || old_lrd != new_lrd) {
+ if (Double.isNaN(old_lrd) || old_lrd != new_lrd) {
lofResult.getLrds().putDouble(iter, new_lrd);
affected_lrd_ids.add(iter);
}
}
// recompute lofs
- if(stepprog != null) {
+ if (stepprog != null) {
stepprog.beginStep(2, "Recompute LOFS.", LOG);
}
- List<? extends DistanceDBIDResult<D>> primDistRKNNs = lofResult.getRkNNRefer().getRKNNForBulkDBIDs(affected_lrd_ids, k);
+ List<? extends DistanceDBIDList<D>> primDistRKNNs = lofResult.getRkNNRefer().getRKNNForBulkDBIDs(affected_lrd_ids, krefer);
ArrayDBIDs affected_lof_ids = mergeIDs(primDistRKNNs, affected_lrd_ids, insertions, updates1);
recomputeLOFs(affected_lof_ids, lofResult);
// fire result changed
- if(stepprog != null) {
+ if (stepprog != null) {
stepprog.beginStep(3, "Inform listeners.", LOG);
}
lofResult.getResult().getHierarchy().resultChanged(lofResult.getResult());
- if(stepprog != null) {
+ if (stepprog != null) {
stepprog.setCompleted(LOG);
}
}
@@ -314,7 +305,7 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
StepProgress stepprog = LOG.isVerbose() ? new StepProgress(4) : null;
// delete lrds and lofs
- if(stepprog != null) {
+ if (stepprog != null) {
stepprog.beginStep(1, "Delete old LRDs and LOFs.", LOG);
}
for (DBIDIter iter = deletions.iter(); iter.valid(); iter.advance()) {
@@ -323,38 +314,38 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
}
// recompute lrds
- if(stepprog != null) {
+ if (stepprog != null) {
stepprog.beginStep(2, "Recompute LRDs.", LOG);
}
ArrayDBIDs lrd_ids = DBIDUtil.ensureArray(updates2);
- List<? extends DistanceDBIDResult<D>> reachDistRKNNs = lofResult.getRkNNReach().getRKNNForBulkDBIDs(lrd_ids, k);
+ List<? extends DistanceDBIDList<D>> reachDistRKNNs = lofResult.getRkNNReach().getRKNNForBulkDBIDs(lrd_ids, kreach);
ArrayDBIDs affected_lrd_id_candidates = mergeIDs(reachDistRKNNs, lrd_ids);
ArrayModifiableDBIDs affected_lrd_ids = DBIDUtil.newArray(affected_lrd_id_candidates.size());
WritableDoubleDataStore new_lrds = computeLRDs(affected_lrd_id_candidates, lofResult.getKNNReach());
for (DBIDIter iter = affected_lrd_id_candidates.iter(); iter.valid(); iter.advance()) {
double new_lrd = new_lrds.doubleValue(iter);
double old_lrd = lofResult.getLrds().doubleValue(iter);
- if(old_lrd != new_lrd) {
+ if (old_lrd != new_lrd) {
lofResult.getLrds().putDouble(iter, new_lrd);
affected_lrd_ids.add(iter);
}
}
// recompute lofs
- if(stepprog != null) {
+ if (stepprog != null) {
stepprog.beginStep(3, "Recompute LOFS.", LOG);
}
- List<? extends DistanceDBIDResult<D>> primDistRKNNs = lofResult.getRkNNRefer().getRKNNForBulkDBIDs(affected_lrd_ids, k);
+ List<? extends DistanceDBIDList<D>> primDistRKNNs = lofResult.getRkNNRefer().getRKNNForBulkDBIDs(affected_lrd_ids, krefer);
ArrayDBIDs affected_lof_ids = mergeIDs(primDistRKNNs, affected_lrd_ids, updates1);
recomputeLOFs(affected_lof_ids, lofResult);
// fire result changed
- if(stepprog != null) {
+ if (stepprog != null) {
stepprog.beginStep(4, "Inform listeners.", LOG);
}
lofResult.getResult().getHierarchy().resultChanged(lofResult.getResult());
- if(stepprog != null) {
+ if (stepprog != null) {
stepprog.setCompleted(LOG);
}
}
@@ -367,12 +358,12 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
* @return a set containing the ids of the query result and the specified
* ids
*/
- private ArrayModifiableDBIDs mergeIDs(List<? extends DistanceDBIDResult<D>> queryResults, DBIDs... ids) {
+ private ArrayModifiableDBIDs mergeIDs(List<? extends DistanceDBIDList<D>> queryResults, DBIDs... ids) {
ModifiableDBIDs result = DBIDUtil.newHashSet();
- for(DBIDs dbids : ids) {
+ for (DBIDs dbids : ids) {
result.addDBIDs(dbids);
}
- for(DistanceDBIDResult<D> queryResult : queryResults) {
+ for (DistanceDBIDList<D> queryResult : queryResults) {
result.addDBIDs(queryResult);
}
return DBIDUtil.newArray(result);
@@ -394,12 +385,12 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
DoubleMinMax new_lofminmax = lofsAndMax.getSecond();
// Actualize meta info
- if(new_lofminmax.isValid() && lofResult.getResult().getOutlierMeta().getActualMaximum() < new_lofminmax.getMax()) {
+ if (new_lofminmax.isValid() && lofResult.getResult().getOutlierMeta().getActualMaximum() < new_lofminmax.getMax()) {
BasicOutlierScoreMeta scoreMeta = (BasicOutlierScoreMeta) lofResult.getResult().getOutlierMeta();
scoreMeta.setActualMaximum(new_lofminmax.getMax());
}
- if(new_lofminmax.isValid() && lofResult.getResult().getOutlierMeta().getActualMinimum() > new_lofminmax.getMin()) {
+ if (new_lofminmax.isValid() && lofResult.getResult().getOutlierMeta().getActualMinimum() > new_lofminmax.getMin()) {
BasicOutlierScoreMeta scoreMeta = (BasicOutlierScoreMeta) lofResult.getResult().getOutlierMeta();
scoreMeta.setActualMinimum(new_lofminmax.getMin());
}
@@ -412,49 +403,16 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
}
/**
- * Parameterization class.
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
*/
- public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
- /**
- * The neighborhood size to use
- */
- protected int k = 2;
-
- /**
- * Neighborhood distance function.
- */
- protected DistanceFunction<O, D> neighborhoodDistanceFunction = null;
-
- /**
- * Reachability distance function.
- */
- protected DistanceFunction<O, D> reachabilityDistanceFunction = null;
-
- @Override
- protected void makeOptions(Parameterization config) {
- super.makeOptions(config);
-
- final IntParameter pK = new IntParameter(K_ID);
- pK.addConstraint(new GreaterConstraint(1));
- if(config.grab(pK)) {
- k = pK.getValue();
- }
-
- final ObjectParameter<DistanceFunction<O, D>> reachDistP = new ObjectParameter<DistanceFunction<O, D>>(REACHABILITY_DISTANCE_FUNCTION_ID, DistanceFunction.class, true);
- if(config.grab(reachDistP)) {
- reachabilityDistanceFunction = reachDistP.instantiateClass(config);
- }
- }
-
+ public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends FlexibleLOF.Parameterizer<O, D> {
@Override
protected OnlineLOF<O, D> makeInstance() {
- // Default is to re-use the same distance
- DistanceFunction<O, D> rdist = (reachabilityDistanceFunction != null) ? reachabilityDistanceFunction : distanceFunction;
- return new OnlineLOF<O, D>(k, distanceFunction, rdist);
+ return new OnlineLOF<>(kreach, krefer, distanceFunction, reachabilityDistanceFunction);
}
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleKernelDensityLOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimpleKernelDensityLOF.java
index 1c104c08..2ff7534a 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleKernelDensityLOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimpleKernelDensityLOF.java
@@ -1,10 +1,10 @@
-package de.lmu.ifi.dbs.elki.algorithm.outlier;
+package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -24,10 +24,12 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
*/
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.QueryUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
@@ -35,6 +37,10 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
@@ -43,18 +49,14 @@ import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDResultIter;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNList;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
-import de.lmu.ifi.dbs.elki.math.statistics.EpanechnikovKernelDensityFunction;
-import de.lmu.ifi.dbs.elki.math.statistics.KernelDensityFunction;
+import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.EpanechnikovKernelDensityFunction;
+import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.KernelDensityFunction;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
@@ -107,10 +109,11 @@ public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberD
/**
* Run the naive kernel density LOF algorithm.
*
+ * @param database Database to query
* @param relation Data to process
* @return LOF outlier result
*/
- public OutlierResult run(Relation<O> relation) {
+ public OutlierResult run(Database database, Relation<O> relation) {
StepProgress stepprog = LOG.isVerbose() ? new StepProgress("KernelDensityLOF", 3) : null;
final int dim = RelationUtil.dimensionality(relation);
@@ -124,9 +127,9 @@ public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberD
if (stepprog != null) {
stepprog.beginStep(1, "Materializing neighborhoods w.r.t. distance function.", LOG);
}
- MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<O, D>(relation, getDistanceFunction(), k);
- relation.getDatabase().addIndex(preproc);
- DistanceQuery<O, D> rdq = relation.getDatabase().getDistanceQuery(relation, getDistanceFunction());
+ MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<>(relation, getDistanceFunction(), k);
+ database.addIndex(preproc);
+ DistanceQuery<O, D> rdq = database.getDistanceQuery(relation, getDistanceFunction());
knnq = preproc.getKNNQuery(rdq, k);
}
@@ -137,12 +140,12 @@ public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberD
WritableDoubleDataStore dens = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
FiniteProgress densProgress = LOG.isVerbose() ? new FiniteProgress("Densities", ids.size(), LOG) : null;
for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
- final KNNResult<D> neighbors = knnq.getKNNForDBID(it, k);
+ final KNNList<D> neighbors = knnq.getKNNForDBID(it, k);
int count = 0;
double sum = 0.0;
if (neighbors instanceof DoubleDistanceKNNList) {
// Fast version for double distances
- for (DoubleDistanceDBIDResultIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
+ for (DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
if (DBIDUtil.equal(neighbor, it)) {
continue;
}
@@ -152,7 +155,7 @@ public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberD
count++;
}
} else {
- for (DistanceDBIDResultIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ for (DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
if (DBIDUtil.equal(neighbor, it)) {
continue;
}
@@ -185,7 +188,7 @@ public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberD
final double lrdp = dens.doubleValue(it);
final double lof;
if (lrdp > 0) {
- final KNNResult<D> neighbors = knnq.getKNNForDBID(it, k);
+ final KNNList<D> neighbors = knnq.getKNNForDBID(it, k);
double sum = 0.0;
int count = 0;
for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
@@ -217,7 +220,7 @@ public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberD
}
// Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<Double>("Kernel Density Local Outlier Factor", "kernel-density-slof-outlier", TypeUtil.DOUBLE, lofs, ids);
+ Relation<Double> scoreResult = new MaterializedRelation<>("Kernel Density Local Outlier Factor", "kernel-density-slof-outlier", TypeUtil.DOUBLE, lofs, ids);
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
@@ -264,13 +267,13 @@ public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberD
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter pK = new IntParameter(LOF.K_ID);
+ final IntParameter pK = new IntParameter(LOF.Parameterizer.K_ID);
pK.addConstraint(new GreaterConstraint(1));
if (config.grab(pK)) {
k = pK.getValue();
}
- ObjectParameter<KernelDensityFunction> kernelP = new ObjectParameter<KernelDensityFunction>(KERNEL_ID, KernelDensityFunction.class, EpanechnikovKernelDensityFunction.class);
+ ObjectParameter<KernelDensityFunction> kernelP = new ObjectParameter<>(KERNEL_ID, KernelDensityFunction.class, EpanechnikovKernelDensityFunction.class);
if (config.grab(kernelP)) {
kernel = kernelP.instantiateClass(config);
}
@@ -278,7 +281,7 @@ public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberD
@Override
protected SimpleKernelDensityLOF<O, D> makeInstance() {
- return new SimpleKernelDensityLOF<O, D>(k, distanceFunction, kernel);
+ return new SimpleKernelDensityLOF<>(k, distanceFunction, kernel);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleLOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimplifiedLOF.java
index 48505ed5..413eaca1 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleLOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimplifiedLOF.java
@@ -1,10 +1,10 @@
-package de.lmu.ifi.dbs.elki.algorithm.outlier;
+package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -24,8 +24,10 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
*/
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.QueryUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
@@ -33,6 +35,10 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
@@ -40,10 +46,6 @@ import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDResultIter;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNList;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -53,6 +55,8 @@ import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -61,6 +65,14 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
* A simplified version of the original LOF algorithm, which does not use the
* reachability distance, yielding less stable results on inliers.
*
+ * Reference:
+ * <p>
+ * Erich Schubert, Arthur Zimek, Hans-Peter Kriegel<br />
+ * Local outlier detection reconsidered: a generalized view on locality with
+ * applications to spatial, video, and network outlier detection<br />
+ * In: Data Mining and Knowledge Discovery
+ * </p>
+ *
* @author Erich Schubert
*
* @apiviz.has KNNQuery
@@ -68,11 +80,13 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
* @param <O> the type of DatabaseObjects handled by this Algorithm
* @param <D> Distance type
*/
-public class SimpleLOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
+@Reference(authors = "Erich Schubert, Arthur Zimek, Hans-Peter Kriegel", title = "Local outlier detection reconsidered: a generalized view on locality with applications to spatial, video, and network outlier detection", booktitle = "Data Mining and Knowledge Discovery", url = "http://dx.doi.org/10.1007/s10618-012-0300-z")
+@Alias({ "SimpleLOF", "outlier.SimpleLOF", "de.lmu.ifi.dbs.elki.algorithm.outlier.SimpleLOF" })
+public class SimplifiedLOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
- private static final Logging LOG = Logging.getLogger(SimpleLOF.class);
+ private static final Logging LOG = Logging.getLogger(SimplifiedLOF.class);
/**
* Parameter k.
@@ -84,7 +98,7 @@ public class SimpleLOF<O, D extends NumberDistance<D, ?>> extends AbstractDistan
*
* @param k the value of k
*/
- public SimpleLOF(int k, DistanceFunction<? super O, D> distance) {
+ public SimplifiedLOF(int k, DistanceFunction<? super O, D> distance) {
super(distance);
this.k = k + 1;
}
@@ -92,10 +106,11 @@ public class SimpleLOF<O, D extends NumberDistance<D, ?>> extends AbstractDistan
/**
* Run the Simple LOF algorithm.
*
+ * @param database Database to query
* @param relation Data to process
* @return LOF outlier result
*/
- public OutlierResult run(Relation<O> relation) {
+ public OutlierResult run(Database database, Relation<O> relation) {
StepProgress stepprog = LOG.isVerbose() ? new StepProgress("SimpleLOF", 3) : null;
DBIDs ids = relation.getDBIDs();
@@ -107,9 +122,9 @@ public class SimpleLOF<O, D extends NumberDistance<D, ?>> extends AbstractDistan
if (stepprog != null) {
stepprog.beginStep(1, "Materializing neighborhoods w.r.t. distance function.", LOG);
}
- MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<O, D>(relation, getDistanceFunction(), k);
- relation.getDatabase().addIndex(preproc);
- DistanceQuery<O, D> rdq = relation.getDatabase().getDistanceQuery(relation, getDistanceFunction());
+ MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<>(relation, getDistanceFunction(), k);
+ database.addIndex(preproc);
+ DistanceQuery<O, D> rdq = database.getDistanceQuery(relation, getDistanceFunction());
knnq = preproc.getKNNQuery(rdq, k);
}
@@ -120,12 +135,12 @@ public class SimpleLOF<O, D extends NumberDistance<D, ?>> extends AbstractDistan
WritableDoubleDataStore dens = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
FiniteProgress densProgress = LOG.isVerbose() ? new FiniteProgress("Densities", ids.size(), LOG) : null;
for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
- final KNNResult<D> neighbors = knnq.getKNNForDBID(it, k);
+ final KNNList<D> neighbors = knnq.getKNNForDBID(it, k);
double sum = 0.0;
int count = 0;
if (neighbors instanceof DoubleDistanceKNNList) {
// Fast version for double distances
- for (DoubleDistanceDBIDResultIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
+ for (DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
if (DBIDUtil.equal(neighbor, it)) {
continue;
}
@@ -133,7 +148,7 @@ public class SimpleLOF<O, D extends NumberDistance<D, ?>> extends AbstractDistan
count++;
}
} else {
- for (DistanceDBIDResultIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ for (DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
if (DBIDUtil.equal(neighbor, it)) {
continue;
}
@@ -165,7 +180,7 @@ public class SimpleLOF<O, D extends NumberDistance<D, ?>> extends AbstractDistan
final double lrdp = dens.doubleValue(it);
final double lof;
if (lrdp > 0) {
- final KNNResult<D> neighbors = knnq.getKNNForDBID(it, k);
+ final KNNList<D> neighbors = knnq.getKNNForDBID(it, k);
double sum = 0.0;
int count = 0;
for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
@@ -197,7 +212,7 @@ public class SimpleLOF<O, D extends NumberDistance<D, ?>> extends AbstractDistan
}
// Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<Double>("Simple Local Outlier Factor", "simple-lof-outlier", TypeUtil.DOUBLE, lofs, ids);
+ Relation<Double> scoreResult = new MaterializedRelation<>("Simple Local Outlier Factor", "simple-lof-outlier", TypeUtil.DOUBLE, lofs, ids);
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
@@ -234,7 +249,7 @@ public class SimpleLOF<O, D extends NumberDistance<D, ?>> extends AbstractDistan
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter pK = new IntParameter(LOF.K_ID);
+ final IntParameter pK = new IntParameter(LOF.Parameterizer.K_ID);
pK.addConstraint(new GreaterConstraint(1));
if (config.grab(pK)) {
k = pK.getValue();
@@ -242,8 +257,8 @@ public class SimpleLOF<O, D extends NumberDistance<D, ?>> extends AbstractDistan
}
@Override
- protected SimpleLOF<O, D> makeInstance() {
- return new SimpleLOF<O, D>(k, distanceFunction);
+ protected SimplifiedLOF<O, D> makeInstance() {
+ return new SimplifiedLOF<>(k, distanceFunction);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/package-info.java
new file mode 100644
index 00000000..48d4b16a
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/package-info.java
@@ -0,0 +1,27 @@
+/**
+ * <p>LOF family of outlier detection algorithms.</p>
+ */
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
+
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java
index f230fd3b..0d0f7303 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.meta;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -202,7 +202,7 @@ public class ExternalDoubleOutlierScore extends AbstractAlgorithm<OutlierResult>
else {
meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
}
- Relation<Double> scoresult = new MaterializedRelation<Double>("External Outlier", "external-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
+ Relation<Double> scoresult = new MaterializedRelation<>("External Outlier", "external-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
OutlierResult or = new OutlierResult(meta, scoresult);
// Apply scaling
@@ -327,7 +327,7 @@ public class ExternalDoubleOutlierScore extends AbstractAlgorithm<OutlierResult>
inverted = inverstedF.getValue();
}
- ObjectParameter<ScalingFunction> scalingP = new ObjectParameter<ScalingFunction>(SCALING_ID, ScalingFunction.class, IdentityScaling.class);
+ ObjectParameter<ScalingFunction> scalingP = new ObjectParameter<>(SCALING_ID, ScalingFunction.class, IdentityScaling.class);
if(config.grab(scalingP)) {
scaling = scalingP.instantiateClass(config);
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java
index b53a0942..22c20fc3 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.meta;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -28,11 +28,12 @@ import java.util.BitSet;
import java.util.Random;
import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
-import de.lmu.ifi.dbs.elki.algorithm.outlier.LOF;
import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.lof.LOF;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
@@ -127,25 +128,26 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements
/**
* Run the algorithm on a data set.
*
+ * @param database Database context
* @param relation Relation to use
* @return Outlier detection result
*/
- public OutlierResult run(Relation<NumberVector<?>> relation) {
+ public OutlierResult run(Database database, Relation<NumberVector<?>> relation) {
final int dbdim = RelationUtil.dimensionality(relation);
final int mindim = dbdim >> 1;
final int maxdim = dbdim - 1;
final Random rand = rnd.getRandom();
- ArrayList<OutlierResult> results = new ArrayList<OutlierResult>(num);
+ ArrayList<OutlierResult> results = new ArrayList<>(num);
{
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("LOF iterations", num, LOG) : null;
for (int i = 0; i < num; i++) {
BitSet dimset = randomSubspace(dbdim, mindim, maxdim, rand);
SubspaceEuclideanDistanceFunction df = new SubspaceEuclideanDistanceFunction(dimset);
- LOF<NumberVector<?>, DoubleDistance> lof = new LOF<NumberVector<?>, DoubleDistance>(k, df);
+ LOF<NumberVector<?>, DoubleDistance> lof = new LOF<>(k, df);
// run LOF and collect the result
- OutlierResult result = lof.run(relation);
+ OutlierResult result = lof.run(database, relation);
results.add(result);
if (prog != null) {
prog.incrementProcessed(LOG);
@@ -219,7 +221,7 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements
}
}
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
- Relation<Double> scoreres = new MaterializedRelation<Double>("Feature bagging", "fb-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
+ Relation<Double> scoreres = new MaterializedRelation<>("Feature bagging", "fb-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
return new OutlierResult(meta, scoreres);
}
@@ -314,7 +316,7 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter pK = new IntParameter(LOF.K_ID);
+ final IntParameter pK = new IntParameter(LOF.Parameterizer.K_ID);
pK.addConstraint(new GreaterConstraint(1));
if (config.grab(pK)) {
k = pK.getValue();
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java
index 15b94322..69608293 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java
@@ -33,13 +33,12 @@ import java.util.Set;
import java.util.TreeSet;
import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
-import de.lmu.ifi.dbs.elki.algorithm.outlier.LOF;
import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.lof.LOF;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.VectorUtil;
import de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension;
import de.lmu.ifi.dbs.elki.data.projection.NumericalFeatureSelection;
-import de.lmu.ifi.dbs.elki.data.projection.Projection;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.ProxyDatabase;
@@ -66,6 +65,7 @@ import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
@@ -172,7 +172,6 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe
*/
public OutlierResult run(Relation<V> relation) {
final DBIDs ids = relation.getDBIDs();
- final NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(relation);
ArrayList<ArrayDBIDs> subspaceIndex = buildOneDimIndexes(relation);
Set<HiCSSubspace> subspaces = calculateSubspaces(relation, subspaceIndex, rnd.getRandom());
@@ -180,7 +179,7 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe
if (LOG.isVerbose()) {
LOG.verbose("Number of high-contrast subspaces: " + subspaces.size());
}
- List<Relation<Double>> results = new ArrayList<Relation<Double>>();
+ List<Relation<Double>> results = new ArrayList<>();
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Calculating Outlier scores for high Contrast subspaces", subspaces.size(), LOG) : null;
// run outlier detection and collect the result
@@ -192,8 +191,7 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe
}
ProxyDatabase pdb = new ProxyDatabase(ids);
- Projection<V, V> proj = new NumericalFeatureSelection<V>(dimset, factory);
- pdb.addRelation(new ProjectedView<V, V>(relation, proj));
+ pdb.addRelation(new ProjectedView<>(relation, new NumericalFeatureSelection<V>(dimset)));
// run LOF and collect the result
OutlierResult result = outlierAlgorithm.run(pdb);
@@ -221,7 +219,7 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe
minmax.put(sum);
}
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
- Relation<Double> scoreres = new MaterializedRelation<Double>("HiCS", "HiCS-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
+ Relation<Double> scoreres = new MaterializedRelation<>("HiCS", "HiCS-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
return new OutlierResult(meta, scoreres);
}
@@ -236,7 +234,7 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe
*/
private ArrayList<ArrayDBIDs> buildOneDimIndexes(Relation<? extends NumberVector<?>> relation) {
final int dim = RelationUtil.dimensionality(relation);
- ArrayList<ArrayDBIDs> subspaceIndex = new ArrayList<ArrayDBIDs>(dim + 1);
+ ArrayList<ArrayDBIDs> subspaceIndex = new ArrayList<>(dim + 1);
SortDBIDsBySingleDimension comp = new VectorUtil.SortDBIDsBySingleDimension(relation);
for (int i = 0; i < dim; i++) {
@@ -264,8 +262,8 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe
dprog.setProcessed(2, LOG);
}
- TreeSet<HiCSSubspace> subspaceList = new TreeSet<HiCSSubspace>(HiCSSubspace.SORT_BY_SUBSPACE);
- TopBoundedHeap<HiCSSubspace> dDimensionalList = new TopBoundedHeap<HiCSSubspace>(cutoff, HiCSSubspace.SORT_BY_CONTRAST_ASC);
+ TreeSet<HiCSSubspace> subspaceList = new TreeSet<>(HiCSSubspace.SORT_BY_SUBSPACE);
+ TopBoundedHeap<HiCSSubspace> dDimensionalList = new TopBoundedHeap<>(cutoff, HiCSSubspace.SORT_BY_CONTRAST_ASC);
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Generating two-element subsets", (dbdim * (dbdim - 1)) >> 1, LOG) : null;
// compute two-element sets of subspaces
for (int i = 0; i < dbdim; i++) {
@@ -291,10 +289,10 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe
}
// result now contains all d-dimensional sets of subspaces
- ArrayList<HiCSSubspace> candidateList = new ArrayList<HiCSSubspace>(dDimensionalList.size());
- for (HiCSSubspace sub : dDimensionalList) {
- subspaceList.add(sub);
- candidateList.add(sub);
+ ArrayList<HiCSSubspace> candidateList = new ArrayList<>(dDimensionalList.size());
+ for (Heap<HiCSSubspace>.UnorderedIter it = dDimensionalList.unorderedIter(); it.valid(); it.advance()) {
+ subspaceList.add(it.get());
+ candidateList.add(it.get());
}
dDimensionalList.clear();
// candidateList now contains the *m* best d-dimensional sets
@@ -322,8 +320,8 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe
}
// Prune
for (HiCSSubspace cand : candidateList) {
- for (HiCSSubspace nextSet : dDimensionalList) {
- if (nextSet.contrast > cand.contrast) {
+ for (Heap<HiCSSubspace>.UnorderedIter it = dDimensionalList.unorderedIter(); it.valid(); it.advance()) {
+ if (it.get().contrast > cand.contrast) {
subspaceList.remove(cand);
break;
}
@@ -610,12 +608,12 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe
alpha = alphaP.doubleValue();
}
- final ObjectParameter<OutlierAlgorithm> algoP = new ObjectParameter<OutlierAlgorithm>(ALGO_ID, OutlierAlgorithm.class, LOF.class);
+ final ObjectParameter<OutlierAlgorithm> algoP = new ObjectParameter<>(ALGO_ID, OutlierAlgorithm.class, LOF.class);
if (config.grab(algoP)) {
outlierAlgorithm = algoP.instantiateClass(config);
}
- final ObjectParameter<GoodnessOfFitTest> testP = new ObjectParameter<GoodnessOfFitTest>(TEST_ID, GoodnessOfFitTest.class, KolmogorovSmirnovTest.class);
+ final ObjectParameter<GoodnessOfFitTest> testP = new ObjectParameter<>(TEST_ID, GoodnessOfFitTest.class, KolmogorovSmirnovTest.class);
if (config.grab(testP)) {
statTest = testP.instantiateClass(config);
}
@@ -634,7 +632,7 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe
@Override
protected HiCS<V> makeInstance() {
- return new HiCS<V>(m, alpha, outlierAlgorithm, statTest, cutoff, rnd);
+ return new HiCS<>(m, alpha, outlierAlgorithm, statTest, cutoff, rnd);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java
index 387041da..8ebdc27a 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.meta;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -50,6 +50,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
import de.lmu.ifi.dbs.elki.utilities.scaling.ScalingFunction;
import de.lmu.ifi.dbs.elki.utilities.scaling.outlier.OutlierScalingFunction;
+import de.lmu.ifi.dbs.elki.workflow.AlgorithmStep;
/**
* Scale another outlier score using the given scaling function.
@@ -114,7 +115,7 @@ public class RescaleMetaOutlierAlgorithm extends AbstractAlgorithm<OutlierResult
}
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), scaling.getMin(), scaling.getMax());
- Relation<Double> scoresult = new MaterializedRelation<Double>("Scaled Outlier", "scaled-outlier", TypeUtil.DOUBLE, scaledscores, scores.getDBIDs());
+ Relation<Double> scoresult = new MaterializedRelation<>("Scaled Outlier", "scaled-outlier", TypeUtil.DOUBLE, scaledscores, scores.getDBIDs());
OutlierResult result = new OutlierResult(meta, scoresult);
result.addChildResult(innerresult);
@@ -167,12 +168,12 @@ public class RescaleMetaOutlierAlgorithm extends AbstractAlgorithm<OutlierResult
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- ObjectParameter<Algorithm> algP = new ObjectParameter<Algorithm>(OptionID.ALGORITHM, OutlierAlgorithm.class);
+ ObjectParameter<Algorithm> algP = new ObjectParameter<>(AlgorithmStep.Parameterizer.ALGORITHM_ID, OutlierAlgorithm.class);
if(config.grab(algP)) {
algorithm = algP.instantiateClass(config);
}
- ObjectParameter<ScalingFunction> scalingP = new ObjectParameter<ScalingFunction>(SCALING_ID, ScalingFunction.class);
+ ObjectParameter<ScalingFunction> scalingP = new ObjectParameter<>(SCALING_ID, ScalingFunction.class);
if(config.grab(scalingP)) {
scaling = scalingP.instantiateClass(config);
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/SimpleOutlierEnsemble.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/SimpleOutlierEnsemble.java
index b7791fc4..d40af384 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/SimpleOutlierEnsemble.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/SimpleOutlierEnsemble.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.meta;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -58,6 +58,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParamet
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectListParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+import de.lmu.ifi.dbs.elki.workflow.AlgorithmStep;
/**
* Simple outlier ensemble method.
@@ -100,7 +101,7 @@ public class SimpleOutlierEnsemble extends AbstractAlgorithm<OutlierResult> impl
int num = algorithms.size();
// Run inner outlier algorithms
ModifiableDBIDs ids = DBIDUtil.newHashSet();
- ArrayList<OutlierResult> results = new ArrayList<OutlierResult>(num);
+ ArrayList<OutlierResult> results = new ArrayList<>(num);
{
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Inner outlier algorithms", num, LOG) : null;
for (Algorithm alg : algorithms) {
@@ -155,7 +156,7 @@ public class SimpleOutlierEnsemble extends AbstractAlgorithm<OutlierResult> impl
}
}
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
- Relation<Double> scores = new MaterializedRelation<Double>("Simple Outlier Ensemble", "ensemble-outlier", TypeUtil.DOUBLE, sumscore, ids);
+ Relation<Double> scores = new MaterializedRelation<>("Simple Outlier Ensemble", "ensemble-outlier", TypeUtil.DOUBLE, sumscore, ids);
return new OutlierResult(meta, scores);
}
@@ -200,7 +201,7 @@ public class SimpleOutlierEnsemble extends AbstractAlgorithm<OutlierResult> impl
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- ObjectListParameter<OutlierAlgorithm> algP = new ObjectListParameter<OutlierAlgorithm>(OptionID.ALGORITHM, OutlierAlgorithm.class);
+ ObjectListParameter<OutlierAlgorithm> algP = new ObjectListParameter<>(AlgorithmStep.Parameterizer.ALGORITHM_ID, OutlierAlgorithm.class);
if (config.grab(algP)) {
ListParameterization subconfig = new ListParameterization();
ChainedParameterization chain = new ChainedParameterization(subconfig, config);
@@ -208,7 +209,7 @@ public class SimpleOutlierEnsemble extends AbstractAlgorithm<OutlierResult> impl
algorithms = algP.instantiateClasses(chain);
subconfig.logAndClearReportedErrors();
}
- ObjectParameter<EnsembleVoting> votingP = new ObjectParameter<EnsembleVoting>(VOTING_ID, EnsembleVoting.class);
+ ObjectParameter<EnsembleVoting> votingP = new ObjectParameter<>(VOTING_ID, EnsembleVoting.class);
if (config.grab(votingP)) {
voting = votingP.instantiateClass(config);
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/package-info.java
index 7c5dd8b0..f28f8db3 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/package-info.java
@@ -8,7 +8,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2012
+Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/package-info.java
index eca0d876..0ce6f9b5 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/package-info.java
@@ -14,7 +14,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2012
+Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractDistanceBasedSpatialOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractDistanceBasedSpatialOutlier.java
index f37ee182..e059c16c 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractDistanceBasedSpatialOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractDistanceBasedSpatialOutlier.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -25,8 +25,8 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
import de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractNeighborhoodOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractNeighborhoodOutlier.java
index d3770504..3b3e71b3 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractNeighborhoodOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractNeighborhoodOutlier.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -88,7 +88,7 @@ public abstract class AbstractNeighborhoodOutlier<O> extends AbstractAlgorithm<O
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final ObjectParameter<NeighborSetPredicate.Factory<O>> param = new ObjectParameter<NeighborSetPredicate.Factory<O>>(NEIGHBORHOOD_ID, NeighborSetPredicate.Factory.class);
+ final ObjectParameter<NeighborSetPredicate.Factory<O>> param = new ObjectParameter<>(NEIGHBORHOOD_ID, NeighborSetPredicate.Factory.class);
if(config.grab(param)) {
npredf = param.instantiateClass(config);
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java
index cd5670f7..5035cf6f 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -28,22 +28,23 @@ import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.QueryUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDVar;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.ProxyView;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
@@ -116,23 +117,24 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?>, D extends
/**
* Run the algorithm
*
+ * @param database Database to process
* @param relationx Spatial relation
* @param relationy Attribute relation
* @return Algorithm result
*/
- public OutlierResult run(Relation<V> relationx, Relation<? extends NumberVector<?>> relationy) {
+ public OutlierResult run(Database database, Relation<V> relationx, Relation<? extends NumberVector<?>> relationy) {
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relationx.getDBIDs(), DataStoreFactory.HINT_STATIC);
DoubleMinMax mm = new DoubleMinMax(0.0, 0.0);
// Outlier detection loop
{
ModifiableDBIDs idview = DBIDUtil.newHashSet(relationx.getDBIDs());
- ProxyView<V> proxy = new ProxyView<V>(relationx.getDatabase(), idview, relationx);
+ ProxyView<V> proxy = new ProxyView<>(database, idview, relationx);
double phialpha = NormalDistribution.standardNormalQuantile(1.0 - alpha *.5);
// Detect outliers while significant.
while(true) {
- Pair<DBID, Double> candidate = singleIteration(proxy, relationy);
+ Pair<DBIDVar, Double> candidate = singleIteration(proxy, relationy);
if(candidate.second < phialpha) {
break;
}
@@ -149,7 +151,7 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?>, D extends
}
}
- Relation<Double> scoreResult = new MaterializedRelation<Double>("GLSSODBackward", "GLSSODbackward-outlier", TypeUtil.DOUBLE, scores, relationx.getDBIDs());
+ Relation<Double> scoreResult = new MaterializedRelation<>("GLSSODBackward", "GLSSODbackward-outlier", TypeUtil.DOUBLE, scores, relationx.getDBIDs());
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(mm.getMin(), mm.getMax(), 0, Double.POSITIVE_INFINITY, 0);
return new OutlierResult(scoreMeta, scoreResult);
}
@@ -161,7 +163,7 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?>, D extends
* @param relationy Attribute relation
* @return Top outlier and associated score
*/
- private Pair<DBID, Double> singleIteration(Relation<V> relationx, Relation<? extends NumberVector<?>> relationy) {
+ private Pair<DBIDVar, Double> singleIteration(Relation<V> relationx, Relation<? extends NumberVector<?>> relationy) {
final int dim = RelationUtil.dimensionality(relationx);
final int dimy = RelationUtil.dimensionality(relationy);
assert (dim == 2);
@@ -203,7 +205,7 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?>, D extends
// Fill the neighborhood matrix F:
{
- KNNResult<D> neighbors = knnQuery.getKNNForDBID(id, k + 1);
+ KNNList<D> neighbors = knnQuery.getKNNForDBID(id, k + 1);
ModifiableDBIDs neighborhood = DBIDUtil.newArray(neighbors.size());
for(DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
if(DBIDUtil.equal(id, neighbor)) {
@@ -237,7 +239,7 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?>, D extends
// calculate the absolute values of standard residuals
Matrix E = F.times(Y.minus(X.times(b))).timesEquals(norm);
- DBID worstid = null;
+ DBIDVar worstid = DBIDUtil.newVar();
double worstscore = Double.NEGATIVE_INFINITY;
int i = 0;
for(DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
@@ -245,11 +247,11 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?>, D extends
// double err = Math.abs(E.get(i, 0));
if(err > worstscore) {
worstscore = err;
- worstid = DBIDUtil.deref(id);
+ worstid.set(id);
}
}
- return new Pair<DBID, Double>(worstid, worstscore);
+ return new Pair<>(worstid, worstscore);
}
@Override
@@ -302,7 +304,7 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?>, D extends
@Override
protected CTLuGLSBackwardSearchAlgorithm<V, D> makeInstance() {
- return new CTLuGLSBackwardSearchAlgorithm<V, D>(distanceFunction, k, alpha);
+ return new CTLuGLSBackwardSearchAlgorithm<>(distanceFunction, k, alpha);
}
/**
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java
index 2caee128..1712dd4f 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -124,7 +124,7 @@ public class CTLuMeanMultipleAttributes<N, O extends NumberVector<?>> extends Ab
scores.putDouble(iditer, score);
}
- Relation<Double> scoreResult = new MaterializedRelation<Double>("mean multiple attributes spatial outlier", "mean-multipleattributes-outlier", TypeUtil.DOUBLE, scores, attributes.getDBIDs());
+ Relation<Double> scoreResult = new MaterializedRelation<>("mean multiple attributes spatial outlier", "mean-multipleattributes-outlier", TypeUtil.DOUBLE, scores, attributes.getDBIDs());
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0);
OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
or.addChildResult(npred);
@@ -149,7 +149,7 @@ public class CTLuMeanMultipleAttributes<N, O extends NumberVector<?>> extends Ab
public static class Parameterizer<N, O extends NumberVector<?>> extends AbstractNeighborhoodOutlier.Parameterizer<N> {
@Override
protected CTLuMeanMultipleAttributes<N, O> makeInstance() {
- return new CTLuMeanMultipleAttributes<N, O>(npredf);
+ return new CTLuMeanMultipleAttributes<>(npredf);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java
index 7755a459..9848d664 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java
@@ -132,7 +132,7 @@ public class CTLuMedianAlgorithm<N> extends AbstractNeighborhoodOutlier<N> {
scores.putDouble(iditer, score);
}
- Relation<Double> scoreResult = new MaterializedRelation<Double>("MO", "Median-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
+ Relation<Double> scoreResult = new MaterializedRelation<>("MO", "Median-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0);
OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
or.addChildResult(npred);
@@ -161,7 +161,7 @@ public class CTLuMedianAlgorithm<N> extends AbstractNeighborhoodOutlier<N> {
public static class Parameterizer<N> extends AbstractNeighborhoodOutlier.Parameterizer<N> {
@Override
protected CTLuMedianAlgorithm<N> makeInstance() {
- return new CTLuMedianAlgorithm<N>(npredf);
+ return new CTLuMedianAlgorithm<>(npredf);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java
index 0d515ac7..583958fe 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -150,7 +150,7 @@ public class CTLuMedianMultipleAttributes<N, O extends NumberVector<?>> extends
scores.putDouble(iditer, score);
}
- Relation<Double> scoreResult = new MaterializedRelation<Double>("Median multiple attributes outlier", "median-outlier", TypeUtil.DOUBLE, scores, attributes.getDBIDs());
+ Relation<Double> scoreResult = new MaterializedRelation<>("Median multiple attributes outlier", "median-outlier", TypeUtil.DOUBLE, scores, attributes.getDBIDs());
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0);
OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
or.addChildResult(npred);
@@ -175,7 +175,7 @@ public class CTLuMedianMultipleAttributes<N, O extends NumberVector<?>> extends
public static class Parameterizer<N, O extends NumberVector<?>> extends AbstractNeighborhoodOutlier.Parameterizer<N> {
@Override
protected CTLuMedianMultipleAttributes<N, O> makeInstance() {
- return new CTLuMedianMultipleAttributes<N, O>(npredf);
+ return new CTLuMedianMultipleAttributes<>(npredf);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java
index 3b876bba..da527af0 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -136,7 +136,7 @@ public class CTLuMoranScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<
scores.putDouble(iditer, score);
}
- Relation<Double> scoreResult = new MaterializedRelation<Double>("MoranOutlier", "Moran Scatterplot Outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
+ Relation<Double> scoreResult = new MaterializedRelation<>("MoranOutlier", "Moran Scatterplot Outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 0);
OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
or.addChildResult(npred);
@@ -165,7 +165,7 @@ public class CTLuMoranScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<
public static class Parameterizer<N> extends AbstractNeighborhoodOutlier.Parameterizer<N> {
@Override
protected CTLuMoranScatterplotOutlier<N> makeInstance() {
- return new CTLuMoranScatterplotOutlier<N>(npredf);
+ return new CTLuMoranScatterplotOutlier<>(npredf);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java
index ec92afd7..c8efe4da 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java
@@ -38,12 +38,11 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNHeap;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNHeap;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNUtil;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
@@ -137,7 +136,7 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac
// construct the relation Matrix of the ec-graph
Matrix E = new Matrix(ids.size(), ids.size());
- KNNHeap<D> heap = KNNUtil.newHeap(distFunc.getDistanceFactory(), k);
+ KNNHeap<D> heap = DBIDUtil.newHeap(distFunc.getDistanceFactory(), k);
{
int i = 0;
for(DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
@@ -226,7 +225,7 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac
scores.putDouble(id, score);
}
- Relation<Double> scoreResult = new MaterializedRelation<Double>("randomwalkec", "RandomWalkEC", TypeUtil.DOUBLE, scores, relation.getDBIDs());
+ Relation<Double> scoreResult = new MaterializedRelation<>("randomwalkec", "RandomWalkEC", TypeUtil.DOUBLE, scores, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
return new OutlierResult(scoreMeta, scoreResult);
}
@@ -329,7 +328,7 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac
@Override
protected CTLuRandomWalkEC<N, D> makeInstance() {
- return new CTLuRandomWalkEC<N, D>(distanceFunction, alpha, c, k);
+ return new CTLuRandomWalkEC<>(distanceFunction, alpha, c, k);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java
index 295c7414..bcbbfd2a 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -160,7 +160,7 @@ public class CTLuScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<N> {
}
}
// build representation
- Relation<Double> scoreResult = new MaterializedRelation<Double>("SPO", "Scatterplot-Outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
+ Relation<Double> scoreResult = new MaterializedRelation<>("SPO", "Scatterplot-Outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0);
OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
or.addChildResult(npred);
@@ -189,7 +189,7 @@ public class CTLuScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<N> {
public static class Parameterizer<N> extends AbstractNeighborhoodOutlier.Parameterizer<N> {
@Override
protected CTLuScatterplotOutlier<N> makeInstance() {
- return new CTLuScatterplotOutlier<N>(npredf);
+ return new CTLuScatterplotOutlier<>(npredf);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java
index 02573a06..d6cb5a50 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -133,7 +133,7 @@ public class CTLuZTestOutlier<N> extends AbstractNeighborhoodOutlier<N> {
}
// Wrap result
- Relation<Double> scoreResult = new MaterializedRelation<Double>("ZTest", "Z Test score", TypeUtil.DOUBLE, scores, relation.getDBIDs());
+ Relation<Double> scoreResult = new MaterializedRelation<>("ZTest", "Z Test score", TypeUtil.DOUBLE, scores, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0);
OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
or.addChildResult(npred);
@@ -162,7 +162,7 @@ public class CTLuZTestOutlier<N> extends AbstractNeighborhoodOutlier<N> {
public static class Parameterizer<N> extends AbstractNeighborhoodOutlier.Parameterizer<N> {
@Override
protected CTLuZTestOutlier<N> makeInstance() {
- return new CTLuZTestOutlier<N>(npredf);
+ return new CTLuZTestOutlier<>(npredf);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java
index 720fa39f..08c3e29b 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -187,7 +187,7 @@ public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistance
slomminmax.put(slom);
}
- Relation<Double> scoreResult = new MaterializedRelation<Double>("SLOM", "slom-outlier", TypeUtil.DOUBLE, sloms, relation.getDBIDs());
+ Relation<Double> scoreResult = new MaterializedRelation<>("SLOM", "slom-outlier", TypeUtil.DOUBLE, sloms, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(slomminmax.getMin(), slomminmax.getMax(), 0.0, Double.POSITIVE_INFINITY);
OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
or.addChildResult(npred);
@@ -218,7 +218,7 @@ public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistance
public static class Parameterizer<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedSpatialOutlier.Parameterizer<N, O, D> {
@Override
protected SLOM<N, O, D> makeInstance() {
- return new SLOM<N, O, D>(npredf, distanceFunction);
+ return new SLOM<>(npredf, distanceFunction);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java
index a6f39a60..a2605f39 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java
@@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2012
+Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -46,7 +46,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
/**
* The Spatial Outlier Factor (SOF) is a spatial
- * {@link de.lmu.ifi.dbs.elki.algorithm.outlier.LOF LOF} variation.
+ * {@link de.lmu.ifi.dbs.elki.algorithm.outlier.lof.LOF LOF} variation.
*
* Since the "reachability distance" of LOF cannot be used canonically in the
* bichromatic case, this part of LOF is dropped and the exact distance is used
@@ -138,7 +138,7 @@ public class SOF<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceB
}
// Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<Double>("Spatial Outlier Factor", "sof-outlier", TypeUtil.DOUBLE, lofs, relation.getDBIDs());
+ Relation<Double> scoreResult = new MaterializedRelation<>("Spatial Outlier Factor", "sof-outlier", TypeUtil.DOUBLE, lofs, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
or.addChildResult(npred);
@@ -164,7 +164,7 @@ public class SOF<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceB
public static class Parameterizer<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedSpatialOutlier.Parameterizer<N, O, D> {
@Override
protected SOF<N, O, D> makeInstance() {
- return new SOF<N, O, D>(npredf, distanceFunction);
+ return new SOF<>(npredf, distanceFunction);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java
index 9aa21b66..e07ce480 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java
@@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2012
+Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -186,7 +186,7 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> {
minmax.put(score);
}
//
- Relation<Double> scoreResult = new MaterializedRelation<Double>("TrimmedMean", "Trimmed Mean Score", TypeUtil.DOUBLE, scores, relation.getDBIDs());
+ Relation<Double> scoreResult = new MaterializedRelation<>("TrimmedMean", "Trimmed Mean Score", TypeUtil.DOUBLE, scores, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0);
OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
or.addChildResult(npred);
@@ -237,7 +237,7 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> {
@Override
protected TrimmedMeanApproach<N> makeInstance() {
- return new TrimmedMeanApproach<N>(npredf, p);
+ return new TrimmedMeanApproach<>(npredf, p);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java
index 2c706ce0..ef237928 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java
index 4aa96b25..c4fc4407 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -202,7 +202,7 @@ public class ExtendedNeighborhood extends AbstractPrecomputedNeighborhood {
* @return Inner neighborhood.
*/
protected static <O> NeighborSetPredicate.Factory<O> getParameterInnerNeighborhood(Parameterization config) {
- final ObjectParameter<NeighborSetPredicate.Factory<O>> param = new ObjectParameter<NeighborSetPredicate.Factory<O>>(NEIGHBORHOOD_ID, NeighborSetPredicate.Factory.class);
+ final ObjectParameter<NeighborSetPredicate.Factory<O>> param = new ObjectParameter<>(NEIGHBORHOOD_ID, NeighborSetPredicate.Factory.class);
if(config.grab(param)) {
return param.instantiateClass(config);
}
@@ -233,7 +233,7 @@ public class ExtendedNeighborhood extends AbstractPrecomputedNeighborhood {
@Override
protected ExtendedNeighborhood.Factory<O> makeInstance() {
- return new ExtendedNeighborhood.Factory<O>(inner, steps);
+ return new ExtendedNeighborhood.Factory<>(inner, steps);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java
index 01052c1f..96896bd8 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -36,6 +36,7 @@ import de.lmu.ifi.dbs.elki.data.ExternalID;
import de.lmu.ifi.dbs.elki.data.LabelList;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
@@ -119,8 +120,8 @@ public class ExternalNeighborhood extends AbstractPrecomputedNeighborhood {
}
@Override
- public NeighborSetPredicate instantiate(Relation<?> database) {
- DataStore<DBIDs> store = loadNeighbors(database);
+ public NeighborSetPredicate instantiate(Relation<?> relation) {
+ DataStore<DBIDs> store = loadNeighbors(relation.getDatabase(), relation);
ExternalNeighborhood neighborhood = new ExternalNeighborhood(store);
return neighborhood;
}
@@ -133,8 +134,8 @@ public class ExternalNeighborhood extends AbstractPrecomputedNeighborhood {
/**
* Method to load the external neighbors.
*/
- private DataStore<DBIDs> loadNeighbors(Relation<?> database) {
- final WritableDataStore<DBIDs> store = DataStoreUtil.makeStorage(database.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC | DataStoreFactory.HINT_TEMP, DBIDs.class);
+ private DataStore<DBIDs> loadNeighbors(Database database, Relation<?> relation) {
+ final WritableDataStore<DBIDs> store = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC | DataStoreFactory.HINT_TEMP, DBIDs.class);
if(LOG.isVerbose()) {
LOG.verbose("Loading external neighborhoods.");
@@ -146,11 +147,11 @@ public class ExternalNeighborhood extends AbstractPrecomputedNeighborhood {
// Build a map label/ExternalId -> DBID
// (i.e. a reverse index!)
// TODO: move this into the database layer to share?
- Map<String, DBID> lblmap = new HashMap<String, DBID>(database.size() << 1);
+ Map<String, DBID> lblmap = new HashMap<>(relation.size() << 1);
{
- Relation<LabelList> olq = database.getDatabase().getRelation(TypeUtil.LABELLIST);
- Relation<ExternalID> eidq = database.getDatabase().getRelation(TypeUtil.EXTERNALID);
- for(DBIDIter iditer = database.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ Relation<LabelList> olq = database.getRelation(TypeUtil.LABELLIST);
+ Relation<ExternalID> eidq = database.getRelation(TypeUtil.EXTERNALID);
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
if(eidq != null) {
ExternalID eid = eidq.get(iditer);
if(eid != null) {
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java
index b52f8e91..25283d5c 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java
index f6000ef0..c43ebba7 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java
@@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2012
+Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -32,10 +32,10 @@ import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
@@ -119,7 +119,7 @@ public class PrecomputedKNearestNeighborNeighborhood<D extends Distance<D>> exte
// TODO: use bulk?
WritableDataStore<DBIDs> s = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, DBIDs.class);
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- KNNResult<D> neighbors = knnQuery.getKNNForDBID(iditer, k);
+ KNNList<D> neighbors = knnQuery.getKNNForDBID(iditer, k);
ArrayModifiableDBIDs neighbours = DBIDUtil.newArray(neighbors.size());
for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
neighbours.add(neighbor);
@@ -172,7 +172,7 @@ public class PrecomputedKNearestNeighborNeighborhood<D extends Distance<D>> exte
if(config.grab(kP)) {
k = kP.getValue();
}
- final ObjectParameter<DistanceFunction<? super O, D>> distP = new ObjectParameter<DistanceFunction<? super O, D>>(DISTANCEFUNCTION_ID, DistanceFunction.class);
+ final ObjectParameter<DistanceFunction<? super O, D>> distP = new ObjectParameter<>(DISTANCEFUNCTION_ID, DistanceFunction.class);
if(config.grab(distP)) {
distFunc = distP.instantiateClass(config);
}
@@ -180,7 +180,7 @@ public class PrecomputedKNearestNeighborNeighborhood<D extends Distance<D>> exte
@Override
protected PrecomputedKNearestNeighborNeighborhood.Factory<O, D> makeInstance() {
- return new PrecomputedKNearestNeighborNeighborhood.Factory<O, D>(k, distFunc);
+ return new PrecomputedKNearestNeighborNeighborhood.Factory<>(k, distFunc);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/package-info.java
index 47ca5ad2..fd51ca22 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/package-info.java
@@ -5,7 +5,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2012
+Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java
index f1c68577..05bf2f18 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.weighted;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -89,7 +89,7 @@ public class LinearWeightedExtendedNeighborhood implements WeightedNeighborSetPr
@Override
public Collection<DoubleDBIDPair> getWeightedNeighbors(DBIDRef reference) {
ModifiableDBIDs seen = DBIDUtil.newHashSet();
- List<DoubleDBIDPair> result = new ArrayList<DoubleDBIDPair>();
+ List<DoubleDBIDPair> result = new ArrayList<>();
// Add starting object
result.add(DBIDUtil.newPair(computeWeight(0), reference));
@@ -194,7 +194,7 @@ public class LinearWeightedExtendedNeighborhood implements WeightedNeighborSetPr
* @return Inner neighborhood.
*/
protected static <O> NeighborSetPredicate.Factory<O> getParameterInnerNeighborhood(Parameterization config) {
- final ObjectParameter<NeighborSetPredicate.Factory<O>> param = new ObjectParameter<NeighborSetPredicate.Factory<O>>(NEIGHBORHOOD_ID, NeighborSetPredicate.Factory.class);
+ final ObjectParameter<NeighborSetPredicate.Factory<O>> param = new ObjectParameter<>(NEIGHBORHOOD_ID, NeighborSetPredicate.Factory.class);
if(config.grab(param)) {
return param.instantiateClass(config);
}
@@ -225,7 +225,7 @@ public class LinearWeightedExtendedNeighborhood implements WeightedNeighborSetPr
@Override
protected LinearWeightedExtendedNeighborhood.Factory<O> makeInstance() {
- return new LinearWeightedExtendedNeighborhood.Factory<O>(inner, steps);
+ return new LinearWeightedExtendedNeighborhood.Factory<>(inner, steps);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java
index c179d81f..9bdb7d51 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.weighted;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -64,7 +64,7 @@ public class UnweightedNeighborhoodAdapter implements WeightedNeighborSetPredica
@Override
public Collection<DoubleDBIDPair> getWeightedNeighbors(DBIDRef reference) {
DBIDs neighbors = inner.getNeighborDBIDs(reference);
- ArrayList<DoubleDBIDPair> adapted = new ArrayList<DoubleDBIDPair>(neighbors.size());
+ ArrayList<DoubleDBIDPair> adapted = new ArrayList<>(neighbors.size());
for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
adapted.add(DBIDUtil.newPair(1.0, iter));
}
@@ -130,7 +130,7 @@ public class UnweightedNeighborhoodAdapter implements WeightedNeighborSetPredica
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- ObjectParameter<NeighborSetPredicate.Factory<O>> innerP = new ObjectParameter<NeighborSetPredicate.Factory<O>>(INNER_ID, NeighborSetPredicate.Factory.class);
+ ObjectParameter<NeighborSetPredicate.Factory<O>> innerP = new ObjectParameter<>(INNER_ID, NeighborSetPredicate.Factory.class);
if(config.grab(innerP)) {
inner = innerP.instantiateClass(config);
}
@@ -138,7 +138,7 @@ public class UnweightedNeighborhoodAdapter implements WeightedNeighborSetPredica
@Override
protected UnweightedNeighborhoodAdapter.Factory<O> makeInstance() {
- return new UnweightedNeighborhoodAdapter.Factory<O>(inner);
+ return new UnweightedNeighborhoodAdapter.Factory<>(inner);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java
index 16d37587..ca0fa620 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.weighted;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/package-info.java
index 39165cfd..d7c7a797 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/package-info.java
@@ -5,7 +5,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2012
+Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/package-info.java
index 13bf3f25..5a65d8c1 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/package-info.java
@@ -5,7 +5,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2012
+Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java
index 1965914d..ae04fef4 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.subspace;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -37,26 +37,28 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
-import de.lmu.ifi.dbs.elki.database.ids.DistanceDBIDPair;
-import de.lmu.ifi.dbs.elki.database.ids.DoubleDistanceDBIDPair;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDPair;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDList;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDPair;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDPairList;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.ModifiableDoubleDistanceDBIDList;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDList;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDResultIter;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.math.MeanVariance;
-import de.lmu.ifi.dbs.elki.math.statistics.EpanechnikovKernelDensityFunction;
-import de.lmu.ifi.dbs.elki.math.statistics.KernelDensityFunction;
import de.lmu.ifi.dbs.elki.math.statistics.distribution.GammaDistribution;
+import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.EpanechnikovKernelDensityFunction;
+import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.KernelDensityFunction;
import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
@@ -145,7 +147,7 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier
}
OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0., 1., 1.);
- OutlierResult outresResult = new OutlierResult(meta, new MaterializedRelation<Double>("OUTRES", "outres-score", TypeUtil.DOUBLE, ranks, relation.getDBIDs()));
+ OutlierResult outresResult = new OutlierResult(meta, new MaterializedRelation<>("OUTRES", "outres-score", TypeUtil.DOUBLE, ranks, relation.getDBIDs()));
return outresResult;
}
@@ -175,7 +177,7 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier
final DoubleDistance range = new DoubleDistance(adjustedEps * 2.);
RangeQuery<V, DoubleDistance> rq = QueryUtil.getRangeQuery(kernel.relation, df, range);
- DistanceDBIDResult<DoubleDistance> neighc = rq.getRangeForDBID(id, range);
+ DistanceDBIDList<DoubleDistance> neighc = rq.getRangeForDBID(id, range);
DoubleDistanceDBIDList neigh = refineRange(neighc, adjustedEps);
if(neigh.size() > 2) {
// Relevance test
@@ -183,7 +185,7 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier
final double density = kernel.subspaceDensity(subspace, neigh);
// Compute mean and standard deviation for densities of neighbors.
meanv.reset();
- for (DoubleDistanceDBIDResultIter neighbor = neigh.iter(); neighbor.valid(); neighbor.advance()) {
+ for (DoubleDistanceDBIDListIter neighbor = neigh.iter(); neighbor.valid(); neighbor.advance()) {
DoubleDistanceDBIDList n2 = subsetNeighborhoodQuery(neighc, neighbor, df, adjustedEps, kernel);
meanv.put(kernel.subspaceDensity(subspace, n2));
}
@@ -208,10 +210,10 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier
* @param adjustedEps New epsilon
* @return refined list
*/
- private DoubleDistanceDBIDList refineRange(DistanceDBIDResult<DoubleDistance> neighc, double adjustedEps) {
- DoubleDistanceDBIDList n = new DoubleDistanceDBIDList(neighc.size());
+ private DoubleDistanceDBIDList refineRange(DistanceDBIDList<DoubleDistance> neighc, double adjustedEps) {
+ ModifiableDoubleDistanceDBIDList n = new DoubleDistanceDBIDPairList(neighc.size());
// We don't have a guarantee for this list to be sorted
- for (DistanceDBIDResultIter<DoubleDistance> neighbor = neighc.iter(); neighbor.valid(); neighbor.advance()) {
+ for (DistanceDBIDListIter<DoubleDistance> neighbor = neighc.iter(); neighbor.valid(); neighbor.advance()) {
DistanceDBIDPair<DoubleDistance> p = neighbor.getDistancePair();
if(p instanceof DoubleDistanceDBIDPair) {
if(((DoubleDistanceDBIDPair) p).doubleDistance() <= adjustedEps) {
@@ -238,10 +240,10 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier
* @param kernel Kernel
* @return Neighbors of neighbor object
*/
- private DoubleDistanceDBIDList subsetNeighborhoodQuery(DistanceDBIDResult<DoubleDistance> neighc, DBIDRef dbid, PrimitiveDoubleDistanceFunction<? super V> df, double adjustedEps, KernelDensityEstimator kernel) {
- DoubleDistanceDBIDList n = new DoubleDistanceDBIDList(neighc.size());
+ private DoubleDistanceDBIDList subsetNeighborhoodQuery(DistanceDBIDList<DoubleDistance> neighc, DBIDRef dbid, PrimitiveDoubleDistanceFunction<? super V> df, double adjustedEps, KernelDensityEstimator kernel) {
+ ModifiableDoubleDistanceDBIDList n = new DoubleDistanceDBIDPairList(neighc.size());
V query = kernel.relation.get(dbid);
- for (DistanceDBIDResultIter<DoubleDistance> neighbor = neighc.iter(); neighbor.valid(); neighbor.advance()) {
+ for (DistanceDBIDListIter<DoubleDistance> neighbor = neighc.iter(); neighbor.valid(); neighbor.advance()) {
DistanceDBIDPair<DoubleDistance> p = neighbor.getDistancePair();
double dist = df.doubleDistance(query, kernel.relation.get(p));
if(dist <= adjustedEps) {
@@ -348,7 +350,7 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier
final double bandwidth = optimalBandwidth(subspace.cardinality());
double density = 0;
- for (DoubleDistanceDBIDResultIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ for (DoubleDistanceDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
double v = neighbor.doubleDistance() / bandwidth;
if(v < 1) {
density += 1 - (v * v);
@@ -426,7 +428,7 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier
@Override
protected OUTRES<O> makeInstance() {
- return new OUTRES<O>(eps);
+ return new OUTRES<>(eps);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java
index 79243213..96c8875f 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.subspace;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -133,7 +133,7 @@ public class OutRankS1 extends AbstractAlgorithm<OutlierResult> implements Outli
}
}
- Relation<Double> scoreResult = new MaterializedRelation<Double>("OutRank-S1", "OUTRANK_S1", TypeUtil.DOUBLE, score, ids);
+ Relation<Double> scoreResult = new MaterializedRelation<>("OutRank-S1", "OUTRANK_S1", TypeUtil.DOUBLE, score, ids);
OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0, Double.POSITIVE_INFINITY);
OutlierResult res = new OutlierResult(meta, scoreResult);
res.addChildResult(clustering);
@@ -181,7 +181,7 @@ public class OutRankS1 extends AbstractAlgorithm<OutlierResult> implements Outli
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- ObjectParameter<SubspaceClusteringAlgorithm<? extends SubspaceModel<?>>> algP = new ObjectParameter<SubspaceClusteringAlgorithm<? extends SubspaceModel<?>>>(ALGORITHM_ID, SubspaceClusteringAlgorithm.class);
+ ObjectParameter<SubspaceClusteringAlgorithm<? extends SubspaceModel<?>>> algP = new ObjectParameter<>(ALGORITHM_ID, SubspaceClusteringAlgorithm.class);
if (config.grab(algP)) {
algorithm = algP.instantiateClass(config);
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java
index 35a780cd..b2255e67 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.subspace;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -164,7 +164,7 @@ public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
progress.incrementProcessed(LOG);
}
DBIDs knnList = getNearestNeighbors(relation, snnInstance, iter);
- SODModel<V> model = new SODModel<V>(relation, knnList, alpha, relation.get(iter));
+ SODModel<V> model = new SODModel<>(relation, knnList, alpha, relation.get(iter));
sod_models.put(iter, model);
minmax.put(model.getSod());
}
@@ -172,7 +172,7 @@ public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
progress.ensureCompleted(LOG);
}
// combine results.
- Relation<SODModel<?>> models = new MaterializedRelation<SODModel<?>>("Subspace Outlier Model", "sod-outlier", new SimpleTypeInformation<SODModel<?>>(SODModel.class), sod_models, relation.getDBIDs());
+ Relation<SODModel<?>> models = new MaterializedRelation<>("Subspace Outlier Model", "sod-outlier", new SimpleTypeInformation<SODModel<?>>(SODModel.class), sod_models, relation.getDBIDs());
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
OutlierResult sodResult = new OutlierResult(meta, new SODProxyScoreResult(models, relation.getDBIDs()));
// also add the models.
@@ -194,7 +194,7 @@ public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
*/
private DBIDs getNearestNeighbors(Relation<V> relation, SimilarityQuery<V, D> simQ, DBIDRef queryObject) {
// similarityFunction.getPreprocessor().getParameters();
- Heap<DoubleDBIDPair> nearestNeighbors = new TiedTopBoundedHeap<DoubleDBIDPair>(knn);
+ Heap<DoubleDBIDPair> nearestNeighbors = new TiedTopBoundedHeap<>(knn);
for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
if (!DBIDUtil.equal(iter, queryObject)) {
double sim = simQ.similarity(queryObject, iter).doubleValue();
@@ -454,7 +454,7 @@ public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final ObjectParameter<SimilarityFunction<V, D>> simP = new ObjectParameter<SimilarityFunction<V, D>>(SIM_ID, SimilarityFunction.class, SharedNearestNeighborSimilarityFunction.class);
+ final ObjectParameter<SimilarityFunction<V, D>> simP = new ObjectParameter<>(SIM_ID, SimilarityFunction.class, SharedNearestNeighborSimilarityFunction.class);
if (config.grab(simP)) {
similarityFunction = simP.instantiateClass(config);
}
@@ -474,7 +474,7 @@ public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
@Override
protected SOD<V, D> makeInstance() {
- return new SOD<V, D>(knn, alpha, similarityFunction);
+ return new SOD<>(knn, alpha, similarityFunction);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/package-info.java
index 8b1c80df..c3951821 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/package-info.java
@@ -7,7 +7,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java
index ae95abfa..d10eaef8 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.trivial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -117,7 +117,7 @@ public class ByLabelOutlier extends AbstractAlgorithm<OutlierResult> implements
final double score = (pattern.matcher(label).matches()) ? 1 : 0;
scores.putDouble(iditer, score);
}
- Relation<Double> scoreres = new MaterializedRelation<Double>("By label outlier scores", "label-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
+ Relation<Double> scoreres = new MaterializedRelation<>("By label outlier scores", "label-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
OutlierScoreMeta meta = new ProbabilisticOutlierScore();
return new OutlierResult(meta, scoreres);
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java
index 35a85d51..44a7975f 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.trivial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -73,7 +73,7 @@ public class TrivialAllOutlier extends AbstractAlgorithm<OutlierResult> implemen
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
scores.putDouble(iditer, 1.0);
}
- Relation<Double> scoreres = new MaterializedRelation<Double>("Trivial all-outlier score", "all-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
+ Relation<Double> scoreres = new MaterializedRelation<>("Trivial all-outlier score", "all-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
OutlierScoreMeta meta = new ProbabilisticOutlierScore();
return new OutlierResult(meta, scoreres);
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAverageCoordinateOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAverageCoordinateOutlier.java
new file mode 100644
index 00000000..84e3ad41
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAverageCoordinateOutlier.java
@@ -0,0 +1,98 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier.trivial;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.math.Mean;
+import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+
+/**
+ * Trivial method that takes the average of all dimensions (for one-dimensional
+ * data that is just the actual value!) as outlier score. Mostly useful to
+ * convert a 1d data set into a score, e.g. in geodata.
+ *
+ * @author Erich Schubert
+ */
+public class TrivialAverageCoordinateOutlier extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+ /**
+ * Our logger.
+ */
+ private static final Logging logger = Logging.getLogger(TrivialAverageCoordinateOutlier.class);
+
+ /**
+ * Constructor.
+ */
+ public TrivialAverageCoordinateOutlier() {
+ super();
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(TypeUtil.NUMBER_VECTOR_VARIABLE_LENGTH);
+ }
+
+ /**
+ * Run the actual algorithm.
+ *
+ * @param relation Relation
+ * @return Result
+ */
+ public OutlierResult run(Relation<? extends NumberVector<?>> relation) {
+ WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT);
+ DoubleMinMax minmax = new DoubleMinMax();
+ Mean m = new Mean();
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ m.reset();
+ NumberVector<?> nv = relation.get(iditer);
+ for (int i = 0; i < nv.getDimensionality(); i++) {
+ m.put(nv.doubleValue(i + 1));
+ }
+ final double score = m.getMean();
+ scores.putDouble(iditer, score);
+ minmax.put(score);
+ }
+ Relation<Double> scoreres = new MaterializedRelation<Double>("Trivial mean score", "mean-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
+ OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
+ return new OutlierResult(meta, scoreres);
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return logger;
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java
index e4c3861f..285b00df 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java
@@ -1,9 +1,10 @@
package de.lmu.ifi.dbs.elki.algorithm.outlier.trivial;
+
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -48,8 +49,11 @@ import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessEqualConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
@@ -68,11 +72,6 @@ public class TrivialGeneratedOutlier extends AbstractAlgorithm<OutlierResult> im
private static final Logging LOG = Logging.getLogger(TrivialGeneratedOutlier.class);
/**
- * Expected share of outliers
- */
- public static final OptionID EXPECT_ID = new OptionID("modeloutlier.expect", "Expected amount of outliers, for making the scores more intuitive.");
-
- /**
* Expected share of outliers.
*/
double expect = 0.01;
@@ -96,19 +95,18 @@ public class TrivialGeneratedOutlier extends AbstractAlgorithm<OutlierResult> im
@Override
public TypeInformation[] getInputTypeRestriction() {
- return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD, new SimpleTypeInformation<Model>(Model.class), TypeUtil.GUESSED_LABEL);
+ return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD, new SimpleTypeInformation<>(Model.class), TypeUtil.GUESSED_LABEL);
}
@Override
public OutlierResult run(Database database) {
Relation<NumberVector<?>> vecs = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
- Relation<Model> models = database.getRelation(new SimpleTypeInformation<Model>(Model.class));
+ Relation<Model> models = database.getRelation(new SimpleTypeInformation<>(Model.class));
// Prefer a true class label
try {
Relation<?> relation = database.getRelation(TypeUtil.CLASSLABEL);
return run(models, vecs, relation);
- }
- catch(NoSupportedDataTypeException e) {
+ } catch (NoSupportedDataTypeException e) {
// Otherwise, try any labellike.
return run(models, vecs, database.getRelation(TypeUtil.GUESSED_LABEL));
}
@@ -125,54 +123,62 @@ public class TrivialGeneratedOutlier extends AbstractAlgorithm<OutlierResult> im
public OutlierResult run(Relation<Model> models, Relation<NumberVector<?>> vecs, Relation<?> labels) {
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(models.getDBIDs(), DataStoreFactory.HINT_HOT);
- // Adjustment constant
- final double minscore = expect / (expect + 1);
-
- HashSet<GeneratorSingleCluster> generators = new HashSet<GeneratorSingleCluster>();
- for(DBIDIter iditer = models.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ HashSet<GeneratorSingleCluster> generators = new HashSet<>();
+ for (DBIDIter iditer = models.iterDBIDs(); iditer.valid(); iditer.advance()) {
Model model = models.get(iditer);
- if(model instanceof GeneratorSingleCluster) {
+ if (model instanceof GeneratorSingleCluster) {
generators.add((GeneratorSingleCluster) model);
}
}
- if(generators.size() == 0) {
+ if (generators.size() == 0) {
LOG.warning("No generator models found for dataset - all points will be considered outliers.");
}
+ for (GeneratorSingleCluster gen : generators) {
+ for (int i = 0; i < gen.getDim(); i++) {
+ Distribution dist = gen.getDistribution(i);
+ if (!(dist instanceof NormalDistribution)) {
+ throw new AbortException("TrivialGeneratedOutlier currently only supports normal distributions, got: " + dist);
+ }
+ }
+ }
- for(DBIDIter iditer = models.iterDBIDs(); iditer.valid(); iditer.advance()) {
- double score = 0.0;
+ for (DBIDIter iditer = models.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ double score = 1.;
// Convert to a math vector
Vector v = vecs.get(iditer).getColumnVector();
- for(GeneratorSingleCluster gen : generators) {
+ for (GeneratorSingleCluster gen : generators) {
Vector tv = v;
// Transform backwards
- if(gen.getTransformation() != null) {
+ if (gen.getTransformation() != null) {
tv = gen.getTransformation().applyInverse(v);
}
final int dim = tv.getDimensionality();
double lensq = 0.0;
int norm = 0;
- for(int i = 0; i < dim; i++) {
+ for (int i = 0; i < dim; i++) {
Distribution dist = gen.getDistribution(i);
- if(dist instanceof NormalDistribution) {
+ if (dist instanceof NormalDistribution) {
NormalDistribution d = (NormalDistribution) dist;
double delta = (tv.get(i) - d.getMean()) / d.getStddev();
lensq += delta * delta;
norm += 1;
+ } else {
+ throw new AbortException("TrivialGeneratedOutlier currently only supports normal distributions, got: " + dist);
}
}
- if(norm > 0) {
+ if (norm > 0.) {
// The squared distances are ChiSquared distributed
- score = Math.max(score, 1 - ChiSquaredDistribution.cdf(lensq, norm));
+ score = Math.min(score, ChiSquaredDistribution.cdf(lensq, norm));
+ } else {
+ score = 0.;
}
}
- // score inversion.
- score = expect / (expect + score);
- // adjust to 0 to 1 range:
- score = (score - minscore) / (1 - minscore);
+ if (expect < 1) {
+ score = expect * score / (1 - score + expect);
+ }
scores.putDouble(iditer, score);
}
- Relation<Double> scoreres = new MaterializedRelation<Double>("Model outlier scores", "model-outlier", TypeUtil.DOUBLE, scores, models.getDBIDs());
+ Relation<Double> scoreres = new MaterializedRelation<>("Model outlier scores", "model-outlier", TypeUtil.DOUBLE, scores, models.getDBIDs());
OutlierScoreMeta meta = new ProbabilisticOutlierScore(0., 1.);
return new OutlierResult(meta, scoreres);
}
@@ -193,13 +199,20 @@ public class TrivialGeneratedOutlier extends AbstractAlgorithm<OutlierResult> im
/**
* Expected share of outliers
*/
+ public static final OptionID EXPECT_ID = new OptionID("modeloutlier.expect", "Expected amount of outliers, for making the scores more intuitive. When the value is 1, the CDF will be given instead.");
+
+ /**
+ * Expected share of outliers
+ */
double expect;
-
+
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
DoubleParameter expectP = new DoubleParameter(EXPECT_ID, 0.01);
- if(config.grab(expectP)) {
+ expectP.addConstraint(new GreaterConstraint(0.0));
+ expectP.addConstraint(new LessEqualConstraint(1.0));
+ if (config.grab(expectP)) {
expect = expectP.getValue();
}
}
@@ -209,4 +222,4 @@ public class TrivialGeneratedOutlier extends AbstractAlgorithm<OutlierResult> im
return new TrivialGeneratedOutlier(expect);
}
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java
index 695ff112..ff3d0296 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.trivial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -73,7 +73,7 @@ public class TrivialNoOutlier extends AbstractAlgorithm<OutlierResult> implement
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
scores.putDouble(iditer, 0.0);
}
- Relation<Double> scoreres = new MaterializedRelation<Double>("Trivial no-outlier score", "no-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
+ Relation<Double> scoreres = new MaterializedRelation<>("Trivial no-outlier score", "no-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
OutlierScoreMeta meta = new ProbabilisticOutlierScore();
return new OutlierResult(meta, scoreres);
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/package-info.java
index d49d3565..c927cae4 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/package-info.java
@@ -7,7 +7,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2012
+Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/package-info.java
index 44eb2aba..ba8a7b56 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/package-info.java
@@ -13,7 +13,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2012
+Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/statistics/AddSingleScale.java b/src/de/lmu/ifi/dbs/elki/algorithm/statistics/AddSingleScale.java
index 159fb691..cbae17ca 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/statistics/AddSingleScale.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/statistics/AddSingleScale.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.statistics;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/statistics/AveragePrecisionAtK.java b/src/de/lmu/ifi/dbs/elki/algorithm/statistics/AveragePrecisionAtK.java
index e8165afc..1b87a015 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/statistics/AveragePrecisionAtK.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/statistics/AveragePrecisionAtK.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.statistics;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -27,6 +27,7 @@ import java.util.ArrayList;
import java.util.Collection;
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.data.ClassLabel;
import de.lmu.ifi.dbs.elki.data.DoubleVector;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
@@ -35,23 +36,23 @@ import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.MeanVariance;
import de.lmu.ifi.dbs.elki.result.CollectionResult;
-import de.lmu.ifi.dbs.elki.result.HistogramResult;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessEqualConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.LongParameter;
@@ -60,6 +61,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.LongParameter;
* at k, when ranking the objects by distance.
*
* @author Erich Schubert
+ *
* @param <V> Vector type
* @param <D> Distance type
*/
@@ -73,38 +75,51 @@ public class AveragePrecisionAtK<V extends Object, D extends NumberDistance<D, ?
* The parameter k - the number of neighbors to retrieve.
*/
private int k;
-
+
/**
* Relative number of object to use in sampling.
*/
private double sampling = 1.0;
-
+
/**
* Random sampling seed.
*/
private Long seed = null;
/**
+ * Include query object in evaluation.
+ */
+ private boolean includeSelf;
+
+ /**
* Constructor.
*
* @param distanceFunction Distance function
* @param k K parameter
* @param sampling Sampling rate
* @param seed Random sampling seed (may be null)
+ * @param includeSelf Include query object in evaluation
*/
- public AveragePrecisionAtK(DistanceFunction<? super V, D> distanceFunction, int k, double sampling, Long seed) {
+ public AveragePrecisionAtK(DistanceFunction<? super V, D> distanceFunction, int k, double sampling, Long seed, boolean includeSelf) {
super(distanceFunction);
this.k = k;
this.sampling = sampling;
this.seed = seed;
+ this.includeSelf = includeSelf;
}
- @Override
- public HistogramResult<DoubleVector> run(Database database) {
- final Relation<V> relation = database.getRelation(getInputTypeRestriction()[0]);
- final Relation<Object> lrelation = database.getRelation(getInputTypeRestriction()[1]);
+ /**
+ * Run the algorithm
+ *
+ * @param database Database to run on (for kNN queries)
+ * @param relation Relation for distance computations
+ * @param lrelation Relation for class label comparison
+ * @return Vectors containing mean and standard deviation.
+ */
+ public CollectionResult<DoubleVector> run(Database database, Relation<V> relation, Relation<ClassLabel> lrelation) {
final DistanceQuery<V, D> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
- final KNNQuery<V, D> knnQuery = database.getKNNQuery(distQuery, k);
+ final int qk = k + (includeSelf ? 0 : 1);
+ final KNNQuery<V, D> knnQuery = database.getKNNQuery(distQuery, qk);
MeanVariance[] mvs = MeanVariance.newArray(k);
@@ -115,53 +130,56 @@ public class AveragePrecisionAtK<V extends Object, D extends NumberDistance<D, ?
} else {
ids = relation.getDBIDs();
}
-
- if(LOG.isVerbose()) {
+
+ if (LOG.isVerbose()) {
LOG.verbose("Processing points...");
}
FiniteProgress objloop = LOG.isVerbose() ? new FiniteProgress("Computing nearest neighbors", ids.size(), LOG) : null;
// sort neighbors
- for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
- KNNResult<D> knn = knnQuery.getKNNForDBID(iter, k);
+ for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ KNNList<D> knn = knnQuery.getKNNForDBID(iter, qk);
Object label = lrelation.get(iter);
int positive = 0, i = 0;
- for (DBIDIter ri = knn.iter(); i < k && ri.valid(); ri.advance(), i++) {
+ for (DBIDIter ri = knn.iter(); i < k && ri.valid(); ri.advance()) {
+ if (!includeSelf && DBIDUtil.equal(iter, ri)) {
+ continue;
+ }
Object olabel = lrelation.get(ri);
- if(label == null) {
- if(olabel == null) {
+ if (label == null) {
+ if (olabel == null) {
positive += 1;
}
- }
- else {
- if(label.equals(olabel)) {
+ } else {
+ if (label.equals(olabel)) {
positive += 1;
}
}
final double precision = positive / (double) (i + 1);
mvs[i].put(precision);
+ i++;
}
- if(objloop != null) {
+ if (objloop != null) {
objloop.incrementProcessed(LOG);
}
}
- if(objloop != null) {
+ if (objloop != null) {
objloop.ensureCompleted(LOG);
}
// Collections.sort(results);
// Transform Histogram into a Double Vector array.
- Collection<DoubleVector> res = new ArrayList<DoubleVector>(k);
- for(int i = 0; i < k; i++) {
+ Collection<DoubleVector> res = new ArrayList<>(k);
+ for (int i = 0; i < k; i++) {
DoubleVector row = new DoubleVector(new double[] { mvs[i].getMean(), mvs[i].getSampleStddev() });
res.add(row);
}
- return new HistogramResult<DoubleVector>("Average Precision", "average-precision", res);
+ return new CollectionResult<>("Average Precision", "average-precision", res);
}
@Override
public TypeInformation[] getInputTypeRestriction() {
- return TypeUtil.array(getDistanceFunction().getInputTypeRestriction(), TypeUtil.GUESSED_LABEL);
+ return TypeUtil.array(getDistanceFunction().getInputTypeRestriction(), TypeUtil.CLASSLABEL);
}
@Override
@@ -193,26 +211,36 @@ public class AveragePrecisionAtK<V extends Object, D extends NumberDistance<D, ?
public static final OptionID SEED_ID = new OptionID("avep.sampling-seed", "Random seed for deterministic sampling.");
/**
+ * Parameter to include the query object.
+ */
+ public static final OptionID INCLUDESELF_ID = new OptionID("avep.includeself", "Include the query object in the evaluation.");
+
+ /**
* Neighborhood size.
*/
protected int k = 20;
-
+
/**
* Relative amount of data to sample.
*/
protected double sampling = 1.0;
-
+
/**
* Random sampling seed.
*/
protected Long seed = null;
+ /**
+ * Include query object in evaluation.
+ */
+ protected boolean includeSelf;
+
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
final IntParameter kP = new IntParameter(K_ID);
kP.addConstraint(new GreaterEqualConstraint(2));
- if(config.grab(kP)) {
+ if (config.grab(kP)) {
k = kP.getValue();
}
final DoubleParameter samplingP = new DoubleParameter(SAMPLING_ID);
@@ -227,11 +255,15 @@ public class AveragePrecisionAtK<V extends Object, D extends NumberDistance<D, ?
if (config.grab(rndP)) {
seed = rndP.getValue();
}
+ final Flag includeP = new Flag(INCLUDESELF_ID);
+ if (config.grab(includeP)) {
+ includeSelf = includeP.isTrue();
+ }
}
@Override
protected AveragePrecisionAtK<V, D> makeInstance() {
- return new AveragePrecisionAtK<V, D>(distanceFunction, k, sampling, seed);
+ return new AveragePrecisionAtK<>(distanceFunction, k, sampling, seed, includeSelf);
}
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/statistics/DistanceStatisticsWithClasses.java b/src/de/lmu/ifi/dbs/elki/algorithm/statistics/DistanceStatisticsWithClasses.java
index ebf588b6..3c8e1635 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/statistics/DistanceStatisticsWithClasses.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/statistics/DistanceStatisticsWithClasses.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.statistics;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -284,7 +284,7 @@ public class DistanceStatisticsWithClasses<O, D extends NumberDistance<D, ?>> ex
}
long bnum = inum + onum;
- Collection<DoubleVector> binstat = new ArrayList<DoubleVector>(numbin);
+ Collection<DoubleVector> binstat = new ArrayList<>(numbin);
for (ObjHistogram.Iter<long[]> iter = histogram.iter(); iter.valid(); iter.advance()) {
final long[] value = iter.getValue();
final double icof = (inum == 0) ? 0 : ((double) value[0]) / inum / histogram.getBinsize();
@@ -294,7 +294,7 @@ public class DistanceStatisticsWithClasses<O, D extends NumberDistance<D, ?>> ex
DoubleVector row = new DoubleVector(new double[] { iter.getCenter(), icof, icaf, ocof, ocaf });
binstat.add(row);
}
- HistogramResult<DoubleVector> result = new HistogramResult<DoubleVector>("Distance Histogram", "distance-histogram", binstat);
+ HistogramResult<DoubleVector> result = new HistogramResult<>("Distance Histogram", "distance-histogram", binstat);
result.addHeader("Absolute minimum distance (abs): " + gminmax.getMin());
result.addHeader("Absolute maximum distance (abs): " + gminmax.getMax());
@@ -319,8 +319,8 @@ public class DistanceStatisticsWithClasses<O, D extends NumberDistance<D, ?>> ex
Random rnd = new Random();
// estimate minimum and maximum.
int k = (int) Math.max(25, Math.pow(relation.size(), 0.2));
- TreeSet<DoubleDBIDPair> minhotset = new TreeSet<DoubleDBIDPair>();
- TreeSet<DoubleDBIDPair> maxhotset = new TreeSet<DoubleDBIDPair>(Collections.reverseOrder());
+ TreeSet<DoubleDBIDPair> minhotset = new TreeSet<>();
+ TreeSet<DoubleDBIDPair> maxhotset = new TreeSet<>(Collections.reverseOrder());
int randomsize = (int) Math.max(25, Math.pow(relation.size(), 0.2));
double rprob = ((double) randomsize) / size;
@@ -336,7 +336,7 @@ public class DistanceStatisticsWithClasses<O, D extends NumberDistance<D, ?>> ex
maxhotset.add(DBIDUtil.newPair(Double.MIN_VALUE, firstid));
for (; iter.valid(); iter.advance()) {
// generate candidates for min distance.
- ArrayList<DoubleDBIDPair> np = new ArrayList<DoubleDBIDPair>(k * 2 + randomsize * 2);
+ ArrayList<DoubleDBIDPair> np = new ArrayList<>(k * 2 + randomsize * 2);
for (DoubleDBIDPair pair : minhotset) {
// skip the object itself
if (DBIDUtil.equal(iter, pair)) {
@@ -355,7 +355,7 @@ public class DistanceStatisticsWithClasses<O, D extends NumberDistance<D, ?>> ex
shrinkHeap(minhotset, k);
// generate candidates for max distance.
- ArrayList<DoubleDBIDPair> np2 = new ArrayList<DoubleDBIDPair>(k * 2 + randomsize * 2);
+ ArrayList<DoubleDBIDPair> np2 = new ArrayList<>(k * 2 + randomsize * 2);
for (DoubleDBIDPair pair : minhotset) {
// skip the object itself
if (DBIDUtil.equal(iter, pair)) {
@@ -479,7 +479,7 @@ public class DistanceStatisticsWithClasses<O, D extends NumberDistance<D, ?>> ex
sampling = samplingF.getValue();
}
- ArrayList<Parameter<?>> exclusive = new ArrayList<Parameter<?>>();
+ ArrayList<Parameter<?>> exclusive = new ArrayList<>();
exclusive.add(exactF);
exclusive.add(samplingF);
config.checkConstraint(new OnlyOneIsAllowedToBeSetGlobalConstraint(exclusive));
@@ -487,7 +487,7 @@ public class DistanceStatisticsWithClasses<O, D extends NumberDistance<D, ?>> ex
@Override
protected DistanceStatisticsWithClasses<O, D> makeInstance() {
- return new DistanceStatisticsWithClasses<O, D>(distanceFunction, numbin, exact, sampling);
+ return new DistanceStatisticsWithClasses<>(distanceFunction, numbin, exact, sampling);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/statistics/EvaluateRankingQuality.java b/src/de/lmu/ifi/dbs/elki/algorithm/statistics/EvaluateRankingQuality.java
index 1643d378..76e5ef66 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/statistics/EvaluateRankingQuality.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/statistics/EvaluateRankingQuality.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.statistics;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -41,22 +41,24 @@ import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.evaluation.roc.ROC;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.MathUtil;
+import de.lmu.ifi.dbs.elki.math.MeanVariance;
import de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
import de.lmu.ifi.dbs.elki.result.CollectionResult;
import de.lmu.ifi.dbs.elki.result.HistogramResult;
import de.lmu.ifi.dbs.elki.utilities.datastructures.histogram.MeanVarianceStaticHistogram;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.histogram.ObjHistogram;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
@@ -126,8 +128,8 @@ public class EvaluateRankingQuality<V extends NumberVector<?>, D extends NumberD
Collection<Cluster<Model>> split = (new ByLabelOrAllInOneClustering()).run(database).getAllClusters();
// Compute cluster averages and covariance matrix
- HashMap<Cluster<?>, Vector> averages = new HashMap<Cluster<?>, Vector>(split.size());
- HashMap<Cluster<?>, Matrix> covmats = new HashMap<Cluster<?>, Matrix>(split.size());
+ HashMap<Cluster<?>, Vector> averages = new HashMap<>(split.size());
+ HashMap<Cluster<?>, Matrix> covmats = new HashMap<>(split.size());
for (Cluster<?> clus : split) {
CovarianceMatrix covmat = CovarianceMatrix.make(relation, clus.getIDs());
averages.put(clus, covmat.getMeanVector());
@@ -143,7 +145,7 @@ public class EvaluateRankingQuality<V extends NumberVector<?>, D extends NumberD
// sort neighbors
for (Cluster<?> clus : split) {
- ArrayList<DoubleDBIDPair> cmem = new ArrayList<DoubleDBIDPair>(clus.size());
+ ArrayList<DoubleDBIDPair> cmem = new ArrayList<>(clus.size());
Vector av = averages.get(clus);
Matrix covm = covmats.get(clus);
@@ -154,7 +156,7 @@ public class EvaluateRankingQuality<V extends NumberVector<?>, D extends NumberD
Collections.sort(cmem);
for (int ind = 0; ind < cmem.size(); ind++) {
- KNNResult<D> knn = knnQuery.getKNNForDBID(cmem.get(ind), relation.size());
+ KNNList<D> knn = knnQuery.getKNNForDBID(cmem.get(ind), relation.size());
double result = ROC.computeROCAUCDistanceResult(relation.size(), clus, knn);
hist.put(((double) ind) / clus.size(), result);
@@ -170,12 +172,12 @@ public class EvaluateRankingQuality<V extends NumberVector<?>, D extends NumberD
// Collections.sort(results);
// Transform Histogram into a Double Vector array.
- Collection<DoubleVector> res = new ArrayList<DoubleVector>(relation.size());
- for (MeanVarianceStaticHistogram.Iter iter = hist.iter(); iter.valid(); iter.advance()) {
+ Collection<DoubleVector> res = new ArrayList<>(relation.size());
+ for (ObjHistogram.Iter<MeanVariance> iter = hist.iter(); iter.valid(); iter.advance()) {
DoubleVector row = new DoubleVector(new double[] { iter.getCenter(), iter.getValue().getCount(), iter.getValue().getMean(), iter.getValue().getSampleVariance() });
res.add(row);
}
- return new HistogramResult<DoubleVector>("Ranking Quality Histogram", "ranking-histogram", res);
+ return new HistogramResult<>("Ranking Quality Histogram", "ranking-histogram", res);
}
@Override
@@ -213,7 +215,7 @@ public class EvaluateRankingQuality<V extends NumberVector<?>, D extends NumberD
@Override
protected EvaluateRankingQuality<V, D> makeInstance() {
- return new EvaluateRankingQuality<V, D>(distanceFunction, numbins);
+ return new EvaluateRankingQuality<>(distanceFunction, numbins);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/statistics/RankingQualityHistogram.java b/src/de/lmu/ifi/dbs/elki/algorithm/statistics/RankingQualityHistogram.java
index 6d8167a5..58018029 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/statistics/RankingQualityHistogram.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/statistics/RankingQualityHistogram.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.statistics;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -35,11 +35,11 @@ import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.evaluation.roc.ROC;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -126,7 +126,7 @@ public class RankingQualityHistogram<O, D extends NumberDistance<D, ?>> extends
// sort neighbors
for (Cluster<?> clus : split) {
for (DBIDIter iter = clus.getIDs().iter(); iter.valid(); iter.advance()) {
- KNNResult<D> knn = knnQuery.getKNNForDBID(iter, relation.size());
+ KNNList<D> knn = knnQuery.getKNNForDBID(iter, relation.size());
double result = ROC.computeROCAUCDistanceResult(relation.size(), clus, knn);
mv.put(result);
@@ -142,12 +142,12 @@ public class RankingQualityHistogram<O, D extends NumberDistance<D, ?>> extends
}
// Transform Histogram into a Double Vector array.
- Collection<DoubleVector> res = new ArrayList<DoubleVector>(relation.size());
+ Collection<DoubleVector> res = new ArrayList<>(relation.size());
for (DoubleStaticHistogram.Iter iter = hist.iter(); iter.valid(); iter.advance()) {
DoubleVector row = new DoubleVector(new double[] { iter.getCenter(), iter.getValue() });
res.add(row);
}
- HistogramResult<DoubleVector> result = new HistogramResult<DoubleVector>("Ranking Quality Histogram", "ranking-histogram", res);
+ HistogramResult<DoubleVector> result = new HistogramResult<>("Ranking Quality Histogram", "ranking-histogram", res);
result.addHeader("Mean: " + mv.getMean() + " Variance: " + mv.getSampleVariance());
return result;
}
@@ -187,7 +187,7 @@ public class RankingQualityHistogram<O, D extends NumberDistance<D, ?>> extends
@Override
protected RankingQualityHistogram<O, D> makeInstance() {
- return new RankingQualityHistogram<O, D>(distanceFunction, numbins);
+ return new RankingQualityHistogram<>(distanceFunction, numbins);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/statistics/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/statistics/package-info.java
index e706d586..8e566101 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/statistics/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/statistics/package-info.java
@@ -8,7 +8,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2012
+Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team