Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm')
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/APRIORI.java | 41
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/AbstractAlgorithm.java | 12
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/AbstractDistanceBasedAlgorithm.java | 7
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/AbstractPrimitiveDistanceBasedAlgorithm.java | 5
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/Algorithm.java | 2
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/DependencyDerivator.java | 137
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/DummyAlgorithm.java | 9
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/KNNDistanceOrder.java | 24
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/KNNJoin.java | 201
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/MaterializeDistances.java | 16
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/NullAlgorithm.java | 4
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/benchmark/KNNBenchmarkAlgorithm.java | 303
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/benchmark/RangeQueryBenchmarkAlgorithm.java | 357
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/benchmark/package-info.java | 30
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedClustering.java | 87
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedDBSCAN.java | 45
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/DBSCAN.java | 71
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/DeLiClu.java | 43
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/EM.java | 150
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/NaiveMeanShiftClustering.java | 279
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICS.java | 60
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSXi.java | 38
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/SLINK.java | 851
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/SNNClustering.java | 52
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/CASH.java | 367
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/COPAC.java | 37
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ERiC.java | 85
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/FourC.java | 8
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/HiCO.java | 99
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/LMCLUS.java | 200
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ORCLUS.java | 212
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHInterval.java | 47
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHIntervalSplit.java | 19
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/ParameterizationFunction.java | 530
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/CorePredicate.java | 5
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/EpsilonNeighborPredicate.java | 92
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/GeneralizedDBSCAN.java | 59
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/MinPtsCorePredicate.java | 45
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/NeighborPredicate.java | 2
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeans.java | 36
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeansInitialization.java | 24
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FirstKInitialMeans.java | 2
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeans.java | 8
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansInitialization.java | 4
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java | 87
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansMacQueen.java | 85
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansPlusPlusInitialMeans.java | 19
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMediansLloyd.java | 67
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsEM.java | 71
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsPAM.java | 95
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/PAMInitialMeans.java | 14
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyChosenInitialMeans.java | 13
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyGeneratedInitialMeans.java | 23
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/package-info.java | 6
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java | 156
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java | 259
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/HiSC.java | 17
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PROCLUS.java | 310
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PreDeCon.java | 8
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java | 132
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUESubspace.java | 18
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUEUnit.java | 22
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelClustering.java | 13
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelHierarchicalClustering.java | 6
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByModelClustering.java | 9
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllInOne.java | 4
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllNoise.java | 4
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java | 296
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/ALOCI.java | 112
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java | 93
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractDBOutlier.java | 10
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java | 166
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuNaive.java | 33
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/COP.java | 385
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierDetection.java | 28
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierScore.java | 8
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java | 8
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java | 18
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java | 26
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/HilOut.java | 117
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/INFLO.java | 113
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNOutlier.java | 86
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNWeightOutlier.java | 37
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDF.java | 342
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDOF.java | 33
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOCI.java | 49
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOF.java | 165
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/LoOP.java | 193
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java | 22
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/OnlineLOF.java | 57
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java | 4
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java | 40
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleCOP.java | 236
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleKernelDensityLOF.java | 284
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleLOF.java | 249
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java | 16
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java | 172
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java | 307
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java | 6
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/SimpleOutlierEnsemble.java | 222
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/package-info.java | 5
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/package-info.java | 7
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractDistanceBasedSpatialOutlier.java | 4
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractNeighborhoodOutlier.java | 4
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java | 115
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java | 31
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java | 96
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java | 33
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java | 31
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java | 182
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java | 40
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java | 35
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java | 39
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java | 23
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java | 66
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java | 9
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java | 37
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java | 37
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java | 4
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java | 21
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java | 29
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java | 14
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java | 6
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java | 94
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java | 27
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java | 113
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java | 6
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java | 4
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java | 12
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java | 4
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/package-info.java | 5
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/statistics/AddSingleScale.java | 92
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/statistics/AveragePrecisionAtK.java | 70
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/statistics/DistanceStatisticsWithClasses.java | 302
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/statistics/EvaluateRankingQuality.java | 89
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/statistics/RankingQualityHistogram.java | 64
136 files changed, 7731 insertions, 4095 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/APRIORI.java b/src/de/lmu/ifi/dbs/elki/algorithm/APRIORI.java
index 65339257..70706df8 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/APRIORI.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/APRIORI.java
@@ -26,7 +26,7 @@ package de.lmu.ifi.dbs.elki.algorithm;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
-import java.util.Hashtable;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -36,16 +36,16 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.result.AprioriResult;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.IntervalConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessEqualConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.OneMustBeSetGlobalConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.OnlyOneIsAllowedToBeSetGlobalConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
@@ -72,14 +72,14 @@ public class APRIORI extends AbstractAlgorithm<AprioriResult> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(APRIORI.class);
+ private static final Logging LOG = Logging.getLogger(APRIORI.class);
/**
* Optional parameter to specify the threshold for minimum frequency, must be
* a double greater than or equal to 0 and less than or equal to 1.
* Alternatively to parameter {@link #MINSUPP_ID}).
*/
- public static final OptionID MINFREQ_ID = OptionID.getOrCreateOptionID("apriori.minfreq", "Threshold for minimum frequency as percentage value " + "(alternatively to parameter apriori.minsupp).");
+ public static final OptionID MINFREQ_ID = new OptionID("apriori.minfreq", "Threshold for minimum frequency as percentage value " + "(alternatively to parameter apriori.minsupp).");
/**
* Parameter to specify the threshold for minimum support as minimally
@@ -88,7 +88,7 @@ public class APRIORI extends AbstractAlgorithm<AprioriResult> {
* {@link #MINSUPP_ID} is slightly preferable over setting {@link #MINFREQ_ID}
* in terms of efficiency.
*/
- public static final OptionID MINSUPP_ID = OptionID.getOrCreateOptionID("apriori.minsupp", "Threshold for minimum support as minimally required number of transactions " + "(alternatively to parameter apriori.minfreq" + " - setting apriori.minsupp is slightly preferable over setting " + "apriori.minfreq in terms of efficiency).");
+ public static final OptionID MINSUPP_ID = new OptionID("apriori.minsupp", "Threshold for minimum support as minimally required number of transactions " + "(alternatively to parameter apriori.minfreq" + " - setting apriori.minsupp is slightly preferable over setting " + "apriori.minfreq in terms of efficiency).");
/**
* Holds the value of {@link #MINFREQ_ID}.
@@ -128,13 +128,13 @@ public class APRIORI extends AbstractAlgorithm<AprioriResult> {
* @return the AprioriResult learned by this APRIORI
*/
public AprioriResult run(Database database, Relation<BitVector> relation) {
- Map<BitSet, Integer> support = new Hashtable<BitSet, Integer>();
+ Map<BitSet, Integer> support = new HashMap<BitSet, Integer>();
List<BitSet> solution = new ArrayList<BitSet>();
final int size = relation.size();
if(size > 0) {
int dim;
try {
- dim = DatabaseUtil.dimensionality(relation);
+ dim = RelationUtil.dimensionality(relation);
}
catch(UnsupportedOperationException e) {
dim = 0;
@@ -145,9 +145,9 @@ public class APRIORI extends AbstractAlgorithm<AprioriResult> {
candidates[i].set(i);
}
while(candidates.length > 0) {
- StringBuffer msg = new StringBuffer();
+ StringBuilder msg = new StringBuilder();
BitSet[] frequentItemsets = frequentItemsets(support, candidates, relation);
- if(logger.isVerbose()) {
+ if(LOG.isVerbose()) {
msg.append("\ncandidates").append(Arrays.asList(candidates));
msg.append("\nfrequentItemsets").append(Arrays.asList(frequentItemsets));
}
@@ -156,9 +156,9 @@ public class APRIORI extends AbstractAlgorithm<AprioriResult> {
}
BitSet[] joined = join(frequentItemsets);
candidates = prune(support, joined, size);
- if(logger.isVerbose()) {
+ if(LOG.isVerbose()) {
msg.append("\ncandidates after pruning").append(Arrays.asList(candidates));
- logger.verbose(msg.toString());
+ LOG.verbose(msg.toString());
}
}
}
@@ -300,7 +300,7 @@ public class APRIORI extends AbstractAlgorithm<AprioriResult> {
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -312,32 +312,35 @@ public class APRIORI extends AbstractAlgorithm<AprioriResult> {
*/
public static class Parameterizer extends AbstractParameterizer {
/**
- * Parameter for minFreq
+ * Parameter for minFreq.
*/
protected Double minfreq = null;
/**
- * Parameter for minSupp
+ * Parameter for minSupp.
*/
protected Integer minsupp = null;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- DoubleParameter minfreqP = new DoubleParameter(MINFREQ_ID, true);
- minfreqP.addConstraint(new IntervalConstraint(0, IntervalConstraint.IntervalBoundary.CLOSE, 1, IntervalConstraint.IntervalBoundary.CLOSE));
+ DoubleParameter minfreqP = new DoubleParameter(MINFREQ_ID);
+ minfreqP.setOptional(true);
+ minfreqP.addConstraint(new GreaterEqualConstraint(0));
+ minfreqP.addConstraint(new LessEqualConstraint(1));
if(config.grab(minfreqP)) {
minfreq = minfreqP.getValue();
}
- IntParameter minsuppP = new IntParameter(MINSUPP_ID, true);
+ IntParameter minsuppP = new IntParameter(MINSUPP_ID);
+ minsuppP.setOptional(true);
minsuppP.addConstraint(new GreaterEqualConstraint(0));
if(config.grab(minsuppP)) {
minsupp = minsuppP.getValue();
}
// global parameter constraints
- ArrayList<Parameter<?, ?>> globalConstraints = new ArrayList<Parameter<?, ?>>();
+ ArrayList<Parameter<?>> globalConstraints = new ArrayList<Parameter<?>>();
globalConstraints.add(minfreqP);
globalConstraints.add(minsuppP);
config.checkConstraint(new OnlyOneIsAllowedToBeSetGlobalConstraint(globalConstraints));
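[Editor's note] The hunks above show this commit's recurring parameterization migration: the removed two-sided IntervalConstraint becomes a pair of one-sided constraints, and optionality moves from a constructor flag to an explicit setter. A minimal sketch of the resulting idiom, not part of the patch itself, using only classes already imported in this file:

// Sketch: [0, 1] as a closed interval, expressed with one-sided constraints.
DoubleParameter minfreqP = new DoubleParameter(MINFREQ_ID);
minfreqP.setOptional(true);                            // replaces new DoubleParameter(MINFREQ_ID, true)
minfreqP.addConstraint(new GreaterEqualConstraint(0)); // value >= 0
minfreqP.addConstraint(new LessEqualConstraint(1));    // value <= 1
if(config.grab(minfreqP)) {
  minfreq = minfreqP.getValue();
}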
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/AbstractAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/AbstractAlgorithm.java
index 30e6e226..b36df094 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/AbstractAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/AbstractAlgorithm.java
@@ -110,10 +110,6 @@ public abstract class AbstractAlgorithm<R extends Result> implements Algorithm {
if(runmethod1 != null) {
try {
- StringBuffer buf = new StringBuffer();
- for(Class<?> cls : signature1) {
- buf.append(cls.toString()).append(",");
- }
return (R) runmethod1.invoke(this, relations1);
}
catch(IllegalArgumentException e) {
@@ -134,10 +130,6 @@ public abstract class AbstractAlgorithm<R extends Result> implements Algorithm {
}
else if(runmethod2 != null) {
try {
- StringBuffer buf = new StringBuffer();
- for(Class<?> cls : signature1) {
- buf.append(cls.toString()).append(",");
- }
return (R) runmethod2.invoke(this, relations2);
}
catch(IllegalArgumentException e) {
@@ -174,10 +166,10 @@ public abstract class AbstractAlgorithm<R extends Result> implements Algorithm {
*
* @return the static logger
*/
- abstract protected Logging getLogger();
+ protected abstract Logging getLogger();
/**
- * Make a default distance function configuration option
+ * Make a default distance function configuration option.
*
* @param <F> Distance function type
* @param defaultDistanceFunction Default value
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/AbstractDistanceBasedAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/AbstractDistanceBasedAlgorithm.java
index 70d4ba3a..e9d638dc 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/AbstractDistanceBasedAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/AbstractDistanceBasedAlgorithm.java
@@ -47,9 +47,9 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
*/
public abstract class AbstractDistanceBasedAlgorithm<O, D extends Distance<D>, R extends Result> extends AbstractAlgorithm<R> {
/**
- * OptionID for {@link #DISTANCE_FUNCTION_ID}
+ * OptionID for {@link #DISTANCE_FUNCTION_ID}.
*/
- public static final OptionID DISTANCE_FUNCTION_ID = OptionID.getOrCreateOptionID("algorithm.distancefunction", "Distance function to determine the distance between database objects.");
+ public static final OptionID DISTANCE_FUNCTION_ID = new OptionID("algorithm.distancefunction", "Distance function to determine the distance between database objects.");
/**
* Holds the instance of the distance function specified by
@@ -84,6 +84,9 @@ public abstract class AbstractDistanceBasedAlgorithm<O, D extends Distance<D>, R
* @apiviz.exclude
*/
public abstract static class Parameterizer<O, D extends Distance<D>> extends AbstractParameterizer {
+ /**
+ * The distance function to use.
+ */
protected DistanceFunction<O, D> distanceFunction;
@Override
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/AbstractPrimitiveDistanceBasedAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/AbstractPrimitiveDistanceBasedAlgorithm.java
index 4fa12e11..7bca1931 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/AbstractPrimitiveDistanceBasedAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/AbstractPrimitiveDistanceBasedAlgorithm.java
@@ -45,7 +45,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
* @param <D> the type of Distance used by this Algorithm
* @param <R> the type of result to retrieve from this Algorithm
*/
-public abstract class AbstractPrimitiveDistanceBasedAlgorithm<O, D extends Distance<D>, R extends Result> extends AbstractAlgorithm<R> {
+public abstract class AbstractPrimitiveDistanceBasedAlgorithm<O, D extends Distance<?>, R extends Result> extends AbstractAlgorithm<R> {
/**
* Holds the instance of the distance function specified by
* {@link AbstractDistanceBasedAlgorithm#DISTANCE_FUNCTION_ID}.
@@ -79,6 +79,9 @@ public abstract class AbstractPrimitiveDistanceBasedAlgorithm<O, D extends Dista
* @apiviz.exclude
*/
public abstract static class Parameterizer<O, D extends Distance<D>> extends AbstractParameterizer {
+ /**
+ * Distance function to use.
+ */
protected PrimitiveDistanceFunction<O, D> distanceFunction;
@Override
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/Algorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/Algorithm.java
index ae221ca7..e5a4cc07 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/Algorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/Algorithm.java
@@ -61,5 +61,5 @@ public interface Algorithm extends Parameterizable {
*
* @return Type restriction
*/
- public TypeInformation[] getInputTypeRestriction();
+ TypeInformation[] getInputTypeRestriction();
 }
\ No newline at end of file
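[Editor's note] The only change in Algorithm.java drops a redundant modifier: members of a Java interface are implicitly public (and methods implicitly abstract), so the two declarations are equivalent. A one-line illustration, not part of the patch:

public interface Algorithm extends Parameterizable {
  TypeInformation[] getInputTypeRestriction(); // implicitly public abstract
}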
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/DependencyDerivator.java b/src/de/lmu/ifi/dbs/elki/algorithm/DependencyDerivator.java
index e0eabf5c..2992ae4a 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/DependencyDerivator.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/DependencyDerivator.java
@@ -34,18 +34,18 @@ import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid;
import de.lmu.ifi.dbs.elki.math.linearalgebra.LinearEquationSystem;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredResult;
import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredRunner;
import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
@@ -63,6 +63,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
* attributes of a given dataset based on a linear correlation PCA.
* </p>
*
+ * <p>
* Reference: <br>
* E. Achtert, C. Böhm, H.-P. Kriegel, P. Kröger, A. Zimek: Deriving
* Quantitative Dependencies for Correlation Clusters. <br>
@@ -76,32 +77,32 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@Title("Dependency Derivator: Deriving numerical inter-dependencies on data")
@Description("Derives an equality-system describing dependencies between attributes in a correlation-cluster")
@Reference(authors = "E. Achtert, C. Böhm, H.-P. Kriegel, P. Kröger, A. Zimek", title = "Deriving Quantitative Dependencies for Correlation Clusters", booktitle = "Proc. 12th Int. Conf. on Knowledge Discovery and Data Mining (KDD '06), Philadelphia, PA 2006.", url = "http://dx.doi.org/10.1145/1150402.1150408")
-public class DependencyDerivator<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractPrimitiveDistanceBasedAlgorithm<V, D, CorrelationAnalysisSolution<V>> {
+public class DependencyDerivator<V extends NumberVector<?>, D extends Distance<D>> extends AbstractPrimitiveDistanceBasedAlgorithm<V, D, CorrelationAnalysisSolution<V>> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(DependencyDerivator.class);
+ private static final Logging LOG = Logging.getLogger(DependencyDerivator.class);
/**
* Flag to use random sample (use knn query around centroid, if flag is not
* set).
*/
- public static final OptionID DEPENDENCY_DERIVATOR_RANDOM_SAMPLE = OptionID.getOrCreateOptionID("derivator.randomSample", "Flag to use random sample (use knn query around centroid, if flag is not set).");
+ public static final OptionID DEPENDENCY_DERIVATOR_RANDOM_SAMPLE = new OptionID("derivator.randomSample", "Flag to use random sample (use knn query around centroid, if flag is not set).");
/**
* Parameter to specify the threshold for output accuracy fraction digits,
* must be an integer equal to or greater than 0.
*/
- public static final OptionID OUTPUT_ACCURACY_ID = OptionID.getOrCreateOptionID("derivator.accuracy", "Threshold for output accuracy fraction digits.");
+ public static final OptionID OUTPUT_ACCURACY_ID = new OptionID("derivator.accuracy", "Threshold for output accuracy fraction digits.");
/**
* Optional parameter to specify the threshold for the size of the random
* sample to use, must be an integer greater than 0.
- * <p/>
+ * <p>
* Default value: the size of the complete dataset
* </p>
*/
- public static final OptionID SAMPLE_SIZE_ID = OptionID.getOrCreateOptionID("derivator.sampleSize", "Threshold for the size of the random sample to use. " + "Default value is size of the complete dataset.");
+ public static final OptionID SAMPLE_SIZE_ID = new OptionID("derivator.sampleSize", "Threshold for the size of the random sample to use. " + "Default value is size of the complete dataset.");
/**
* Holds the value of {@link #SAMPLE_SIZE_ID}.
@@ -116,7 +117,7 @@ public class DependencyDerivator<V extends NumberVector<V, ?>, D extends Distanc
/**
* Number format for output of solution.
*/
- public final NumberFormat NF;
+ private final NumberFormat nf;
/**
* Flag for random sampling vs. kNN
@@ -134,7 +135,7 @@ public class DependencyDerivator<V extends NumberVector<V, ?>, D extends Distanc
*/
public DependencyDerivator(PrimitiveDistanceFunction<V, D> distanceFunction, NumberFormat nf, PCAFilteredRunner<V> pca, int sampleSize, boolean randomsample) {
super(distanceFunction);
- this.NF = nf;
+ this.nf = nf;
this.pca = pca;
this.sampleSize = sampleSize;
this.randomsample = randomsample;
@@ -150,26 +151,27 @@ public class DependencyDerivator<V extends NumberVector<V, ?>, D extends Distanc
* DependencyDerivator
*/
public CorrelationAnalysisSolution<V> run(Database database, Relation<V> relation) {
- if(logger.isVerbose()) {
- logger.verbose("retrieving database objects...");
+ if(LOG.isVerbose()) {
+ LOG.verbose("retrieving database objects...");
}
- V centroidDV = DatabaseUtil.centroid(relation);
+ Centroid centroid = Centroid.make(relation);
+ V centroidDV = centroid.toVector(relation);
DBIDs ids;
if(this.sampleSize > 0) {
if(randomsample) {
- ids = DBIDUtil.randomSample(relation.getDBIDs(), this.sampleSize, 1l);
+ ids = DBIDUtil.randomSample(relation.getDBIDs(), this.sampleSize, 1L);
}
else {
DistanceQuery<V, D> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
KNNResult<D> queryResults = database.getKNNQuery(distanceQuery, this.sampleSize).getKNNForObject(centroidDV, this.sampleSize);
- ids = DBIDUtil.newHashSet(queryResults.asDBIDs());
+ ids = DBIDUtil.newHashSet(queryResults);
}
}
else {
ids = relation.getDBIDs();
}
- return generateModel(relation, ids, centroidDV);
+ return generateModel(relation, ids, centroid);
}
/**
@@ -181,8 +183,7 @@ public class DependencyDerivator<V extends NumberVector<V, ?>, D extends Distanc
* @return a matrix of equations describing the dependencies
*/
public CorrelationAnalysisSolution<V> generateModel(Relation<V> db, DBIDs ids) {
- V centroidDV = DatabaseUtil.centroid(db, ids);
- return generateModel(db, ids, centroidDV);
+ return generateModel(db, ids, Centroid.make(db, ids));
}
/**
@@ -190,13 +191,13 @@ public class DependencyDerivator<V extends NumberVector<V, ?>, D extends Distanc
*
* @param db the database
* @param ids the set of ids
- * @param centroidDV the centroid
+ * @param centroid the centroid
* @return a matrix of equations describing the dependencies
*/
- public CorrelationAnalysisSolution<V> generateModel(Relation<V> db, DBIDs ids, V centroidDV) {
+ public CorrelationAnalysisSolution<V> generateModel(Relation<V> db, DBIDs ids, Vector centroid) {
CorrelationAnalysisSolution<V> sol;
- if(logger.isDebuggingFine()) {
- logger.debugFine("PCA...");
+ if(LOG.isDebuggingFine()) {
+ LOG.debugFine("PCA...");
}
PCAFilteredResult pcares = pca.processIds(ids, db);
@@ -206,7 +207,6 @@ public class DependencyDerivator<V extends NumberVector<V, ?>, D extends Distanc
// Matrix strongEigenvectors =
// pca.getEigenvectors().times(pca.selectionMatrixOfStrongEigenvectors());
Matrix strongEigenvectors = pcares.getStrongEigenvectors();
- Vector centroid = centroidDV.getColumnVector();
// TODO: what if we don't have any weak eigenvectors?
if(weakEigenvectors.getColumnDimensionality() == 0) {
@@ -214,50 +214,49 @@ public class DependencyDerivator<V extends NumberVector<V, ?>, D extends Distanc
}
else {
Matrix transposedWeakEigenvectors = weakEigenvectors.transpose();
- if(logger.isDebugging()) {
+ if(LOG.isDebugging()) {
StringBuilder log = new StringBuilder();
log.append("Strong Eigenvectors:\n");
- log.append(FormatUtil.format(pcares.getEigenvectors().times(pcares.selectionMatrixOfStrongEigenvectors()), NF)).append('\n');
+ log.append(FormatUtil.format(pcares.getEigenvectors().times(pcares.selectionMatrixOfStrongEigenvectors()), nf)).append('\n');
log.append("Transposed weak Eigenvectors:\n");
- log.append(FormatUtil.format(transposedWeakEigenvectors, NF)).append('\n');
+ log.append(FormatUtil.format(transposedWeakEigenvectors, nf)).append('\n');
log.append("Eigenvalues:\n");
log.append(FormatUtil.format(pcares.getEigenvalues(), " , ", 2));
- logger.debugFine(log.toString());
+ LOG.debugFine(log.toString());
}
- Vector B = transposedWeakEigenvectors.times(centroid);
- if(logger.isDebugging()) {
+ Vector b = transposedWeakEigenvectors.times(centroid);
+ if(LOG.isDebugging()) {
StringBuilder log = new StringBuilder();
log.append("Centroid:\n").append(centroid).append('\n');
log.append("tEV * Centroid\n");
- log.append(B);
- logger.debugFine(log.toString());
+ log.append(b);
+ LOG.debugFine(log.toString());
}
// +1 == + B.getColumnDimensionality()
Matrix gaussJordan = new Matrix(transposedWeakEigenvectors.getRowDimensionality(), transposedWeakEigenvectors.getColumnDimensionality() + 1);
gaussJordan.setMatrix(0, transposedWeakEigenvectors.getRowDimensionality() - 1, 0, transposedWeakEigenvectors.getColumnDimensionality() - 1, transposedWeakEigenvectors);
- gaussJordan.setCol(transposedWeakEigenvectors.getColumnDimensionality(), B);
+ gaussJordan.setCol(transposedWeakEigenvectors.getColumnDimensionality(), b);
- if(logger.isDebuggingFiner()) {
- logger.debugFiner("Gauss-Jordan-Elimination of " + FormatUtil.format(gaussJordan, NF));
+ if(LOG.isDebuggingFiner()) {
+ LOG.debugFiner("Gauss-Jordan-Elimination of " + FormatUtil.format(gaussJordan, nf));
}
double[][] a = new double[transposedWeakEigenvectors.getRowDimensionality()][transposedWeakEigenvectors.getColumnDimensionality()];
double[][] we = transposedWeakEigenvectors.getArrayRef();
- double[] b = B.getArrayRef();
System.arraycopy(we, 0, a, 0, transposedWeakEigenvectors.getRowDimensionality());
- LinearEquationSystem lq = new LinearEquationSystem(a, b);
+ LinearEquationSystem lq = new LinearEquationSystem(a, b.getArrayRef());
lq.solveByTotalPivotSearch();
sol = new CorrelationAnalysisSolution<V>(lq, db, strongEigenvectors, pcares.getWeakEigenvectors(), pcares.similarityMatrix(), centroid);
- if(logger.isDebuggingFine()) {
+ if(LOG.isDebuggingFine()) {
StringBuilder log = new StringBuilder();
log.append("Solution:\n");
log.append("Standard deviation ").append(sol.getStandardDeviation());
- log.append(lq.equationsToString(NF.getMaximumFractionDigits()));
- logger.debugFine(log.toString());
+ log.append(lq.equationsToString(nf.getMaximumFractionDigits()));
+ LOG.debugFine(log.toString());
}
}
return sol;
@@ -270,7 +269,7 @@ public class DependencyDerivator<V extends NumberVector<V, ?>, D extends Distanc
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -280,55 +279,59 @@ public class DependencyDerivator<V extends NumberVector<V, ?>, D extends Distanc
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractPrimitiveDistanceBasedAlgorithm.Parameterizer<V, D> {
+ public static class Parameterizer<V extends NumberVector<?>, D extends Distance<D>> extends AbstractPrimitiveDistanceBasedAlgorithm.Parameterizer<V, D> {
+ /**
+ * Output accuracy.
+ */
protected int outputAccuracy = 0;
+ /**
+ * Sample size.
+ */
protected int sampleSize = 0;
+ /**
+ * Flag to enable random sampling
+ */
protected boolean randomSample = false;
+ /**
+ * Class to compute PCA with
+ */
protected PCAFilteredRunner<V> pca = null;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- configAccuracy(config);
- configSampleSize(config);
- configRandomSampleFlag(config);
- Class<PCAFilteredRunner<V>> cls = ClassGenericsUtil.uglyCastIntoSubclass(PCAFilteredRunner.class);
- pca = config.tryInstantiate(cls);
- }
-
- public void configRandomSampleFlag(Parameterization config) {
- Flag randomSampleF = new Flag(DEPENDENCY_DERIVATOR_RANDOM_SAMPLE);
- if(config.grab(randomSampleF)) {
- randomSample = randomSampleF.getValue();
+
+ IntParameter outputAccuracyP = new IntParameter(OUTPUT_ACCURACY_ID, 4);
+ outputAccuracyP.addConstraint(new GreaterEqualConstraint(0));
+ if(config.grab(outputAccuracyP)) {
+ outputAccuracy = outputAccuracyP.getValue();
}
- }
-
- public void configSampleSize(Parameterization config) {
- IntParameter sampleSizeP = new IntParameter(SAMPLE_SIZE_ID, true);
+
+ IntParameter sampleSizeP = new IntParameter(SAMPLE_SIZE_ID);
+ sampleSizeP.setOptional(true);
sampleSizeP.addConstraint(new GreaterConstraint(0));
if(config.grab(sampleSizeP)) {
sampleSize = sampleSizeP.getValue();
}
- }
-
- public void configAccuracy(Parameterization config) {
- IntParameter outputAccuracyP = new IntParameter(OUTPUT_ACCURACY_ID, 4);
- outputAccuracyP.addConstraint(new GreaterEqualConstraint(0));
- if(config.grab(outputAccuracyP)) {
- outputAccuracy = outputAccuracyP.getValue();
+
+ Flag randomSampleF = new Flag(DEPENDENCY_DERIVATOR_RANDOM_SAMPLE);
+ if(config.grab(randomSampleF)) {
+ randomSample = randomSampleF.getValue();
}
+ Class<PCAFilteredRunner<V>> cls = ClassGenericsUtil.uglyCastIntoSubclass(PCAFilteredRunner.class);
+ pca = config.tryInstantiate(cls);
}
@Override
protected DependencyDerivator<V, D> makeInstance() {
- NumberFormat NF = NumberFormat.getInstance(Locale.US);
- NF.setMaximumFractionDigits(outputAccuracy);
- NF.setMinimumFractionDigits(outputAccuracy);
+ NumberFormat nf = NumberFormat.getInstance(Locale.US);
+ nf.setMaximumFractionDigits(outputAccuracy);
+ nf.setMinimumFractionDigits(outputAccuracy);
- return new DependencyDerivator<V, D>(distanceFunction, NF, pca, sampleSize, randomSample);
+ return new DependencyDerivator<V, D>(distanceFunction, nf, pca, sampleSize, randomSample);
}
}
 }
\ No newline at end of file
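[Editor's note] The main API change in DependencyDerivator replaces DatabaseUtil.centroid() with the Centroid helper, so generateModel() can operate on a raw Vector instead of requiring a NumberVector instance. A condensed sketch of the new flow, not part of the patch, using only calls visible in the hunks above:

Centroid centroid = Centroid.make(relation);  // aggregate centroid over all DBIDs
V centroidDV = centroid.toVector(relation);   // vector-typed copy, needed only for the kNN query
// generateModel() now takes the plain Vector directly:
CorrelationAnalysisSolution<V> sol = generateModel(relation, ids, centroid);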
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/DummyAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/DummyAlgorithm.java
index 64188502..0f871535 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/DummyAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/DummyAlgorithm.java
@@ -53,15 +53,14 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
*/
@Title("Dummy Algorithm")
@Description("The algorithm executes an Euclidean 10NN query on all data points, and can be used in unit testing")
-public class DummyAlgorithm<O extends NumberVector<?, ?>> extends AbstractAlgorithm<Result> {
+public class DummyAlgorithm<O extends NumberVector<?>> extends AbstractAlgorithm<Result> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(DummyAlgorithm.class);
+ private static final Logging LOG = Logging.getLogger(DummyAlgorithm.class);
/**
- * Constructor, adhering to
- * {@link de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable}
+ * Constructor.
*/
public DummyAlgorithm() {
super();
@@ -96,6 +95,6 @@ public class DummyAlgorithm<O extends NumberVector<?, ?>> extends AbstractAlgori
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
 }
\ No newline at end of file
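[Editor's note] Besides the generics cleanup, DummyAlgorithm shows the commit's logger rename in its simplest form: the per-class logger becomes an upper-case constant, matching Java naming conventions for static final fields. A sketch of the pattern, not part of the patch:

private static final Logging LOG = Logging.getLogger(DummyAlgorithm.class);

@Override
protected Logging getLogger() {
  return LOG; // static final fields use constant-style names
}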
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/KNNDistanceOrder.java b/src/de/lmu/ifi/dbs/elki/algorithm/KNNDistanceOrder.java
index 137ffadf..7e9ce77e 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/KNNDistanceOrder.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/KNNDistanceOrder.java
@@ -34,9 +34,9 @@ import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.result.KNNDistanceOrderResult;
@@ -44,7 +44,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.IntervalConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessEqualConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -63,13 +63,13 @@ public class KNNDistanceOrder<O, D extends Distance<D>> extends AbstractDistance
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(KNNDistanceOrder.class);
+ private static final Logging LOG = Logging.getLogger(KNNDistanceOrder.class);
/**
* Parameter to specify the distance of the k-distant object to be assessed,
* must be an integer greater than 0.
*/
- public static final OptionID K_ID = OptionID.getOrCreateOptionID("knndistanceorder.k", "Specifies the distance of the k-distant object to be assessed.");
+ public static final OptionID K_ID = new OptionID("knndistanceorder.k", "Specifies the distance of the k-distant object to be assessed.");
/**
* Holds the value of {@link #K_ID}.
@@ -81,7 +81,7 @@ public class KNNDistanceOrder<O, D extends Distance<D>> extends AbstractDistance
* be provided in the result, must be a double greater than 0 and less than or
* equal to 1.
*/
- public static final OptionID PERCENTAGE_ID = OptionID.getOrCreateOptionID("knndistanceorder.percentage", "The average percentage of distances randomly choosen to be provided in the result.");
+ public static final OptionID PERCENTAGE_ID = new OptionID("knndistanceorder.percentage", "The average percentage of distances randomly choosen to be provided in the result.");
/**
* Holds the value of {@link #PERCENTAGE_ID}.
@@ -132,7 +132,7 @@ public class KNNDistanceOrder<O, D extends Distance<D>> extends AbstractDistance
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -143,10 +143,19 @@ public class KNNDistanceOrder<O, D extends Distance<D>> extends AbstractDistance
* @apiviz.exclude
*/
public static class Parameterizer<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ /**
+ * Parameter k.
+ */
protected int k;
+ /**
+ * Percentage.
+ */
protected double percentage;
+ /**
+ * Constructor.
+ */
public Parameterizer() {
super();
}
@@ -161,7 +170,8 @@ public class KNNDistanceOrder<O, D extends Distance<D>> extends AbstractDistance
}
DoubleParameter percentageP = new DoubleParameter(PERCENTAGE_ID, 1.0);
- percentageP.addConstraint(new IntervalConstraint(0, IntervalConstraint.IntervalBoundary.OPEN, 1, IntervalConstraint.IntervalBoundary.CLOSE));
+ percentageP.addConstraint(new GreaterConstraint(0));
+ percentageP.addConstraint(new LessEqualConstraint(1));
if(config.grab(percentageP)) {
percentage = percentageP.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/KNNJoin.java b/src/de/lmu/ifi/dbs/elki/algorithm/KNNJoin.java
index 3eb789c7..9e73d959 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/KNNJoin.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/KNNJoin.java
@@ -37,14 +37,16 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.query.DoubleDistanceResultPair;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.DistanceUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDoubleDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNHeap;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNHeap;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNUtil;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.index.tree.LeafEntry;
import de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialEntry;
import de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialIndexTree;
@@ -55,8 +57,6 @@ import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
import de.lmu.ifi.dbs.elki.result.ResultUtil;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNHeap;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNList;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
@@ -84,20 +84,20 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
*/
@Title("K-Nearest Neighbor Join")
@Description("Algorithm to find the k-nearest neighbors of each object in a spatial database")
-public class KNNJoin<V extends NumberVector<V, ?>, D extends Distance<D>, N extends SpatialNode<N, E>, E extends SpatialEntry> extends AbstractDistanceBasedAlgorithm<V, D, DataStore<KNNList<D>>> {
+public class KNNJoin<V extends NumberVector<?>, D extends Distance<D>, N extends SpatialNode<N, E>, E extends SpatialEntry> extends AbstractDistanceBasedAlgorithm<V, D, DataStore<KNNResult<D>>> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(KNNJoin.class);
+ private static final Logging LOG = Logging.getLogger(KNNJoin.class);
/**
* Parameter that specifies the k-nearest neighbors to be assigned, must be an
* integer greater than 0. Default value: 1.
*/
- public static final OptionID K_ID = OptionID.getOrCreateOptionID("knnjoin.k", "Specifies the k-nearest neighbors to be assigned.");
+ public static final OptionID K_ID = new OptionID("knnjoin.k", "Specifies the k-nearest neighbors to be assigned.");
/**
- * The k parameter
+ * The k parameter.
*/
int k;
@@ -120,12 +120,12 @@ public class KNNJoin<V extends NumberVector<V, ?>, D extends Distance<D>, N exte
* @return result
*/
@SuppressWarnings("unchecked")
- public WritableDataStore<KNNList<D>> run(Database database, Relation<V> relation) {
- if(!(getDistanceFunction() instanceof SpatialPrimitiveDistanceFunction)) {
+ public WritableDataStore<KNNResult<D>> run(Database database, Relation<V> relation) {
+ if (!(getDistanceFunction() instanceof SpatialPrimitiveDistanceFunction)) {
throw new IllegalStateException("Distance Function must be an instance of " + SpatialPrimitiveDistanceFunction.class.getName());
}
Collection<SpatialIndexTree<N, E>> indexes = ResultUtil.filterResults(database, SpatialIndexTree.class);
- if(indexes.size() != 1) {
+ if (indexes.size() != 1) {
throw new AbortException("KNNJoin found " + indexes.size() + " spatial indexes, expected exactly one.");
}
// FIXME: Ensure were looking at the right relation!
@@ -133,9 +133,6 @@ public class KNNJoin<V extends NumberVector<V, ?>, D extends Distance<D>, N exte
SpatialPrimitiveDistanceFunction<V, D> distFunction = (SpatialPrimitiveDistanceFunction<V, D>) getDistanceFunction();
DBIDs ids = relation.getDBIDs();
- // Optimize for double?
- final boolean doubleOptimize = (getDistanceFunction() instanceof SpatialPrimitiveDoubleDistanceFunction);
-
// data pages
List<E> ps_candidates = new ArrayList<E>(index.getLeaves());
// knn heaps
@@ -143,50 +140,49 @@ public class KNNJoin<V extends NumberVector<V, ?>, D extends Distance<D>, N exte
Heap<Task> pq = new Heap<Task>(ps_candidates.size() * ps_candidates.size() / 10);
// Initialize with the page self-pairing
- for(int i = 0; i < ps_candidates.size(); i++) {
+ for (int i = 0; i < ps_candidates.size(); i++) {
E pr_entry = ps_candidates.get(i);
N pr = index.getNode(pr_entry);
- heaps.add(initHeaps(distFunction, doubleOptimize, pr));
+ heaps.add(initHeaps(distFunction, pr));
}
// Build priority queue
- final int sqsize = ps_candidates.size() * (ps_candidates.size() - 1) / 2;
- if(logger.isDebuggingFine()) {
- logger.debugFine("Number of leaves: " + ps_candidates.size() + " so " + sqsize + " MBR computations.");
+ final int sqsize = ps_candidates.size() * (ps_candidates.size() - 1) >> 1;
+ if (LOG.isDebuggingFine()) {
+ LOG.debugFine("Number of leaves: " + ps_candidates.size() + " so " + sqsize + " MBR computations.");
}
- FiniteProgress mprogress = logger.isVerbose() ? new FiniteProgress("Comparing leaf MBRs", sqsize, logger) : null;
- for(int i = 0; i < ps_candidates.size(); i++) {
+ FiniteProgress mprogress = LOG.isVerbose() ? new FiniteProgress("Comparing leaf MBRs", sqsize, LOG) : null;
+ for (int i = 0; i < ps_candidates.size(); i++) {
E pr_entry = ps_candidates.get(i);
List<KNNHeap<D>> pr_heaps = heaps.get(i);
D pr_knn_distance = computeStopDistance(pr_heaps);
- for(int j = i + 1; j < ps_candidates.size(); j++) {
+ for (int j = i + 1; j < ps_candidates.size(); j++) {
E ps_entry = ps_candidates.get(j);
List<KNNHeap<D>> ps_heaps = heaps.get(j);
D ps_knn_distance = computeStopDistance(ps_heaps);
D minDist = distFunction.minDist(pr_entry, ps_entry);
// Resolve immediately:
- if(minDist.isNullDistance()) {
+ if (minDist.isNullDistance()) {
N pr = index.getNode(ps_candidates.get(i));
N ps = index.getNode(ps_candidates.get(j));
- processDataPagesOptimize(distFunction, doubleOptimize, pr_heaps, ps_heaps, pr, ps);
- }
- else if(minDist.compareTo(pr_knn_distance) <= 0 || minDist.compareTo(ps_knn_distance) <= 0) {
+ processDataPagesOptimize(distFunction, pr_heaps, ps_heaps, pr, ps);
+ } else if (minDist.compareTo(pr_knn_distance) <= 0 || minDist.compareTo(ps_knn_distance) <= 0) {
pq.add(new Task(minDist, i, j));
}
- if(mprogress != null) {
- mprogress.incrementProcessed(logger);
+ if (mprogress != null) {
+ mprogress.incrementProcessed(LOG);
}
}
}
- if(mprogress != null) {
- mprogress.ensureCompleted(logger);
+ if (mprogress != null) {
+ mprogress.ensureCompleted(LOG);
}
// Process the queue
- FiniteProgress qprogress = logger.isVerbose() ? new FiniteProgress("Processing queue", pq.size(), logger) : null;
- IndefiniteProgress fprogress = logger.isVerbose() ? new IndefiniteProgress("Full comparisons", logger) : null;
- while(!pq.isEmpty()) {
+ FiniteProgress qprogress = LOG.isVerbose() ? new FiniteProgress("Processing queue", pq.size(), LOG) : null;
+ IndefiniteProgress fprogress = LOG.isVerbose() ? new IndefiniteProgress("Full comparisons", LOG) : null;
+ while (!pq.isEmpty()) {
Task task = pq.poll();
List<KNNHeap<D>> pr_heaps = heaps.get(task.i);
List<KNNHeap<D>> ps_heaps = heaps.get(task.j);
@@ -194,47 +190,45 @@ public class KNNJoin<V extends NumberVector<V, ?>, D extends Distance<D>, N exte
D ps_knn_distance = computeStopDistance(ps_heaps);
boolean dor = task.mindist.compareTo(pr_knn_distance) <= 0;
boolean dos = task.mindist.compareTo(ps_knn_distance) <= 0;
- if(dor || dos) {
+ if (dor || dos) {
N pr = index.getNode(ps_candidates.get(task.i));
N ps = index.getNode(ps_candidates.get(task.j));
- if(dor && dos) {
- processDataPagesOptimize(distFunction, doubleOptimize, pr_heaps, ps_heaps, pr, ps);
- }
- else {
- if(dor) {
- processDataPagesOptimize(distFunction, doubleOptimize, pr_heaps, null, pr, ps);
- }
- else /* dos */{
- processDataPagesOptimize(distFunction, doubleOptimize, ps_heaps, null, ps, pr);
+ if (dor && dos) {
+ processDataPagesOptimize(distFunction, pr_heaps, ps_heaps, pr, ps);
+ } else {
+ if (dor) {
+ processDataPagesOptimize(distFunction, pr_heaps, null, pr, ps);
+ } else /* dos */{
+ processDataPagesOptimize(distFunction, ps_heaps, null, ps, pr);
}
}
- if(fprogress != null) {
- fprogress.incrementProcessed(logger);
+ if (fprogress != null) {
+ fprogress.incrementProcessed(LOG);
}
}
- if(qprogress != null) {
- qprogress.incrementProcessed(logger);
+ if (qprogress != null) {
+ qprogress.incrementProcessed(LOG);
}
}
- if(qprogress != null) {
- qprogress.ensureCompleted(logger);
+ if (qprogress != null) {
+ qprogress.ensureCompleted(LOG);
}
- if(fprogress != null) {
- fprogress.setCompleted(logger);
+ if (fprogress != null) {
+ fprogress.setCompleted(LOG);
}
- WritableDataStore<KNNList<D>> knnLists = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_STATIC, KNNList.class);
+ WritableDataStore<KNNResult<D>> knnLists = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_STATIC, KNNResult.class);
// FiniteProgress progress = logger.isVerbose() ? new
// FiniteProgress(this.getClass().getName(), relation.size(), logger) :
// null;
- FiniteProgress pageprog = logger.isVerbose() ? new FiniteProgress("Number of processed data pages", ps_candidates.size(), logger) : null;
+ FiniteProgress pageprog = LOG.isVerbose() ? new FiniteProgress("Number of processed data pages", ps_candidates.size(), LOG) : null;
// int processed = 0;
- for(int i = 0; i < ps_candidates.size(); i++) {
+ for (int i = 0; i < ps_candidates.size(); i++) {
N pr = index.getNode(ps_candidates.get(i));
List<KNNHeap<D>> pr_heaps = heaps.get(i);
// Finalize lists
- for(int j = 0; j < pr.getNumEntries(); j++) {
+ for (int j = 0; j < pr.getNumEntries(); j++) {
knnLists.put(((LeafEntry) pr.getEntry(j)).getDBID(), pr_heaps.get(j).toKNNList());
}
// Forget heaps and pq
@@ -244,29 +238,35 @@ public class KNNJoin<V extends NumberVector<V, ?>, D extends Distance<D>, N exte
// if(progress != null) {
// progress.setProcessed(processed, logger);
// }
- if(pageprog != null) {
- pageprog.incrementProcessed(logger);
+ if (pageprog != null) {
+ pageprog.incrementProcessed(LOG);
}
}
// if(progress != null) {
// progress.ensureCompleted(logger);
// }
- if(pageprog != null) {
- pageprog.ensureCompleted(logger);
+ if (pageprog != null) {
+ pageprog.ensureCompleted(LOG);
}
return knnLists;
}
- private List<KNNHeap<D>> initHeaps(SpatialPrimitiveDistanceFunction<V, D> distFunction, final boolean doubleOptimize, N pr) {
- List<KNNHeap<D>> pr_heaps;
+ /**
+ * Initialize the heaps.
+ *
+ * @param distFunction Distance function
+ * @param pr Node to initialize for
+ * @return List of heaps
+ */
+ private List<KNNHeap<D>> initHeaps(SpatialPrimitiveDistanceFunction<V, D> distFunction, N pr) {
+ List<KNNHeap<D>> pr_heaps = new ArrayList<KNNHeap<D>>(pr.getNumEntries());
// Create for each data object a knn heap
- pr_heaps = new ArrayList<KNNHeap<D>>(pr.getNumEntries());
- for(int j = 0; j < pr.getNumEntries(); j++) {
- pr_heaps.add(new KNNHeap<D>(k, distFunction.getDistanceFactory().infiniteDistance()));
+ for (int j = 0; j < pr.getNumEntries(); j++) {
+ pr_heaps.add(KNNUtil.newHeap(distFunction, k));
}
// Self-join first, as this is expected to improve most and cannot be
// pruned.
- processDataPagesOptimize(distFunction, doubleOptimize, pr_heaps, null, pr, pr);
+ processDataPagesOptimize(distFunction, pr_heaps, null, pr, pr);
return pr_heaps;
}
@@ -275,27 +275,26 @@ public class KNNJoin<V extends NumberVector<V, ?>, D extends Distance<D>, N exte
* neighbors of pr in ps.
*
* @param distFunction the distance to use
- * @param doubleOptimize Flag whether to optimize for doubles.
* @param pr the first data page
* @param ps the second data page
* @param pr_heaps the knn lists for each data object in pr
* @param ps_heaps the knn lists for each data object in ps (if ps != pr)
*/
- private void processDataPagesOptimize(SpatialPrimitiveDistanceFunction<V, D> distFunction, final boolean doubleOptimize, List<KNNHeap<D>> pr_heaps, List<KNNHeap<D>> ps_heaps, N pr, N ps) {
- if(doubleOptimize) {
+ @SuppressWarnings("unchecked")
+ private void processDataPagesOptimize(SpatialPrimitiveDistanceFunction<V, D> distFunction, List<? extends KNNHeap<D>> pr_heaps, List<? extends KNNHeap<D>> ps_heaps, N pr, N ps) {
+ if (DistanceUtil.isDoubleDistanceFunction(distFunction)) {
List<?> khp = (List<?>) pr_heaps;
List<?> khs = (List<?>) ps_heaps;
- processDataPagesDouble((SpatialPrimitiveDoubleDistanceFunction<? super V>) distFunction, pr, ps, (List<KNNHeap<DoubleDistance>>) khp, (List<KNNHeap<DoubleDistance>>) khs);
- }
- else {
- for(int j = 0; j < ps.getNumEntries(); j++) {
+ processDataPagesDouble((SpatialPrimitiveDoubleDistanceFunction<? super V>) distFunction, pr, ps, (List<DoubleDistanceKNNHeap>) khp, (List<DoubleDistanceKNNHeap>) khs);
+ } else {
+ for (int j = 0; j < ps.getNumEntries(); j++) {
final SpatialPointLeafEntry s_e = (SpatialPointLeafEntry) ps.getEntry(j);
DBID s_id = s_e.getDBID();
- for(int i = 0; i < pr.getNumEntries(); i++) {
+ for (int i = 0; i < pr.getNumEntries(); i++) {
final SpatialPointLeafEntry r_e = (SpatialPointLeafEntry) pr.getEntry(i);
D distance = distFunction.minDist(s_e, r_e);
pr_heaps.get(i).add(distance, s_id);
- if(pr != ps && ps_heaps != null) {
+ if (pr != ps && ps_heaps != null) {
ps_heaps.get(j).add(distance, r_e.getDBID());
}
}
@@ -313,40 +312,42 @@ public class KNNJoin<V extends NumberVector<V, ?>, D extends Distance<D>, N exte
* @param pr_heaps the knn lists for each data object
* @param ps_heaps the knn lists for each data object in ps
*/
- private void processDataPagesDouble(SpatialPrimitiveDoubleDistanceFunction<? super V> df, N pr, N ps, List<KNNHeap<DoubleDistance>> pr_heaps, List<KNNHeap<DoubleDistance>> ps_heaps) {
+ private void processDataPagesDouble(SpatialPrimitiveDoubleDistanceFunction<? super V> df, N pr, N ps, List<DoubleDistanceKNNHeap> pr_heaps, List<DoubleDistanceKNNHeap> ps_heaps) {
// Compare pairwise
- for(int j = 0; j < ps.getNumEntries(); j++) {
+ for (int j = 0; j < ps.getNumEntries(); j++) {
final SpatialPointLeafEntry s_e = (SpatialPointLeafEntry) ps.getEntry(j);
DBID s_id = s_e.getDBID();
- for(int i = 0; i < pr.getNumEntries(); i++) {
+ for (int i = 0; i < pr.getNumEntries(); i++) {
final SpatialPointLeafEntry r_e = (SpatialPointLeafEntry) pr.getEntry(i);
double distance = df.doubleMinDist(s_e, r_e);
- pr_heaps.get(i).add(new DoubleDistanceResultPair(distance, s_id));
- if(pr != ps && ps_heaps != null) {
- ps_heaps.get(j).add(new DoubleDistanceResultPair(distance, r_e.getDBID()));
+ pr_heaps.get(i).add(distance, s_id);
+ if (pr != ps && ps_heaps != null) {
+ ps_heaps.get(j).add(distance, r_e.getDBID());
}
}
}
}
/**
- * Compute the maximum stop distance
+ * Compute the maximum stop distance.
*
- * @param heaps
+ * @param heaps Heaps list
-   * @return the k-nearest neighbor distance of pr in ps
+   * @return the maximum kNN distance over all heaps in the list
*/
private D computeStopDistance(List<KNNHeap<D>> heaps) {
// Update pruning distance
D pr_knn_distance = null;
- for(KNNHeap<D> knnList : heaps) {
+ for (KNNHeap<D> knnList : heaps) {
// set kNN distance of r
- if(pr_knn_distance == null) {
+ if (pr_knn_distance == null) {
pr_knn_distance = knnList.getKNNDistance();
- }
- else {
+ } else {
pr_knn_distance = DistanceUtil.max(knnList.getKNNDistance(), pr_knn_distance);
}
}
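+    // All heaps were empty: return an infinite distance, i.e. disable pruning.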
+ if (pr_knn_distance == null) {
+ return getDistanceFunction().getDistanceFactory().infiniteDistance();
+ }
return pr_knn_distance;
}
@@ -357,29 +358,38 @@ public class KNNJoin<V extends NumberVector<V, ?>, D extends Distance<D>, N exte
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
- * Task in the processing queue
+ * Task in the processing queue.
*
* @author Erich Schubert
*
* @apiviz.exclude
*/
private class Task implements Comparable<Task> {
+ /**
+ * Minimum distance.
+ */
final D mindist;
+ /**
+ * First offset.
+ */
final int i;
+ /**
+ * Second offset.
+ */
final int j;
/**
* Constructor.
*
- * @param mindist
- * @param i
- * @param j
+ * @param mindist Minimum distance
+ * @param i First offset
+ * @param j Second offset
*/
public Task(D mindist, int i, int j) {
super();
@@ -401,7 +411,10 @@ public class KNNJoin<V extends NumberVector<V, ?>, D extends Distance<D>, N exte
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>, D extends Distance<D>, N extends SpatialNode<N, E>, E extends SpatialEntry> extends AbstractPrimitiveDistanceBasedAlgorithm.Parameterizer<V, D> {
+ public static class Parameterizer<V extends NumberVector<?>, D extends Distance<D>, N extends SpatialNode<N, E>, E extends SpatialEntry> extends AbstractPrimitiveDistanceBasedAlgorithm.Parameterizer<V, D> {
+ /**
+ * K parameter.
+ */
protected int k;
@Override
@@ -409,7 +422,7 @@ public class KNNJoin<V extends NumberVector<V, ?>, D extends Distance<D>, N exte
super.makeOptions(config);
IntParameter kP = new IntParameter(K_ID, 1);
kP.addConstraint(new GreaterConstraint(0));
- if(config.grab(kP)) {
+ if (config.grab(kP)) {
k = kP.getValue();
}
}
@@ -419,4 +432,4 @@ public class KNNJoin<V extends NumberVector<V, ?>, D extends Distance<D>, N exte
return new KNNJoin<V, D, N, E>(distanceFunction, k);
}
}
-}
\ No newline at end of file
+}
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/MaterializeDistances.java b/src/de/lmu/ifi/dbs/elki/algorithm/MaterializeDistances.java
index b09f7ac2..95a2a2b9 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/MaterializeDistances.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/MaterializeDistances.java
@@ -31,6 +31,7 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
@@ -52,13 +53,14 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.CTriple;
* @param <O> Object type
* @param <D> Distance type
*/
+// TODO: use DBIDPair -> D map?
@Title("MaterializeDistances")
@Description("Materialize all distances in the data set to use as cached/precalculated data.")
public class MaterializeDistances<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, CollectionResult<CTriple<DBID, DBID, Double>>> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(MaterializeDistances.class);
+ private static final Logging LOG = Logging.getLogger(MaterializeDistances.class);
/**
* Constructor.
@@ -71,21 +73,25 @@ public class MaterializeDistances<O, D extends NumberDistance<D, ?>> extends Abs
/**
* Iterates over all points in the database.
+ *
+ * @param database Database to process
+ * @param relation Relation to process
+ * @return Distance matrix
*/
public CollectionResult<CTriple<DBID, DBID, Double>> run(Database database, Relation<O> relation) {
DistanceQuery<O, D> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
final int size = relation.size();
- Collection<CTriple<DBID, DBID, Double>> r = new ArrayList<CTriple<DBID, DBID, Double>>(size * (size + 1) / 2);
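+    // Expect size*(size+1)/2 entries: self-pairs are kept, inverted pairs skipped.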
+ Collection<CTriple<DBID, DBID, Double>> r = new ArrayList<CTriple<DBID, DBID, Double>>(size * (size + 1) >> 1);
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
for(DBIDIter iditer2 = relation.iterDBIDs(); iditer2.valid(); iditer2.advance()) {
// skip inverted pairs
- if(iditer2.compareDBID(iditer) > 0) {
+ if(DBIDUtil.compare(iditer2, iditer) > 0) {
continue;
}
double d = distFunc.distance(iditer, iditer2).doubleValue();
- r.add(new CTriple<DBID, DBID, Double>(iditer.getDBID(), iditer2.getDBID(), d));
+ r.add(new CTriple<DBID, DBID, Double>(DBIDUtil.deref(iditer), DBIDUtil.deref(iditer2), d));
}
}
return new CollectionResult<CTriple<DBID, DBID, Double>>("Distance Matrix", "distance-matrix", r);
@@ -93,7 +99,7 @@ public class MaterializeDistances<O, D extends NumberDistance<D, ?>> extends Abs
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
@Override
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/NullAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/NullAlgorithm.java
index 490d79fb..abd4c963 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/NullAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/NullAlgorithm.java
@@ -43,7 +43,7 @@ public class NullAlgorithm extends AbstractAlgorithm<Result> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(NullAlgorithm.class);
+ private static final Logging LOG = Logging.getLogger(NullAlgorithm.class);
/**
* Constructor.
@@ -59,7 +59,7 @@ public class NullAlgorithm extends AbstractAlgorithm<Result> {
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
@Override
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/benchmark/KNNBenchmarkAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/benchmark/KNNBenchmarkAlgorithm.java
new file mode 100644
index 00000000..b0ea8cc1
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/benchmark/KNNBenchmarkAlgorithm.java
@@ -0,0 +1,303 @@
+package de.lmu.ifi.dbs.elki.algorithm.benchmark;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRange;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.datasource.DatabaseConnection;
+import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.math.MeanVariance;
+import de.lmu.ifi.dbs.elki.result.Result;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.Util;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
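+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;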
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
+
+/**
+ * Benchmarking algorithm that computes the k nearest neighbors for each query
+ * point. The query points can either come from a separate data source, or from
+ * the original database.
+ *
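+ * A minimal example invocation (a sketch only; option names as defined in the
+ * Parameterizer below, here sampling 10% of the database as query points):
+ *
+ * <pre>
+ * -algorithm benchmark.KNNBenchmarkAlgorithm -knnbench.k 10 -knnbench.sampling 0.1
+ * </pre>
+ *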
+ * @author Erich Schubert
+ *
+ * @param <O> Object type
+ * @param <D> Distance type
+ *
+ * @apiviz.uses KNNQuery
+ */
+public class KNNBenchmarkAlgorithm<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm<O, D, Result> {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(KNNBenchmarkAlgorithm.class);
+
+ /**
+ * Number of neighbors to retrieve.
+ */
+ protected int k = 10;
+
+ /**
+ * The alternate query point source. Optional.
+ */
+ protected DatabaseConnection queries = null;
+
+ /**
+ * Sampling size.
+ */
+ protected double sampling = -1;
+
+ /**
+ * Random generator factory
+ */
+ protected RandomFactory random;
+
+ /**
+ * Constructor.
+ *
+ * @param distanceFunction Distance function to use
+ * @param k K parameter
+ * @param queries Query data set (may be null!)
+ * @param sampling Sampling rate
+ * @param random Random factory
+ */
+ public KNNBenchmarkAlgorithm(DistanceFunction<? super O, D> distanceFunction, int k, DatabaseConnection queries, double sampling, RandomFactory random) {
+ super(distanceFunction);
+ this.k = k;
+ this.queries = queries;
+ this.sampling = sampling;
+ this.random = random;
+ }
+
+ /**
+ * Run the algorithm.
+ *
+ * @param database Database
+ * @param relation Relation
+ * @return Null result
+ */
+ public Result run(Database database, Relation<O> relation) {
+ // Get a distance and kNN query instance.
+ DistanceQuery<O, D> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
+    KNNQuery<O, D> knnQuery = database.getKNNQuery(distQuery, k);
+
+ // No query set - use original database.
+ if (queries == null) {
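+      // Sampling semantics: <= 0 uses all objects, values below 1.1 are taken
+      // as a relative share, larger values as an absolute sample size.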
+ final DBIDs sample;
+ if (sampling <= 0) {
+ sample = relation.getDBIDs();
+ } else if (sampling < 1.1) {
+ int size = (int) Math.min(sampling * relation.size(), relation.size());
+ sample = DBIDUtil.randomSample(relation.getDBIDs(), size, random);
+ } else {
+ int size = (int) Math.min(sampling, relation.size());
+ sample = DBIDUtil.randomSample(relation.getDBIDs(), size, random);
+ }
+ FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
+ int hash = 0;
+ MeanVariance mv = new MeanVariance();
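+      // Aggregate a checksum and the result sizes, so the query results are
+      // actually consumed and different runs can be compared.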
+ for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
+ KNNResult<D> knns = knnQuery.getKNNForDBID(iditer, k);
+ int ichecksum = 0;
+ for (DBIDIter it = knns.iter(); it.valid(); it.advance()) {
+ ichecksum += it.internalGetIndex();
+ }
+ hash = Util.mixHashCodes(hash, ichecksum);
+ mv.put(knns.size());
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
+ }
+ }
+ if (prog != null) {
+ prog.ensureCompleted(LOG);
+ }
+ if (LOG.isVerbose()) {
+ LOG.verbose("Result hashcode: " + hash);
+ LOG.verbose("Mean number of results: "+mv.toString());
+ }
+ } else {
+ // Separate query set.
+ TypeInformation res = getDistanceFunction().getInputTypeRestriction();
+ MultipleObjectsBundle bundle = queries.loadData();
+ int col = -1;
+ for (int i = 0; i < bundle.metaLength(); i++) {
+ if (res.isAssignableFromType(bundle.meta(i))) {
+ col = i;
+ break;
+ }
+ }
+ if (col < 0) {
+ throw new AbortException("No compatible data type in query input was found. Expected: " + res.toString());
+ }
+      // Random sampling is a bit of a hack, sorry.
+ // But currently, we don't (yet) have an "integer random sample" function.
+ DBIDRange sids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
+
+ final DBIDs sample;
+ if (sampling <= 0) {
+ sample = sids;
+ } else if (sampling < 1.1) {
+        int size = (int) Math.min(sampling * sids.size(), sids.size());
+ sample = DBIDUtil.randomSample(sids, size, random);
+ } else {
+ int size = (int) Math.min(sampling, sids.size());
+ sample = DBIDUtil.randomSample(sids, size, random);
+ }
+ FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
+ int hash = 0;
+ MeanVariance mv = new MeanVariance();
+ for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
+ int off = sids.binarySearch(iditer);
+ assert (off >= 0);
+ @SuppressWarnings("unchecked")
+ O o = (O) bundle.data(off, col);
+ KNNResult<D> knns = knnQuery.getKNNForObject(o, k);
+ int ichecksum = 0;
+ for (DBIDIter it = knns.iter(); it.valid(); it.advance()) {
+ ichecksum += it.internalGetIndex();
+ }
+ hash = Util.mixHashCodes(hash, ichecksum);
+ mv.put(knns.size());
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
+ }
+      }
+      if (prog != null) {
+        prog.ensureCompleted(LOG);
+      }
+      if (LOG.isVerbose()) {
+        LOG.verbose("Result hashcode: " + hash);
+        LOG.verbose("Mean number of results: " + mv.toString());
+      }
+    }
+ return null;
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(getDistanceFunction().getInputTypeRestriction());
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+   * Parameterization class.
+ *
+ * @apiviz.exclude
+ *
+ * @author Erich Schubert
+ *
+ * @param <O> Object type
+ * @param <D> Distance type
+ */
+ public static class Parameterizer<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ /**
+ * Parameter for the number of neighbors.
+ */
+    public static final OptionID K_ID = new OptionID("knnbench.k", "Number of neighbors to retrieve for kNN benchmarking.");
+
+ /**
+ * Parameter for the query dataset.
+ */
+ public static final OptionID QUERY_ID = new OptionID("knnbench.query", "Data source for the queries. If not set, the queries are taken from the database.");
+
+ /**
+ * Parameter for the sampling size.
+ */
+    public static final OptionID SAMPLING_ID = new OptionID("knnbench.sampling", "Sampling size parameter. If the value is less than or equal to 1, it is assumed to be a relative share. Larger values will be interpreted as integer sizes. By default, all data will be used.");
+
+ /**
+ * Parameter for the random generator
+ */
+ public static final OptionID RANDOM_ID = new OptionID("knnbench.random", "Random generator for sampling.");
+
+ /**
+ * K parameter
+ */
+ protected int k = 10;
+
+ /**
+ * The alternate query point source. Optional.
+ */
+ protected DatabaseConnection queries = null;
+
+ /**
+ * Sampling size.
+ */
+ protected double sampling = -1;
+
+ /**
+ * Random generator factory
+ */
+ protected RandomFactory random;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+      IntParameter kP = new IntParameter(K_ID);
+      kP.addConstraint(new GreaterConstraint(0));
+ if (config.grab(kP)) {
+ k = kP.intValue();
+ }
+ ObjectParameter<DatabaseConnection> queryP = new ObjectParameter<DatabaseConnection>(QUERY_ID, DatabaseConnection.class);
+ queryP.setOptional(true);
+ if (config.grab(queryP)) {
+ queries = queryP.instantiateClass(config);
+ }
+ DoubleParameter samplingP = new DoubleParameter(SAMPLING_ID);
+ samplingP.setOptional(true);
+ if (config.grab(samplingP)) {
+ sampling = samplingP.doubleValue();
+ }
+ RandomParameter randomP = new RandomParameter(RANDOM_ID, RandomFactory.DEFAULT);
+ if (config.grab(randomP)) {
+ random = randomP.getValue();
+ }
+ }
+
+ @Override
+ protected KNNBenchmarkAlgorithm<O, D> makeInstance() {
+ return new KNNBenchmarkAlgorithm<O, D>(distanceFunction, k, queries, sampling, random);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/benchmark/RangeQueryBenchmarkAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/benchmark/RangeQueryBenchmarkAlgorithm.java
new file mode 100644
index 00000000..f483321d
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/benchmark/RangeQueryBenchmarkAlgorithm.java
@@ -0,0 +1,357 @@
+package de.lmu.ifi.dbs.elki.algorithm.benchmark;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.NumberVector.Factory;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRange;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
+import de.lmu.ifi.dbs.elki.datasource.DatabaseConnection;
+import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.math.MeanVariance;
+import de.lmu.ifi.dbs.elki.result.Result;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.Util;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
+
+/**
+ * Benchmarking algorithm that computes a range query for each point. The query
+ * points can either come from a separate data source, or from the original
+ * database. In the latter case, the database is expected to have an additional,
+ * 1-dimensional vector field. For the separate data source, the last dimension
+ * will be cut off and used as query radius.
+ *
+ * The simplest data setup is to have an input file:
+ *
+ * <pre>
+ * x y z label
+ * 1 2 3 Example1
+ * 4 5 6 Example2
+ * 7 8 9 Example3
+ * </pre>
+ *
+ * and a query file:
+ *
+ * <pre>
+ * x y z radius
+ * 1 2 3 1.2
+ * 4 5 6 3.3
+ * 7 8 9 4.1
+ * </pre>
+ *
+ * where the additional column is the radius.
+ *
+ * Alternatively, if you work with a single file, you need to use the filter
+ * command <tt>-dbc.filter SplitNumberVectorFilter -split.dims 1,2,3</tt> to
+ * split the relation into a 3-dimensional data vector and a 1-dimensional
+ * radius vector.
+ *
+ * TODO: alternatively, allow using a fixed radius?
+ *
+ * @author Erich Schubert
+ *
+ * @param <O> Vector type
+ * @param <D> Distance type
+ *
+ * @apiviz.uses RangeQuery
+ */
+public class RangeQueryBenchmarkAlgorithm<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, Result> {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(RangeQueryBenchmarkAlgorithm.class);
+
+ /**
+ * The alternate query point source. Optional.
+ */
+ protected DatabaseConnection queries = null;
+
+ /**
+ * Sampling size.
+ */
+ protected double sampling = -1;
+
+ /**
+ * Random generator factory
+ */
+ protected RandomFactory random;
+
+ /**
+ * Constructor.
+ *
+ * @param distanceFunction Distance function to use
+ * @param queries Query data set (may be null!)
+ * @param sampling Sampling rate
+ * @param random Random factory
+ */
+ public RangeQueryBenchmarkAlgorithm(DistanceFunction<? super O, D> distanceFunction, DatabaseConnection queries, double sampling, RandomFactory random) {
+ super(distanceFunction);
+ this.queries = queries;
+ this.sampling = sampling;
+ this.random = random;
+ }
+
+ /**
+ * Run the algorithm, with separate radius relation
+ *
+ * @param database Database
+ * @param relation Relation
+ * @param radrel Radius relation
+ * @return Null result
+ */
+ public Result run(Database database, Relation<O> relation, Relation<NumberVector<?>> radrel) {
+ if (queries != null) {
+ throw new AbortException("This 'run' method will not use the given query set!");
+ }
+ // Get a distance and kNN query instance.
+ DistanceQuery<O, D> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
+ RangeQuery<O, D> rangeQuery = database.getRangeQuery(distQuery);
+ D dfactory = distQuery.getDistanceFactory();
+
+ final DBIDs sample;
+ if (sampling <= 0) {
+ sample = relation.getDBIDs();
+ } else if (sampling < 1.1) {
+ int size = (int) Math.min(sampling * relation.size(), relation.size());
+ sample = DBIDUtil.randomSample(relation.getDBIDs(), size, random);
+ } else {
+ int size = (int) Math.min(sampling, relation.size());
+ sample = DBIDUtil.randomSample(relation.getDBIDs(), size, random);
+ }
+    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Range queries", sample.size(), LOG) : null;
+ int hash = 0;
+ MeanVariance mv = new MeanVariance();
+ for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
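+      // The query radius is read from the separate 1-dimensional radius relation.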
+ D r = dfactory.fromDouble(radrel.get(iditer).doubleValue(0));
+ DistanceDBIDResult<D> rres = rangeQuery.getRangeForDBID(iditer, r);
+ int ichecksum = 0;
+ for (DBIDIter it = rres.iter(); it.valid(); it.advance()) {
+ ichecksum += it.internalGetIndex();
+ }
+ hash = Util.mixHashCodes(hash, ichecksum);
+ mv.put(rres.size());
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
+ }
+ }
+ if (prog != null) {
+ prog.ensureCompleted(LOG);
+ }
+ if (LOG.isVerbose()) {
+ LOG.verbose("Result hashcode: " + hash);
+ LOG.verbose("Mean number of results: "+mv.toString());
+ }
+ return null;
+ }
+
+ /**
+ * Run the algorithm, with a separate query set.
+ *
+ * @param database Database
+ * @param relation Relation
+ * @return Null result
+ */
+ public Result run(Database database, Relation<O> relation) {
+ if (queries == null) {
+ throw new AbortException("A query set is required for this 'run' method.");
+ }
+ // Get a distance and kNN query instance.
+ DistanceQuery<O, D> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
+ RangeQuery<O, D> rangeQuery = database.getRangeQuery(distQuery);
+ D dfactory = distQuery.getDistanceFactory();
+ Factory<O, ?> ofactory = RelationUtil.getNumberVectorFactory(relation);
+ int dim = RelationUtil.dimensionality(relation);
+
+ // Separate query set.
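+    // Queries must have dim+1 columns: the query vector, plus the query
+    // radius in the last column.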
+ TypeInformation res = new VectorFieldTypeInformation<NumberVector<?>>(NumberVector.class, dim + 1);
+ MultipleObjectsBundle bundle = queries.loadData();
+ int col = -1;
+ for (int i = 0; i < bundle.metaLength(); i++) {
+ if (res.isAssignableFromType(bundle.meta(i))) {
+ col = i;
+ break;
+ }
+ }
+ if (col < 0) {
+ StringBuilder buf = new StringBuilder();
+ buf.append("No compatible data type in query input was found. Expected: ");
+ buf.append(res.toString());
+ buf.append(" have: ");
+ for (int i = 0; i < bundle.metaLength(); i++) {
+ if (i > 0) {
+ buf.append(' ');
+ }
+ buf.append(bundle.meta(i).toString());
+ }
+ throw new AbortException(buf.toString());
+ }
+    // Random sampling is a bit of a hack, sorry.
+ // But currently, we don't (yet) have an "integer random sample" function.
+ DBIDRange sids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
+
+ final DBIDs sample;
+ if (sampling <= 0) {
+ sample = sids;
+ } else if (sampling < 1.1) {
+      int size = (int) Math.min(sampling * sids.size(), sids.size());
+ sample = DBIDUtil.randomSample(sids, size, random);
+ } else {
+ int size = (int) Math.min(sampling, sids.size());
+ sample = DBIDUtil.randomSample(sids, size, random);
+ }
+    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Range queries", sample.size(), LOG) : null;
+ int hash = 0;
+ MeanVariance mv = new MeanVariance();
+ double[] buf = new double[dim];
+ for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
+ int off = sids.binarySearch(iditer);
+ assert (off >= 0);
+ NumberVector<?> o = (NumberVector<?>) bundle.data(off, col);
+ for (int i = 0; i < dim; i++) {
+ buf[i] = o.doubleValue(i);
+ }
+ O v = ofactory.newNumberVector(buf);
+ D r = dfactory.fromDouble(o.doubleValue(dim));
+ DistanceDBIDResult<D> rres = rangeQuery.getRangeForObject(v, r);
+ int ichecksum = 0;
+ for (DBIDIter it = rres.iter(); it.valid(); it.advance()) {
+ ichecksum += it.internalGetIndex();
+ }
+ hash = Util.mixHashCodes(hash, ichecksum);
+ mv.put(rres.size());
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
+ }
+ }
+ if (prog != null) {
+ prog.ensureCompleted(LOG);
+ }
+ if (LOG.isVerbose()) {
+ LOG.verbose("Result hashcode: " + hash);
+ LOG.verbose("Mean number of results: "+mv.toString());
+ }
+ return null;
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ if (queries == null) {
+ return TypeUtil.array(getDistanceFunction().getInputTypeRestriction(), new VectorFieldTypeInformation<NumberVector<?>>(NumberVector.class, 1));
+ } else {
+ return TypeUtil.array(getDistanceFunction().getInputTypeRestriction());
+ }
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+   * Parameterization class.
+ *
+ * @apiviz.exclude
+ *
+ * @author Erich Schubert
+ *
+ * @param <O> Object type
+ * @param <D> Distance type
+ */
+ public static class Parameterizer<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ /**
+ * Parameter for the query dataset.
+ */
+ public static final OptionID QUERY_ID = new OptionID("rangebench.query", "Data source for the queries. If not set, the queries are taken from the database.");
+
+ /**
+ * Parameter for the sampling size.
+ */
+    public static final OptionID SAMPLING_ID = new OptionID("rangebench.sampling", "Sampling size parameter. If the value is less than or equal to 1, it is assumed to be a relative share. Larger values will be interpreted as integer sizes. By default, all data will be used.");
+
+ /**
+ * Parameter for the random generator
+ */
+ public static final OptionID RANDOM_ID = new OptionID("rangebench.random", "Random generator for sampling.");
+
+ /**
+ * The alternate query point source. Optional.
+ */
+ protected DatabaseConnection queries = null;
+
+ /**
+ * Sampling size.
+ */
+ protected double sampling = -1;
+
+ /**
+ * Random generator factory
+ */
+ protected RandomFactory random;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ ObjectParameter<DatabaseConnection> queryP = new ObjectParameter<DatabaseConnection>(QUERY_ID, DatabaseConnection.class);
+ queryP.setOptional(true);
+ if (config.grab(queryP)) {
+ queries = queryP.instantiateClass(config);
+ }
+ DoubleParameter samplingP = new DoubleParameter(SAMPLING_ID);
+ samplingP.setOptional(true);
+ if (config.grab(samplingP)) {
+ sampling = samplingP.doubleValue();
+ }
+ RandomParameter randomP = new RandomParameter(RANDOM_ID, RandomFactory.DEFAULT);
+ if (config.grab(randomP)) {
+ random = randomP.getValue();
+ }
+ }
+
+ @Override
+ protected RangeQueryBenchmarkAlgorithm<O, D> makeInstance() {
+ return new RangeQueryBenchmarkAlgorithm<O, D>(distanceFunction, queries, sampling, random);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/benchmark/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/benchmark/package-info.java
new file mode 100644
index 00000000..6a98fa64
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/benchmark/package-info.java
@@ -0,0 +1,30 @@
+/**
+ * <p>Benchmarking pseudo algorithms.</p>
+ *
+ * The algorithms in this package are meant to be used in runtime benchmarks,
+ * e.g. to evaluate the performance of an index structure.
+ */
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package de.lmu.ifi.dbs.elki.algorithm.benchmark;
+
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedClustering.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedClustering.java
index 670a3f0f..05cc2b4f 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedClustering.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedClustering.java
@@ -48,49 +48,19 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
* @param <R> the result we return
* @param <V> the type of FeatureVector handled by this Algorithm
*/
-public abstract class AbstractProjectedClustering<R extends Clustering<?>, V extends NumberVector<V, ?>> extends AbstractAlgorithm<R> implements ClusteringAlgorithm<R> {
+public abstract class AbstractProjectedClustering<R extends Clustering<?>, V extends NumberVector<?>> extends AbstractAlgorithm<R> implements ClusteringAlgorithm<R> {
/**
- * Parameter to specify the number of clusters to find, must be an integer
- * greater than 0.
- * <p>
- * Key: {@code -projectedclustering.k}
- * </p>
- */
- public static final OptionID K_ID = OptionID.getOrCreateOptionID("projectedclustering.k", "The number of clusters to find.");
-
- /**
- * Parameter to specify the multiplier for the initial number of seeds, must
- * be an integer greater than 0.
- * <p>
- * Default value: {@code 30}
- * </p>
- * <p>
- * Key: {@code -projectedclustering.k_i}
- * </p>
- */
- public static final OptionID K_I_ID = OptionID.getOrCreateOptionID("projectedclustering.k_i", "The multiplier for the initial number of seeds.");
-
- /**
- * Parameter to specify the dimensionality of the clusters to find, must be an
- * integer greater than 0.
- * <p>
- * Key: {@code -projectedclustering.l}
- * </p>
- */
- public static final OptionID L_ID = OptionID.getOrCreateOptionID("projectedclustering.l", "The dimensionality of the clusters to find.");
-
- /**
- * Holds the value of {@link #K_ID}.
+ * Holds the value of {@link Parameterizer#K_ID}.
*/
protected int k;
/**
- * Holds the value of {@link #K_I_ID}.
+ * Holds the value of {@link Parameterizer#K_I_ID}.
*/
protected int k_i;
/**
- * Holds the value of {@link #L_ID}.
+ * Holds the value of {@link Parameterizer#L_ID}.
*/
protected int l;
@@ -138,7 +108,37 @@ public abstract class AbstractProjectedClustering<R extends Clustering<?>, V ext
*
* @apiviz.exclude
*/
- public static abstract class Parameterizer extends AbstractParameterizer {
+ public abstract static class Parameterizer extends AbstractParameterizer {
+ /**
+ * Parameter to specify the number of clusters to find, must be an integer
+ * greater than 0.
+ * <p>
+ * Key: {@code -projectedclustering.k}
+ * </p>
+ */
+ public static final OptionID K_ID = new OptionID("projectedclustering.k", "The number of clusters to find.");
+
+ /**
+ * Parameter to specify the multiplier for the initial number of seeds, must
+ * be an integer greater than 0.
+ * <p>
+ * Default value: {@code 30}
+ * </p>
+ * <p>
+ * Key: {@code -projectedclustering.k_i}
+ * </p>
+ */
+ public static final OptionID K_I_ID = new OptionID("projectedclustering.k_i", "The multiplier for the initial number of seeds.");
+
+ /**
+ * Parameter to specify the dimensionality of the clusters to find, must be
+ * an integer greater than 0.
+ * <p>
+ * Key: {@code -projectedclustering.l}
+ * </p>
+ */
+ public static final OptionID L_ID = new OptionID("projectedclustering.l", "The dimensionality of the clusters to find.");
+
protected int k;
protected int k_i;
@@ -151,8 +151,9 @@ public abstract class AbstractProjectedClustering<R extends Clustering<?>, V ext
* @param config Parameterization
*/
protected void configK(Parameterization config) {
- IntParameter kP = new IntParameter(K_ID, new GreaterConstraint(0));
- if(config.grab(kP)) {
+ IntParameter kP = new IntParameter(K_ID);
+ kP.addConstraint(new GreaterConstraint(0));
+ if (config.grab(kP)) {
k = kP.getValue();
}
}
@@ -163,8 +164,9 @@ public abstract class AbstractProjectedClustering<R extends Clustering<?>, V ext
* @param config Parameterization
*/
protected void configKI(Parameterization config) {
- IntParameter k_iP = new IntParameter(K_I_ID, new GreaterConstraint(0), 30);
- if(config.grab(k_iP)) {
+ IntParameter k_iP = new IntParameter(K_I_ID, 30);
+ k_iP.addConstraint(new GreaterConstraint(0));
+ if (config.grab(k_iP)) {
k_i = k_iP.getValue();
}
}
@@ -175,10 +177,11 @@ public abstract class AbstractProjectedClustering<R extends Clustering<?>, V ext
* @param config Parameterization
*/
protected void configL(Parameterization config) {
- IntParameter lP = new IntParameter(L_ID, new GreaterConstraint(0));
- if(config.grab(lP)) {
+ IntParameter lP = new IntParameter(L_ID);
+ lP.addConstraint(new GreaterConstraint(0));
+ if (config.grab(lP)) {
l = lP.getValue();
}
}
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedDBSCAN.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedDBSCAN.java
index 250cc70b..f8b73f48 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedDBSCAN.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedDBSCAN.java
@@ -38,15 +38,17 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDMIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.IndexBasedDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.LocallyWeightedDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
@@ -67,7 +69,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
* @author Arthur Zimek
* @param <V> the type of NumberVector handled by this Algorithm
*/
-public abstract class AbstractProjectedDBSCAN<R extends Clustering<Model>, V extends NumberVector<V, ?>> extends AbstractAlgorithm<R> implements ClusteringAlgorithm<R> {
+public abstract class AbstractProjectedDBSCAN<R extends Clustering<Model>, V extends NumberVector<?>> extends AbstractAlgorithm<R> implements ClusteringAlgorithm<R> {
/**
* Parameter to specify the distance function to determine the distance
* between database objects, must extend
@@ -81,12 +83,12 @@ public abstract class AbstractProjectedDBSCAN<R extends Clustering<Model>, V ext
* {@link de.lmu.ifi.dbs.elki.distance.distancefunction.LocallyWeightedDistanceFunction}
* </p>
*/
- public static final OptionID OUTER_DISTANCE_FUNCTION_ID = OptionID.getOrCreateOptionID("projdbscan.outerdistancefunction", "Distance function to determine the distance between database objects.");
+ public static final OptionID OUTER_DISTANCE_FUNCTION_ID = new OptionID("projdbscan.outerdistancefunction", "Distance function to determine the distance between database objects.");
/**
* Parameter distance function
*/
- public static final OptionID INNER_DISTANCE_FUNCTION_ID = OptionID.getOrCreateOptionID("projdbscan.distancefunction", "Distance function to determine the neighbors for variance analysis.");
+ public static final OptionID INNER_DISTANCE_FUNCTION_ID = new OptionID("projdbscan.distancefunction", "Distance function to determine the neighbors for variance analysis.");
/**
* Parameter to specify the maximum radius of the neighborhood to be
@@ -95,7 +97,7 @@ public abstract class AbstractProjectedDBSCAN<R extends Clustering<Model>, V ext
* Key: {@code -projdbscan.epsilon}
* </p>
*/
- public static final OptionID EPSILON_ID = OptionID.getOrCreateOptionID("projdbscan.epsilon", "The maximum radius of the neighborhood to be considered.");
+ public static final OptionID EPSILON_ID = new OptionID("projdbscan.epsilon", "The maximum radius of the neighborhood to be considered.");
/**
* Parameter to specify the intrinsic dimensionality of the clusters to find,
@@ -104,7 +106,7 @@ public abstract class AbstractProjectedDBSCAN<R extends Clustering<Model>, V ext
* Key: {@code -projdbscan.lambda}
* </p>
*/
- public static final OptionID LAMBDA_ID = OptionID.getOrCreateOptionID("projdbscan.lambda", "The intrinsic dimensionality of the clusters to find.");
+ public static final OptionID LAMBDA_ID = new OptionID("projdbscan.lambda", "The intrinsic dimensionality of the clusters to find.");
/**
* Parameter to specify the threshold for minimum number of points in the
@@ -113,7 +115,7 @@ public abstract class AbstractProjectedDBSCAN<R extends Clustering<Model>, V ext
* Key: {@code -projdbscan.minpts}
* </p>
*/
- public static final OptionID MINPTS_ID = OptionID.getOrCreateOptionID("projdbscan.minpts", "Threshold for minimum number of points in " + "the epsilon-neighborhood of a point.");
+ public static final OptionID MINPTS_ID = new OptionID("projdbscan.minpts", "Threshold for minimum number of points in " + "the epsilon-neighborhood of a point.");
/**
* Holds the instance of the distance function specified by
@@ -187,7 +189,7 @@ public abstract class AbstractProjectedDBSCAN<R extends Clustering<Model>, V ext
if(relation.size() >= minpts) {
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
if(!processedIDs.contains(iditer)) {
- expandCluster(distFunc, rangeQuery, iditer.getDBID(), objprog, clusprog);
+ expandCluster(distFunc, rangeQuery, DBIDUtil.deref(iditer), objprog, clusprog);
if(processedIDs.size() == relation.size() && noise.size() == 0) {
break;
}
@@ -277,9 +279,9 @@ public abstract class AbstractProjectedDBSCAN<R extends Clustering<Model>, V ext
}
// compute weighted epsilon neighborhood
- List<DistanceResultPair<DoubleDistance>> seeds = rangeQuery.getRangeForDBID(startObjectID, epsilon);
+ DistanceDBIDResult<DoubleDistance> neighbors = rangeQuery.getRangeForDBID(startObjectID, epsilon);
// neighbors < minPts -> noise
- if(seeds.size() < minpts) {
+ if(neighbors.size() < minpts) {
noise.add(startObjectID);
processedIDs.add(startObjectID);
if(objprog != null && clusprog != null) {
@@ -291,7 +293,8 @@ public abstract class AbstractProjectedDBSCAN<R extends Clustering<Model>, V ext
// try to expand the cluster
ModifiableDBIDs currentCluster = DBIDUtil.newArray();
- for(DistanceResultPair<DoubleDistance> seed : seeds) {
+ ModifiableDBIDs seeds = DBIDUtil.newHashSet();
+ for (DistanceDBIDResultIter<DoubleDistance> seed = neighbors.iter(); seed.valid(); seed.advance()) {
int nextID_corrDim = distFunc.getIndex().getLocalProjection(seed).getCorrelationDimension();
// nextID is not reachable from start object
if(nextID_corrDim > lambda) {
@@ -301,25 +304,27 @@ public abstract class AbstractProjectedDBSCAN<R extends Clustering<Model>, V ext
if(!processedIDs.contains(seed)) {
currentCluster.add(seed);
processedIDs.add(seed);
+ seeds.add(seed);
}
else if(noise.contains(seed)) {
currentCluster.add(seed);
noise.remove(seed);
}
}
- seeds.remove(0);
while(seeds.size() > 0) {
- DistanceResultPair<DoubleDistance> q = seeds.remove(0);
- int corrDim_q = distFunc.getIndex().getLocalProjection(q).getCorrelationDimension();
+ DBIDMIter iter = seeds.iter();
+ int corrDim_q = distFunc.getIndex().getLocalProjection(iter).getCorrelationDimension();
// q forms no lambda-dim hyperplane
if(corrDim_q > lambda) {
+        // Remove the seed first, or this loop could never terminate.
+        iter.remove();
         continue;
}
- List<DistanceResultPair<DoubleDistance>> reachables = rangeQuery.getRangeForDBID(q, epsilon);
+ DistanceDBIDResult<DoubleDistance> reachables = rangeQuery.getRangeForDBID(iter, epsilon);
+ iter.remove();
+
if(reachables.size() > minpts) {
- for(DistanceResultPair<DoubleDistance> r : reachables) {
+ for (DistanceDBIDResultIter<DoubleDistance> r = reachables.iter(); r.valid(); r.advance()) {
int corrDim_r = distFunc.getIndex().getLocalProjection(r).getCorrelationDimension();
// r is not reachable from q
if(corrDim_r > lambda) {
@@ -378,7 +383,7 @@ public abstract class AbstractProjectedDBSCAN<R extends Clustering<Model>, V ext
*
* @apiviz.exclude
*/
- public static abstract class Parameterizer<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractParameterizer {
+ public abstract static class Parameterizer<V extends NumberVector<?>, D extends Distance<D>> extends AbstractParameterizer {
protected DistanceFunction<V, D> innerdist;
protected D epsilon;
@@ -405,7 +410,8 @@ public abstract class AbstractProjectedDBSCAN<R extends Clustering<Model>, V ext
}
protected void configMinPts(Parameterization config) {
- IntParameter minptsP = new IntParameter(MINPTS_ID, new GreaterConstraint(0));
+ IntParameter minptsP = new IntParameter(MINPTS_ID);
+ minptsP.addConstraint(new GreaterConstraint(0));
if(config.grab(minptsP)) {
minpts = minptsP.getValue();
}
@@ -428,7 +434,8 @@ public abstract class AbstractProjectedDBSCAN<R extends Clustering<Model>, V ext
}
protected void configLambda(Parameterization config) {
- IntParameter lambdaP = new IntParameter(LAMBDA_ID, new GreaterConstraint(0));
+ IntParameter lambdaP = new IntParameter(LAMBDA_ID);
+ lambdaP.addConstraint(new GreaterConstraint(0));
if(config.grab(lambdaP)) {
lambda = lambdaP.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DBSCAN.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DBSCAN.java
index 6bafa9e9..fcf81faa 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DBSCAN.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DBSCAN.java
@@ -34,14 +34,16 @@ import de.lmu.ifi.dbs.elki.data.model.Model;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.QueryUtil;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDMIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
@@ -77,13 +79,13 @@ public class DBSCAN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(DBSCAN.class);
+ private static final Logging LOG = Logging.getLogger(DBSCAN.class);
/**
* Parameter to specify the maximum radius of the neighborhood to be
* considered, must be suitable to the distance function specified.
*/
- public static final OptionID EPSILON_ID = OptionID.getOrCreateOptionID("dbscan.epsilon", "The maximum radius of the neighborhood to be considered.");
+ public static final OptionID EPSILON_ID = new OptionID("dbscan.epsilon", "The maximum radius of the neighborhood to be considered.");
/**
* Holds the value of {@link #EPSILON_ID}.
@@ -94,7 +96,7 @@ public class DBSCAN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
* Parameter to specify the threshold for minimum number of points in the
* epsilon-neighborhood of a point, must be an integer greater than 0.
*/
- public static final OptionID MINPTS_ID = OptionID.getOrCreateOptionID("dbscan.minpts", "Threshold for minimum number of points in the epsilon-neighborhood of a point.");
+ public static final OptionID MINPTS_ID = new OptionID("dbscan.minpts", "Threshold for minimum number of points in the epsilon-neighborhood of a point.");
/**
* Holds the value of {@link #MINPTS_ID}.
@@ -136,40 +138,36 @@ public class DBSCAN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
RangeQuery<O, D> rangeQuery = QueryUtil.getRangeQuery(relation, getDistanceFunction());
final int size = relation.size();
- FiniteProgress objprog = logger.isVerbose() ? new FiniteProgress("Processing objects", size, logger) : null;
- IndefiniteProgress clusprog = logger.isVerbose() ? new IndefiniteProgress("Number of clusters", logger) : null;
+ FiniteProgress objprog = LOG.isVerbose() ? new FiniteProgress("Processing objects", size, LOG) : null;
+ IndefiniteProgress clusprog = LOG.isVerbose() ? new IndefiniteProgress("Number of clusters", LOG) : null;
resultList = new ArrayList<ModifiableDBIDs>();
noise = DBIDUtil.newHashSet();
processedIDs = DBIDUtil.newHashSet(size);
- if(size >= minpts) {
+ if(size < minpts) {
+      // There can't be any clusters
+      noise.addDBIDs(relation.getDBIDs());
+      if(objprog != null) {
+        objprog.setProcessed(noise.size(), LOG);
+      }
+ }
+ else {
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
if(!processedIDs.contains(iditer)) {
- expandCluster(relation, rangeQuery, iditer.getDBID(), objprog, clusprog);
+ expandCluster(relation, rangeQuery, iditer, objprog, clusprog);
}
if(objprog != null && clusprog != null) {
- objprog.setProcessed(processedIDs.size(), logger);
- clusprog.setProcessed(resultList.size(), logger);
+ objprog.setProcessed(processedIDs.size(), LOG);
+ clusprog.setProcessed(resultList.size(), LOG);
}
if(processedIDs.size() == size) {
break;
}
}
}
- else {
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- noise.add(iditer);
- if(objprog != null && clusprog != null) {
- objprog.setProcessed(noise.size(), logger);
- clusprog.setProcessed(resultList.size(), logger);
- }
- }
- }
// Finish progress logging
if(objprog != null) {
- objprog.ensureCompleted(logger);
+ objprog.ensureCompleted(LOG);
}
if(clusprog != null) {
- clusprog.setCompleted(logger);
+ clusprog.setCompleted(LOG);
}
Clustering<Model> result = new Clustering<Model>("DBSCAN Clustering", "dbscan-clustering");
@@ -194,40 +192,43 @@ public class DBSCAN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
* @param startObjectID potential seed of a new potential cluster
* @param objprog the progress object for logging the current status
*/
- protected void expandCluster(Relation<O> relation, RangeQuery<O, D> rangeQuery, DBID startObjectID, FiniteProgress objprog, IndefiniteProgress clusprog) {
- List<DistanceResultPair<D>> seeds = rangeQuery.getRangeForDBID(startObjectID, epsilon);
+ protected void expandCluster(Relation<O> relation, RangeQuery<O, D> rangeQuery, DBIDRef startObjectID, FiniteProgress objprog, IndefiniteProgress clusprog) {
+ DistanceDBIDResult<D> neighbors = rangeQuery.getRangeForDBID(startObjectID, epsilon);
// startObject is no core-object
- if(seeds.size() < minpts) {
+ if(neighbors.size() < minpts) {
noise.add(startObjectID);
processedIDs.add(startObjectID);
if(objprog != null && clusprog != null) {
- objprog.setProcessed(processedIDs.size(), logger);
- clusprog.setProcessed(resultList.size(), logger);
+ objprog.setProcessed(processedIDs.size(), LOG);
+ clusprog.setProcessed(resultList.size(), LOG);
}
return;
}
// try to expand the cluster
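+    // Seeds: neighbors that still need to be expanded.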
+ HashSetModifiableDBIDs seeds = DBIDUtil.newHashSet();
ModifiableDBIDs currentCluster = DBIDUtil.newArray();
- for(DistanceResultPair<D> seed : seeds) {
+ for(DBIDIter seed = neighbors.iter(); seed.valid(); seed.advance()) {
if(!processedIDs.contains(seed)) {
currentCluster.add(seed);
processedIDs.add(seed);
+ seeds.add(seed);
}
else if(noise.contains(seed)) {
currentCluster.add(seed);
noise.remove(seed);
}
}
- seeds.remove(0);
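+    // The start object was already expanded above; do not process it again.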
+ seeds.remove(startObjectID);
while(seeds.size() > 0) {
- DistanceResultPair<D> o = seeds.remove(0);
- List<DistanceResultPair<D>> neighborhood = rangeQuery.getRangeForDBID(o, epsilon);
+ DBIDMIter o = seeds.iter();
+ DistanceDBIDResult<D> neighborhood = rangeQuery.getRangeForDBID(o, epsilon);
+ o.remove();
if(neighborhood.size() >= minpts) {
- for(DistanceResultPair<D> neighbor : neighborhood) {
+ for(DBIDIter neighbor = neighborhood.iter(); neighbor.valid(); neighbor.advance()) {
boolean inNoise = noise.contains(neighbor);
boolean unclassified = !processedIDs.contains(neighbor);
if(inNoise || unclassified) {
@@ -248,9 +249,9 @@ public class DBSCAN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
}
if(objprog != null && clusprog != null) {
- objprog.setProcessed(processedIDs.size(), logger);
+ objprog.setProcessed(processedIDs.size(), LOG);
int numClusters = currentCluster.size() > minpts ? resultList.size() + 1 : resultList.size();
- clusprog.setProcessed(numClusters, logger);
+ clusprog.setProcessed(numClusters, LOG);
}
}
if(currentCluster.size() >= minpts) {
@@ -270,7 +271,7 @@ public class DBSCAN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DeLiClu.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DeLiClu.java
index a0780e3d..22875715 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DeLiClu.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DeLiClu.java
@@ -36,10 +36,12 @@ import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.DistanceUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.index.tree.LeafEntry;
import de.lmu.ifi.dbs.elki.index.tree.TreeIndexPathComponent;
@@ -53,7 +55,6 @@ import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.result.ResultUtil;
import de.lmu.ifi.dbs.elki.result.optics.ClusterOrderResult;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNList;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.UpdatableHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
@@ -83,17 +84,17 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@Title("DeliClu: Density-Based Hierarchical Clustering")
@Description("Hierachical algorithm to find density-connected sets in a database based on the parameter 'minpts'.")
@Reference(authors = "E. Achtert, C. Böhm, P. Kröger", title = "DeLiClu: Boosting Robustness, Completeness, Usability, and Efficiency of Hierarchical Clustering by a Closest Pair Ranking", booktitle = "Proc. 10th Pacific-Asia Conference on Knowledge Discovery and Data Mining (PAKDD 2006), Singapore, 2006", url = "http://dx.doi.org/10.1007/11731139_16")
-public class DeLiClu<NV extends NumberVector<NV, ?>, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm<NV, D, ClusterOrderResult<D>> implements OPTICSTypeAlgorithm<D> {
+public class DeLiClu<NV extends NumberVector<?>, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm<NV, D, ClusterOrderResult<D>> implements OPTICSTypeAlgorithm<D> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(DeLiClu.class);
+ private static final Logging LOG = Logging.getLogger(DeLiClu.class);
/**
* Parameter to specify the threshold for minimum number of points within a
* cluster, must be an integer greater than 0.
*/
- public static final OptionID MINPTS_ID = OptionID.getOrCreateOptionID("deliclu.minpts", "Threshold for minimum number of points within a cluster.");
+ public static final OptionID MINPTS_ID = new OptionID("deliclu.minpts", "Threshold for minimum number of points within a cluster.");
/**
* The priority queue for the algorithm.
@@ -137,12 +138,12 @@ public class DeLiClu<NV extends NumberVector<NV, ?>, D extends Distance<D>> exte
SpatialPrimitiveDistanceFunction<NV, D> distFunction = (SpatialPrimitiveDistanceFunction<NV, D>) getDistanceFunction();
// first do the knn-Join
- if(logger.isVerbose()) {
- logger.verbose("knnJoin...");
+ if(LOG.isVerbose()) {
+ LOG.verbose("knnJoin...");
}
- DataStore<KNNList<D>> knns = knnJoin.run(database, relation);
+ DataStore<KNNResult<D>> knns = knnJoin.run(database, relation);
- FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("DeLiClu", relation.size(), logger) : null;
+ FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("DeLiClu", relation.size(), LOG) : null;
final int size = relation.size();
ClusterOrderResult<D> clusterOrder = new ClusterOrderResult<D>("DeLiClu Clustering", "deliclu-clustering");
@@ -184,12 +185,12 @@ public class DeLiClu<NV extends NumberVector<NV, ?>, D extends Distance<D>> exte
reinsertExpanded(distFunction, index, path, knns);
if(progress != null) {
- progress.setProcessed(numHandled, logger);
+ progress.setProcessed(numHandled, LOG);
}
}
}
if(progress != null) {
- progress.ensureCompleted(logger);
+ progress.ensureCompleted(LOG);
}
return clusterOrder;
}
@@ -205,7 +206,7 @@ public class DeLiClu<NV extends NumberVector<NV, ?>, D extends Distance<D>> exte
if(!it.valid()) {
return null;
}
- return it.getDBID();
+ return DBIDUtil.deref(it);
}
/**
@@ -216,7 +217,7 @@ public class DeLiClu<NV extends NumberVector<NV, ?>, D extends Distance<D>> exte
* @param nodePair the pair of nodes to be expanded
* @param knns the knn list
*/
- private void expandNodes(DeLiCluTree index, SpatialPrimitiveDistanceFunction<NV, D> distFunction, SpatialObjectPair nodePair, DataStore<KNNList<D>> knns) {
+ private void expandNodes(DeLiCluTree index, SpatialPrimitiveDistanceFunction<NV, D> distFunction, SpatialObjectPair nodePair, DataStore<KNNResult<D>> knns) {
DeLiCluNode node1 = index.getNode(((SpatialDirectoryEntry) nodePair.entry1).getPageID());
DeLiCluNode node2 = index.getNode(((SpatialDirectoryEntry) nodePair.entry2).getPageID());
@@ -238,8 +239,8 @@ public class DeLiClu<NV extends NumberVector<NV, ?>, D extends Distance<D>> exte
* @param node2 the second node
*/
private void expandDirNodes(SpatialPrimitiveDistanceFunction<NV, D> distFunction, DeLiCluNode node1, DeLiCluNode node2) {
- if(logger.isDebuggingFinest()) {
- logger.debugFinest("ExpandDirNodes: " + node1.getPageID() + " + " + node2.getPageID());
+ if(LOG.isDebuggingFinest()) {
+ LOG.debugFinest("ExpandDirNodes: " + node1.getPageID() + " + " + node2.getPageID());
}
int numEntries_1 = node1.getNumEntries();
int numEntries_2 = node2.getNumEntries();
@@ -273,9 +274,9 @@ public class DeLiClu<NV extends NumberVector<NV, ?>, D extends Distance<D>> exte
* @param node2 the second node
* @param knns the knn list
*/
- private void expandLeafNodes(SpatialPrimitiveDistanceFunction<NV, D> distFunction, DeLiCluNode node1, DeLiCluNode node2, DataStore<KNNList<D>> knns) {
- if(logger.isDebuggingFinest()) {
- logger.debugFinest("ExpandLeafNodes: " + node1.getPageID() + " + " + node2.getPageID());
+ private void expandLeafNodes(SpatialPrimitiveDistanceFunction<NV, D> distFunction, DeLiCluNode node1, DeLiCluNode node2, DataStore<KNNResult<D>> knns) {
+ if(LOG.isDebuggingFinest()) {
+ LOG.debugFinest("ExpandLeafNodes: " + node1.getPageID() + " + " + node2.getPageID());
}
int numEntries_1 = node1.getNumEntries();
int numEntries_2 = node2.getNumEntries();
@@ -309,12 +310,12 @@ public class DeLiClu<NV extends NumberVector<NV, ?>, D extends Distance<D>> exte
* @param path the path of the object inserted last
* @param knns the knn list
*/
- private void reinsertExpanded(SpatialPrimitiveDistanceFunction<NV, D> distFunction, DeLiCluTree index, List<TreeIndexPathComponent<DeLiCluEntry>> path, DataStore<KNNList<D>> knns) {
+ private void reinsertExpanded(SpatialPrimitiveDistanceFunction<NV, D> distFunction, DeLiCluTree index, List<TreeIndexPathComponent<DeLiCluEntry>> path, DataStore<KNNResult<D>> knns) {
SpatialDirectoryEntry rootEntry = (SpatialDirectoryEntry) path.remove(0).getEntry();
reinsertExpanded(distFunction, index, path, 0, rootEntry, knns);
}
- private void reinsertExpanded(SpatialPrimitiveDistanceFunction<NV, D> distFunction, DeLiCluTree index, List<TreeIndexPathComponent<DeLiCluEntry>> path, int pos, SpatialDirectoryEntry parentEntry, DataStore<KNNList<D>> knns) {
+ private void reinsertExpanded(SpatialPrimitiveDistanceFunction<NV, D> distFunction, DeLiCluTree index, List<TreeIndexPathComponent<DeLiCluEntry>> path, int pos, SpatialDirectoryEntry parentEntry, DataStore<KNNResult<D>> knns) {
DeLiCluNode parentNode = index.getNode(parentEntry.getPageID());
SpatialEntry entry2 = path.get(pos).getEntry();
@@ -367,7 +368,7 @@ public class DeLiClu<NV extends NumberVector<NV, ?>, D extends Distance<D>> exte
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -487,7 +488,7 @@ public class DeLiClu<NV extends NumberVector<NV, ?>, D extends Distance<D>> exte
*
* @apiviz.exclude
*/
- public static class Parameterizer<NV extends NumberVector<NV, ?>, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm.Parameterizer<NV, D> {
+ public static class Parameterizer<NV extends NumberVector<?>, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm.Parameterizer<NV, D> {
protected int minpts = 0;
@Override
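
A recurring migration in this commit: it.getDBID() on a DBIDIter is replaced by DBIDUtil.deref(it), which materializes a stable DBID only where one is actually needed. A minimal sketch of the new idiom, using only calls that appear in this diff; the surrounding class and generics are omitted, and the method name is illustrative:

    import de.lmu.ifi.dbs.elki.database.ids.DBID;
    import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
    import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
    import de.lmu.ifi.dbs.elki.database.relation.Relation;

    void iterate(Relation<?> relation) {
      for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
        // The iterator itself serves as a reference to the current object;
        // dereference only when a DBID must be stored or passed on.
        DBID id = DBIDUtil.deref(iter);
        // ... use id ...
      }
    }
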
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/EM.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/EM.java
index 63ebbabb..514e63bd 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/EM.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/EM.java
@@ -27,6 +27,7 @@ import java.util.ArrayList;
import java.util.List;
import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeans;
import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansInitialization;
import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.RandomlyGeneratedInitialMeans;
import de.lmu.ifi.dbs.elki.data.Cluster;
@@ -44,12 +45,12 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.MathUtil;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
@@ -86,11 +87,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
@Title("EM-Clustering: Clustering by Expectation Maximization")
@Description("Provides k Gaussian mixtures maximizing the probability of the given data")
@Reference(authors = "A. P. Dempster, N. M. Laird, D. B. Rubin", title = "Maximum Likelihood from Incomplete Data via the EM algorithm", booktitle = "Journal of the Royal Statistical Society, Series B, 39(1), 1977, pp. 1-31", url = "http://www.jstor.org/stable/2984875")
-public class EM<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clustering<EMModel<V>>> implements ClusteringAlgorithm<Clustering<EMModel<V>>> {
+public class EM<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<EMModel<V>>> implements ClusteringAlgorithm<Clustering<EMModel<V>>> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(EM.class);
+ private static final Logging LOG = Logging.getLogger(EM.class);
/**
* Small value to increment diagonally of a matrix in order to avoid
@@ -102,7 +103,7 @@ public class EM<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clusteri
* Parameter to specify the number of clusters to find, must be an integer
* greater than 0.
*/
- public static final OptionID K_ID = OptionID.getOrCreateOptionID("em.k", "The number of clusters to find.");
+ public static final OptionID K_ID = new OptionID("em.k", "The number of clusters to find.");
/**
* Holds the value of {@link #K_ID}.
@@ -113,12 +114,12 @@ public class EM<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clusteri
* Parameter to specify the termination criterion for maximization of E(M):
* E(M) - E(M') < em.delta, must be a double equal to or greater than 0.
*/
- public static final OptionID DELTA_ID = OptionID.getOrCreateOptionID("em.delta", "The termination criterion for maximization of E(M): " + "E(M) - E(M') < em.delta");
+ public static final OptionID DELTA_ID = new OptionID("em.delta", "The termination criterion for maximization of E(M): " + "E(M) - E(M') < em.delta");
/**
* Parameter to specify the initialization method
*/
- public static final OptionID INIT_ID = OptionID.getOrCreateOptionID("kmeans.initialization", "Method to choose the initial means.");
+ public static final OptionID INIT_ID = new OptionID("kmeans.initialization", "Method to choose the initial means.");
private static final double MIN_LOGLIKELIHOOD = -100000;
@@ -138,17 +139,24 @@ public class EM<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clusteri
private KMeansInitialization<V> initializer;
/**
+   * Maximum number of iterations to allow.
+ */
+ private int maxiter;
+
+ /**
* Constructor.
*
* @param k k parameter
* @param delta delta parameter
* @param initializer Class to choose the initial means
+ * @param maxiter Maximum number of iterations
*/
- public EM(int k, double delta, KMeansInitialization<V> initializer) {
+ public EM(int k, double delta, KMeansInitialization<V> initializer, int maxiter) {
super();
this.k = k;
this.delta = delta;
this.initializer = initializer;
+ this.maxiter = maxiter;
}
/**
@@ -164,15 +172,15 @@ public class EM<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clusteri
* @return Result
*/
public Clustering<EMModel<V>> run(Database database, Relation<V> relation) {
- if(relation.size() == 0) {
+ if (relation.size() == 0) {
throw new IllegalArgumentException("database empty: must contain elements");
}
// initial models
- if(logger.isVerbose()) {
- logger.verbose("initializing " + k + " models");
+ if (LOG.isVerbose()) {
+ LOG.verbose("initializing " + k + " models");
}
List<Vector> means = new ArrayList<Vector>();
- for(NumberVector<?, ?> nv : initializer.chooseInitialMeans(relation, k, EuclideanDistanceFunction.STATIC)) {
+ for (NumberVector<?> nv : initializer.chooseInitialMeans(relation, k, EuclideanDistanceFunction.STATIC)) {
means.add(nv.getColumnVector());
}
List<Matrix> covarianceMatrices = new ArrayList<Matrix>(k);
@@ -182,113 +190,117 @@ public class EM<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clusteri
probClusterIGivenX = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_SORTED, double[].class);
final int dimensionality = means.get(0).getDimensionality();
- for(int i = 0; i < k; i++) {
+ for (int i = 0; i < k; i++) {
Matrix m = Matrix.identity(dimensionality, dimensionality);
covarianceMatrices.add(m);
normDistrFactor[i] = 1.0 / Math.sqrt(Math.pow(MathUtil.TWOPI, dimensionality) * m.det());
invCovMatr.add(m.inverse());
clusterWeights[i] = 1.0 / k;
- if(logger.isDebuggingFinest()) {
- StringBuffer msg = new StringBuffer();
+ if (LOG.isDebuggingFinest()) {
+ StringBuilder msg = new StringBuilder();
msg.append(" model ").append(i).append(":\n");
- msg.append(" mean: ").append(means.get(i)).append("\n");
- msg.append(" m:\n").append(FormatUtil.format(m, " ")).append("\n");
- msg.append(" m.det(): ").append(m.det()).append("\n");
- msg.append(" cluster weight: ").append(clusterWeights[i]).append("\n");
- msg.append(" normDistFact: ").append(normDistrFactor[i]).append("\n");
- logger.debugFine(msg.toString());
+ msg.append(" mean: ").append(means.get(i)).append('\n');
+ msg.append(" m:\n").append(FormatUtil.format(m, " ")).append('\n');
+ msg.append(" m.det(): ").append(m.det()).append('\n');
+ msg.append(" cluster weight: ").append(clusterWeights[i]).append('\n');
+ msg.append(" normDistFact: ").append(normDistrFactor[i]).append('\n');
+ LOG.debugFine(msg.toString());
}
}
double emNew = assignProbabilitiesToInstances(relation, normDistrFactor, means, invCovMatr, clusterWeights, probClusterIGivenX);
// iteration unless no change
- if(logger.isVerbose()) {
- logger.verbose("iterating EM");
+    if (LOG.isVerbose()) {
+      LOG.verbose("iterating EM");
+      LOG.verbose("iteration 0 - expectation value: " + emNew);
}
double em;
- int it = 0;
- do {
- it++;
- if(logger.isVerbose()) {
- logger.verbose("iteration " + it + " - expectation value: " + emNew);
- }
+ for (int it = 1; it <= maxiter || maxiter < 0; it++) {
em = emNew;
// recompute models
List<Vector> meanSums = new ArrayList<Vector>(k);
double[] sumOfClusterProbabilities = new double[k];
- for(int i = 0; i < k; i++) {
+ for (int i = 0; i < k; i++) {
clusterWeights[i] = 0.0;
meanSums.add(new Vector(dimensionality));
covarianceMatrices.set(i, Matrix.zeroMatrix(dimensionality));
}
// weights and means
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double[] clusterProbabilities = probClusterIGivenX.get(iditer);
- for(int i = 0; i < k; i++) {
+ for (int i = 0; i < k; i++) {
sumOfClusterProbabilities[i] += clusterProbabilities[i];
Vector summand = relation.get(iditer).getColumnVector().timesEquals(clusterProbabilities[i]);
meanSums.get(i).plusEquals(summand);
}
}
final int n = relation.size();
- for(int i = 0; i < k; i++) {
+ for (int i = 0; i < k; i++) {
clusterWeights[i] = sumOfClusterProbabilities[i] / n;
Vector newMean = meanSums.get(i).timesEquals(1 / sumOfClusterProbabilities[i]);
means.set(i, newMean);
}
// covariance matrices
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double[] clusterProbabilities = probClusterIGivenX.get(iditer);
Vector instance = relation.get(iditer).getColumnVector();
- for(int i = 0; i < k; i++) {
+ for (int i = 0; i < k; i++) {
Vector difference = instance.minus(means.get(i));
covarianceMatrices.get(i).plusEquals(difference.timesTranspose(difference).timesEquals(clusterProbabilities[i]));
}
}
- for(int i = 0; i < k; i++) {
+ for (int i = 0; i < k; i++) {
covarianceMatrices.set(i, covarianceMatrices.get(i).times(1 / sumOfClusterProbabilities[i]).cheatToAvoidSingularity(SINGULARITY_CHEAT));
}
- for(int i = 0; i < k; i++) {
+ for (int i = 0; i < k; i++) {
normDistrFactor[i] = 1.0 / Math.sqrt(Math.pow(MathUtil.TWOPI, dimensionality) * covarianceMatrices.get(i).det());
invCovMatr.set(i, covarianceMatrices.get(i).inverse());
}
// reassign probabilities
emNew = assignProbabilitiesToInstances(relation, normDistrFactor, means, invCovMatr, clusterWeights, probClusterIGivenX);
+
+ if (LOG.isVerbose()) {
+ LOG.verbose("iteration " + it + " - expectation value: " + emNew);
+ }
+ if (Math.abs(em - emNew) <= delta) {
+ break;
+ }
}
- while(Math.abs(em - emNew) > delta);
- if(logger.isVerbose()) {
- logger.verbose("assigning clusters");
+ if (LOG.isVerbose()) {
+ LOG.verbose("assigning clusters");
}
// fill result with clusters and models
List<ModifiableDBIDs> hardClusters = new ArrayList<ModifiableDBIDs>(k);
- for(int i = 0; i < k; i++) {
+ for (int i = 0; i < k; i++) {
hardClusters.add(DBIDUtil.newHashSet());
}
// provide a hard clustering
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double[] clusterProbabilities = probClusterIGivenX.get(iditer);
int maxIndex = 0;
double currentMax = 0.0;
- for(int i = 0; i < k; i++) {
- if(clusterProbabilities[i] > currentMax) {
+ for (int i = 0; i < k; i++) {
+ if (clusterProbabilities[i] > currentMax) {
maxIndex = i;
currentMax = clusterProbabilities[i];
}
}
hardClusters.get(maxIndex).add(iditer);
}
- final V factory = DatabaseUtil.assumeVectorField(relation).getFactory();
+ final NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(relation);
Clustering<EMModel<V>> result = new Clustering<EMModel<V>>("EM Clustering", "em-clustering");
// provide models within the result
- for(int i = 0; i < k; i++) {
+ for (int i = 0; i < k; i++) {
// TODO: re-do labeling.
// SimpleClassLabel label = new SimpleClassLabel();
// label.init(result.canonicalClusterLabel(i));
@@ -316,37 +328,36 @@ public class EM<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clusteri
protected double assignProbabilitiesToInstances(Relation<V> database, double[] normDistrFactor, List<Vector> means, List<Matrix> invCovMatr, double[] clusterWeights, WritableDataStore<double[]> probClusterIGivenX) {
double emSum = 0.0;
- for(DBIDIter iditer = database.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for (DBIDIter iditer = database.iterDBIDs(); iditer.valid(); iditer.advance()) {
Vector x = database.get(iditer).getColumnVector();
double[] probabilities = new double[k];
- for(int i = 0; i < k; i++) {
+ for (int i = 0; i < k; i++) {
Vector difference = x.minus(means.get(i));
double rowTimesCovTimesCol = difference.transposeTimesTimes(invCovMatr.get(i), difference);
double power = rowTimesCovTimesCol / 2.0;
double prob = normDistrFactor[i] * Math.exp(-power);
- if(logger.isDebuggingFinest()) {
- logger.debugFinest(" difference vector= ( " + difference.toString() + " )\n" + " difference:\n" + FormatUtil.format(difference, " ") + "\n" + " rowTimesCovTimesCol:\n" + rowTimesCovTimesCol + "\n" + " power= " + power + "\n" + " prob=" + prob + "\n" + " inv cov matrix: \n" + FormatUtil.format(invCovMatr.get(i), " "));
+ if (LOG.isDebuggingFinest()) {
+ LOG.debugFinest(" difference vector= ( " + difference.toString() + " )\n" + " difference:\n" + FormatUtil.format(difference, " ") + "\n" + " rowTimesCovTimesCol:\n" + rowTimesCovTimesCol + "\n" + " power= " + power + "\n" + " prob=" + prob + "\n" + " inv cov matrix: \n" + FormatUtil.format(invCovMatr.get(i), " "));
}
probabilities[i] = prob;
}
double priorProbability = 0.0;
- for(int i = 0; i < k; i++) {
+ for (int i = 0; i < k; i++) {
priorProbability += probabilities[i] * clusterWeights[i];
}
double logP = Math.max(Math.log(priorProbability), MIN_LOGLIKELIHOOD);
- if(!Double.isNaN(logP)) {
+ if (!Double.isNaN(logP)) {
emSum += logP;
}
double[] clusterProbabilities = new double[k];
- for(int i = 0; i < k; i++) {
+ for (int i = 0; i < k; i++) {
assert (priorProbability >= 0.0);
assert (clusterWeights[i] >= 0.0);
// do not divide by zero!
- if(priorProbability == 0.0) {
+ if (priorProbability == 0.0) {
clusterProbabilities[i] = 0.0;
- }
- else {
+ } else {
clusterProbabilities[i] = probabilities[i] / priorProbability * clusterWeights[i];
}
}
@@ -373,7 +384,7 @@ public class EM<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clusteri
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -383,35 +394,46 @@ public class EM<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clusteri
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
protected int k;
protected double delta;
protected KMeansInitialization<V> initializer;
+ protected int maxiter = -1;
+
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- IntParameter kP = new IntParameter(K_ID, new GreaterConstraint(0));
- if(config.grab(kP)) {
+ IntParameter kP = new IntParameter(K_ID);
+ kP.addConstraint(new GreaterConstraint(0));
+ if (config.grab(kP)) {
k = kP.getValue();
}
ObjectParameter<KMeansInitialization<V>> initialP = new ObjectParameter<KMeansInitialization<V>>(INIT_ID, KMeansInitialization.class, RandomlyGeneratedInitialMeans.class);
- if(config.grab(initialP)) {
+ if (config.grab(initialP)) {
initializer = initialP.instantiateClass(config);
}
- DoubleParameter deltaP = new DoubleParameter(DELTA_ID, new GreaterEqualConstraint(0.0), 0.0);
- if(config.grab(deltaP)) {
+ DoubleParameter deltaP = new DoubleParameter(DELTA_ID, 0.0);
+ deltaP.addConstraint(new GreaterEqualConstraint(0.0));
+ if (config.grab(deltaP)) {
delta = deltaP.getValue();
}
+
+ IntParameter maxiterP = new IntParameter(KMeans.MAXITER_ID);
+ maxiterP.addConstraint(new GreaterEqualConstraint(0));
+ maxiterP.setOptional(true);
+ if (config.grab(maxiterP)) {
+ maxiter = maxiterP.getValue();
+ }
}
@Override
protected EM<V> makeInstance() {
- return new EM<V>(k, delta, initializer);
+ return new EM<V>(k, delta, initializer, maxiter);
}
}
-} \ No newline at end of file
+}
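
The main behavioral change in EM.java is the termination logic: the unbounded do-while loop is replaced by a for loop capped at maxiter, where a negative maxiter (the default when the optional parameter is omitted) keeps the old run-until-convergence behavior. A self-contained sketch of that convention; initialExpectation() and iterate() are hypothetical placeholders standing in for the E/M rounds, not methods of this class:

    // maxiter < 0 means "no iteration limit"; delta is the convergence bound.
    double emNew = initialExpectation(); // hypothetical: initial log-likelihood
    for (int it = 1; it <= maxiter || maxiter < 0; it++) {
      double em = emNew;
      emNew = iterate(); // hypothetical: one expectation-maximization round
      if (Math.abs(em - emNew) <= delta) {
        break; // converged
      }
    }
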
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/NaiveMeanShiftClustering.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/NaiveMeanShiftClustering.java
new file mode 100644
index 00000000..8429d8ac
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/NaiveMeanShiftClustering.java
@@ -0,0 +1,279 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import java.util.ArrayList;
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.MeanModel;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid;
+import de.lmu.ifi.dbs.elki.math.statistics.EpanechnikovKernelDensityFunction;
+import de.lmu.ifi.dbs.elki.math.statistics.KernelDensityFunction;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DistanceParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
+
+/**
+ * Mean-shift based clustering algorithm. Naive implementation: there does not
+ * seem to be "the" mean-shift clustering algorithm, but it is a general
+ * concept. In this naive implementation, mean-shift is applied to all objects
+ * until they converge to each other; various optimizations could still be
+ * made.
+ *
+ * It also is not really parameter-free: the kernel needs to be specified,
+ * including a radius/bandwidth.
+ *
+ * By using range queries, the algorithm does benefit from index structures!
+ *
+ * TODO: add methods to automatically choose the bandwidth?
+ *
+ * <p>
+ * Reference:<br />
+ * Y. Cheng<br />
+ * Mean shift, mode seeking, and clustering<br />
+ * IEEE Transactions on Pattern Analysis and Machine Intelligence 17-8
+ * </p>
+ *
+ * @author Erich Schubert
+ *
+ * @param <V> Vector type
+ * @param <D> Distance type
+ */
+@Reference(authors = "Y. Cheng", title = "Mean shift, mode seeking, and clustering", booktitle = "IEEE Transactions on Pattern Analysis and Machine Intelligence 17-8", url = "http://dx.doi.org/10.1109/34.400568")
+public class NaiveMeanShiftClustering<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<V, D, Clustering<MeanModel<V>>> implements ClusteringAlgorithm<Clustering<MeanModel<V>>> {
+ /**
+ * Class logger.
+ */
+ private static final Logging LOG = Logging.getLogger(NaiveMeanShiftClustering.class);
+
+ /**
+ * Density estimation kernel.
+ */
+ KernelDensityFunction kernel = EpanechnikovKernelDensityFunction.KERNEL;
+
+ /**
+ * Range of the kernel.
+ */
+ D range;
+
+ /**
+ * Maximum number of iterations.
+ */
+ static final int MAXITER = 1000;
+
+ /**
+ * Constructor.
+ *
+ * @param distanceFunction Distance function
+ * @param kernel Kernel function
+ * @param range Kernel radius
+ */
+ public NaiveMeanShiftClustering(DistanceFunction<? super V, D> distanceFunction, KernelDensityFunction kernel, D range) {
+ super(distanceFunction);
+ this.kernel = kernel;
+ this.range = range;
+ }
+
+ /**
+ * Run the mean-shift clustering algorithm.
+ *
+ * @param database Database
+ * @param relation Data relation
+ * @return Clustering result
+ */
+ public Clustering<MeanModel<V>> run(Database database, Relation<V> relation) {
+ final DistanceQuery<V, D> distq = database.getDistanceQuery(relation, getDistanceFunction());
+ final RangeQuery<V, D> rangeq = database.getRangeQuery(distq);
+ final int dim = RelationUtil.dimensionality(relation);
+
+ // Kernel bandwidth, for normalization
+ final double bandwidth = range.doubleValue();
+ // Stopping threshold
+ final double threshold = bandwidth * 1E-10;
+
+ // Result store:
+ ArrayList<Pair<V, ModifiableDBIDs>> clusters = new ArrayList<Pair<V, ModifiableDBIDs>>();
+
+ ModifiableDBIDs noise = DBIDUtil.newArray();
+
+ FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Mean-shift clustering", relation.size(), LOG) : null;
+
+ for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
+ // Initial position:
+ V position = relation.get(iter);
+ iterations: for (int j = 1; j <= MAXITER; j++) {
+ // Compute new position:
+ V newvec = null;
+ {
+ DistanceDBIDResult<D> neigh = rangeq.getRangeForObject(position, range);
+ boolean okay = (neigh.size() > 1) || (neigh.size() >= 1 && j > 1);
+ if (okay) {
+ Centroid newpos = new Centroid(dim);
+ for (DistanceDBIDResultIter<D> niter = neigh.iter(); niter.valid(); niter.advance()) {
+ final double weight = kernel.density(niter.getDistance().doubleValue() / bandwidth);
+ newpos.put(relation.get(niter), weight);
+ }
+ newvec = newpos.toVector(relation);
+ // TODO: detect 0 weight!
+ }
+ if (!okay) {
+ noise.add(iter);
+ break iterations;
+ }
+ }
+ // Test if we are close to one of the known clusters:
+ double bestd = Double.POSITIVE_INFINITY;
+ Pair<V, ModifiableDBIDs> bestp = null;
+ for (Pair<V, ModifiableDBIDs> pair : clusters) {
+ final double merged = distq.distance(newvec, pair.first).doubleValue();
+ if (merged < bestd) {
+ bestd = merged;
+ bestp = pair;
+ }
+ }
+ // Check for convergence:
+ D delta = distq.distance(position, newvec);
+ if (bestd < 10 * threshold || bestd * 2 < delta.doubleValue()) {
+ bestp.second.add(iter);
+ break iterations;
+ }
+ if (j == MAXITER) {
+ LOG.warning("No convergence after " + MAXITER + " iterations. Distance: " + delta.toString());
+ }
+ if (Double.isNaN(delta.doubleValue())) {
+ LOG.warning("Encountered NaN distance. Invalid center vector? " + newvec.toString());
+ break iterations;
+ }
+ if (j == MAXITER || delta.doubleValue() < threshold) {
+ if (LOG.isDebuggingFine()) {
+ LOG.debugFine("New cluster:" + newvec + " delta: " + delta + " threshold: " + threshold + " bestd: " + bestd);
+ }
+ ArrayModifiableDBIDs cids = DBIDUtil.newArray();
+ cids.add(iter);
+ clusters.add(new Pair<V, ModifiableDBIDs>(newvec, cids));
+ break iterations;
+ }
+ position = newvec;
+ }
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
+ }
+ }
+ if (prog != null) {
+ prog.ensureCompleted(LOG);
+ }
+
+ ArrayList<Cluster<MeanModel<V>>> cs = new ArrayList<Cluster<MeanModel<V>>>(clusters.size());
+ for (Pair<V, ModifiableDBIDs> pair : clusters) {
+ cs.add(new Cluster<MeanModel<V>>(pair.second, new MeanModel<V>(pair.first)));
+ }
+ if (noise.size() > 0) {
+ cs.add(new Cluster<MeanModel<V>>(noise, true));
+ }
+ Clustering<MeanModel<V>> c = new Clustering<MeanModel<V>>("Mean-shift Clustering", "mean-shift-clustering", cs);
+ return c;
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterizer.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ *
+ * @param <V> Vector type
+ * @param <D> Distance type
+ */
+ public static class Parameterizer<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<V, D> {
+ /**
+ * Parameter for kernel function.
+ */
+ public static final OptionID KERNEL_ID = new OptionID("meanshift.kernel", "Kernel function to use with mean-shift clustering.");
+
+ /**
+ * Parameter for kernel radius/range/bandwidth.
+ */
+ public static final OptionID RANGE_ID = new OptionID("meanshift.kernel-bandwidth", "Range of the kernel to use (aka: radius, bandwidth).");
+
+ /**
+ * Kernel function.
+ */
+ KernelDensityFunction kernel = EpanechnikovKernelDensityFunction.KERNEL;
+
+ /**
+ * Kernel radius.
+ */
+ D range;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ ObjectParameter<KernelDensityFunction> kernelP = new ObjectParameter<KernelDensityFunction>(KERNEL_ID, KernelDensityFunction.class, EpanechnikovKernelDensityFunction.class);
+ if (config.grab(kernelP)) {
+ kernel = kernelP.instantiateClass(config);
+ }
+ DistanceParameter<D> rangeP = new DistanceParameter<D>(RANGE_ID, distanceFunction);
+ if (config.grab(rangeP)) {
+ range = rangeP.getValue();
+ }
+ }
+
+ @Override
+ protected NaiveMeanShiftClustering<V, D> makeInstance() {
+ return new NaiveMeanShiftClustering<V, D>(distanceFunction, kernel, range);
+ }
+ }
+}
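
To make the update step of the new class concrete outside of ELKI: each iteration moves a point to the kernel-weighted centroid of its neighbors within the bandwidth, mirroring the weighting above (weight = kernel density at the normalized distance). A self-contained plain-Java sketch using the one-dimensional Epanechnikov density as weight; all names are illustrative and not part of this class:

    // One mean-shift step: kernel-weighted centroid of all points within
    // bandwidth h of x. Returns x unchanged if nothing lies in the support.
    static double[] shift(double[] x, double[][] data, double h) {
      double[] sum = new double[x.length];
      double wsum = 0.0;
      for (double[] p : data) {
        double d2 = 0.0;
        for (int i = 0; i < x.length; i++) {
          double di = (p[i] - x[i]) / h;
          d2 += di * di;
        }
        if (d2 >= 1.0) {
          continue; // outside the kernel support
        }
        double w = 0.75 * (1.0 - d2); // Epanechnikov density at the distance
        for (int i = 0; i < x.length; i++) {
          sum[i] += w * p[i];
        }
        wsum += w;
      }
      if (wsum == 0.0) {
        return x; // isolated point; the class above treats this as noise
      }
      for (int i = 0; i < x.length; i++) {
        sum[i] /= wsum;
      }
      return sum;
    }
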
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICS.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICS.java
index 04b57081..2c098dc0 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICS.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICS.java
@@ -23,8 +23,6 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.List;
-
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
@@ -33,14 +31,17 @@ import de.lmu.ifi.dbs.elki.database.QueryUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DistanceDBIDPair;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDistanceDBIDPair;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
-import de.lmu.ifi.dbs.elki.database.query.DoubleDistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.DistanceUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDResultIter;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -78,19 +79,19 @@ public class OPTICS<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(OPTICS.class);
+ private static final Logging LOG = Logging.getLogger(OPTICS.class);
/**
* Parameter to specify the maximum radius of the neighborhood to be
* considered, must be suitable to the distance function specified.
*/
- public static final OptionID EPSILON_ID = OptionID.getOrCreateOptionID("optics.epsilon", "The maximum radius of the neighborhood to be considered.");
+ public static final OptionID EPSILON_ID = new OptionID("optics.epsilon", "The maximum radius of the neighborhood to be considered.");
/**
* Parameter to specify the threshold for minimum number of points in the
* epsilon-neighborhood of a point, must be an integer greater than 0.
*/
- public static final OptionID MINPTS_ID = OptionID.getOrCreateOptionID("optics.minpts", "Threshold for minimum number of points in the epsilon-neighborhood of a point.");
+ public static final OptionID MINPTS_ID = new OptionID("optics.minpts", "Threshold for minimum number of points in the epsilon-neighborhood of a point.");
/**
* Hold the value of {@link #EPSILON_ID}.
@@ -135,7 +136,7 @@ public class OPTICS<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
RangeQuery<O, D> rangeQuery = QueryUtil.getRangeQuery(relation, getDistanceFunction(), epsilon);
int size = relation.size();
- final FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("OPTICS", size, logger) : null;
+ final FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("OPTICS", size, LOG) : null;
processedIDs = DBIDUtil.newHashSet(size);
ClusterOrderResult<D> clusterOrder = new ClusterOrderResult<D>("OPTICS Clusterorder", "optics-clusterorder");
@@ -151,19 +152,19 @@ public class OPTICS<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
@SuppressWarnings("unchecked")
final RangeQuery<O, DoubleDistance> doubleRangeQuery = RangeQuery.class.cast(rangeQuery);
final DoubleDistance depsilon = DoubleDistance.class.cast(epsilon);
- expandClusterOrderDouble(doubleClusterOrder, database, doubleRangeQuery, iditer.getDBID(), depsilon, progress);
+ expandClusterOrderDouble(doubleClusterOrder, database, doubleRangeQuery, DBIDUtil.deref(iditer), depsilon, progress);
}
}
}
else {
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
if(!processedIDs.contains(iditer)) {
- expandClusterOrder(clusterOrder, database, rangeQuery, iditer.getDBID(), epsilon, progress);
+ expandClusterOrder(clusterOrder, database, rangeQuery, DBIDUtil.deref(iditer), epsilon, progress);
}
}
}
if(progress != null) {
- progress.ensureCompleted(logger);
+ progress.ensureCompleted(LOG);
}
return clusterOrder;
@@ -189,21 +190,21 @@ public class OPTICS<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
clusterOrder.add(current);
processedIDs.add(current.getID());
- List<DistanceResultPair<D>> neighbors = rangeQuery.getRangeForDBID(current.getID(), epsilon);
+ DistanceDBIDResult<D> neighbors = rangeQuery.getRangeForDBID(current.getID(), epsilon);
if(neighbors.size() >= minpts) {
- final DistanceResultPair<D> last = neighbors.get(minpts - 1);
+ final DistanceDBIDPair<D> last = neighbors.get(minpts - 1);
D coreDistance = last.getDistance();
- for(DistanceResultPair<D> neighbor : neighbors) {
+ for(DistanceDBIDResultIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
if(processedIDs.contains(neighbor)) {
continue;
}
D reachability = DistanceUtil.max(neighbor.getDistance(), coreDistance);
- heap.add(new GenericClusterOrderEntry<D>(neighbor.getDBID(), current.getID(), reachability));
+ heap.add(new GenericClusterOrderEntry<D>(DBIDUtil.deref(neighbor), current.getID(), reachability));
}
}
if(progress != null) {
- progress.setProcessed(processedIDs.size(), logger);
+ progress.setProcessed(processedIDs.size(), LOG);
}
}
}
@@ -228,18 +229,18 @@ public class OPTICS<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
clusterOrder.add(current);
processedIDs.add(current.getID());
- List<DistanceResultPair<DoubleDistance>> neighbors = rangeQuery.getRangeForDBID(current.getID(), epsilon);
+ DistanceDBIDResult<DoubleDistance> neighbors = rangeQuery.getRangeForDBID(current.getID(), epsilon);
if(neighbors.size() >= minpts) {
- final DistanceResultPair<DoubleDistance> last = neighbors.get(minpts - 1);
- if(last instanceof DoubleDistanceResultPair) {
- double coreDistance = ((DoubleDistanceResultPair) last).getDoubleDistance();
+ final DistanceDBIDPair<DoubleDistance> last = neighbors.get(minpts - 1);
+ if(last instanceof DoubleDistanceDBIDPair) {
+ double coreDistance = ((DoubleDistanceDBIDPair) last).doubleDistance();
- for(DistanceResultPair<DoubleDistance> neighbor : neighbors) {
+ for(DistanceDBIDResultIter<DoubleDistance> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
if(processedIDs.contains(neighbor)) {
continue;
}
- double reachability = Math.max(((DoubleDistanceResultPair) neighbor).getDoubleDistance(), coreDistance);
- heap.add(new DoubleDistanceClusterOrderEntry(neighbor.getDBID(), current.getID(), reachability));
+ double reachability = Math.max(((DoubleDistanceDBIDResultIter) neighbor).doubleDistance(), coreDistance);
+ heap.add(new DoubleDistanceClusterOrderEntry(DBIDUtil.deref(neighbor), current.getID(), reachability));
}
}
else {
@@ -247,17 +248,17 @@ public class OPTICS<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
// Only if we got an optimized result before.
double coreDistance = last.getDistance().doubleValue();
- for(DistanceResultPair<DoubleDistance> neighbor : neighbors) {
+ for(DistanceDBIDResultIter<DoubleDistance> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
if(processedIDs.contains(neighbor)) {
continue;
}
double reachability = Math.max(neighbor.getDistance().doubleValue(), coreDistance);
- heap.add(new DoubleDistanceClusterOrderEntry(neighbor.getDBID(), current.getID(), reachability));
+ heap.add(new DoubleDistanceClusterOrderEntry(DBIDUtil.deref(neighbor), current.getID(), reachability));
}
}
}
if(progress != null) {
- progress.setProcessed(processedIDs.size(), logger);
+ progress.setProcessed(processedIDs.size(), LOG);
}
}
}
@@ -279,7 +280,7 @@ public class OPTICS<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -302,9 +303,10 @@ public class OPTICS<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
epsilon = epsilonP.getValue();
}
- IntParameter minptsP = new IntParameter(MINPTS_ID, new GreaterConstraint(0));
+ IntParameter minptsP = new IntParameter(MINPTS_ID);
+ minptsP.addConstraint(new GreaterConstraint(0));
if(config.grab(minptsP)) {
- minpts = minptsP.getValue();
+ minpts = minptsP.intValue();
}
}
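
The pattern repeated throughout OPTICS.java: range-query results are now DistanceDBIDResult rather than List<DistanceResultPair>, and they are traversed with an explicit iterator instead of for-each. A sketch of the new traversal idiom, restricted to calls visible in this diff (D is the Distance type parameter; the surrounding method is omitted):

    DistanceDBIDResult<D> neighbors = rangeQuery.getRangeForDBID(id, epsilon);
    for (DistanceDBIDResultIter<D> it = neighbors.iter(); it.valid(); it.advance()) {
      D dist = it.getDistance();          // distance to the current neighbor
      DBID neighbor = DBIDUtil.deref(it); // materialize the id only if it is kept
      // ... process (neighbor, dist) ...
    }
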
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSXi.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSXi.java
index 41e48b89..39a0ebd6 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSXi.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSXi.java
@@ -50,7 +50,8 @@ import de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy.HierarchyHashmapLi
import de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy.ModifiableHierarchy;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.IntervalConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ClassParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
@@ -60,7 +61,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
*
* @author Erich Schubert
*
- * @apiviz.uses OPTICSTypeAlgorithm oneway
+ * @apiviz.composedOf OPTICSTypeAlgorithm oneway
* @apiviz.uses ClusterOrderResult oneway
* @apiviz.has SteepAreaResult
*
@@ -70,17 +71,17 @@ public class OPTICSXi<N extends NumberDistance<N, ?>> extends AbstractAlgorithm<
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(OPTICSXi.class);
+ private static final Logging LOG = Logging.getLogger(OPTICSXi.class);
/**
* Parameter to specify the actual OPTICS algorithm to use.
*/
- public static final OptionID XIALG_ID = OptionID.getOrCreateOptionID("opticsxi.algorithm", "The actual OPTICS-type algorithm to use.");
+ public static final OptionID XIALG_ID = new OptionID("opticsxi.algorithm", "The actual OPTICS-type algorithm to use.");
/**
* Parameter to specify the steepness threshold.
*/
- public static final OptionID XI_ID = OptionID.getOrCreateOptionID("opticsxi.xi", "Threshold for the steepness requirement.");
+ public static final OptionID XI_ID = new OptionID("opticsxi.xi", "Threshold for the steepness requirement.");
/**
* The actual algorithm we use.
@@ -109,12 +110,12 @@ public class OPTICSXi<N extends NumberDistance<N, ?>> extends AbstractAlgorithm<
ClusterOrderResult<N> opticsresult = optics.run(database);
if(!NumberDistance.class.isInstance(optics.getDistanceFactory())) {
- logger.verbose("Xi cluster extraction only supported for number distances!");
+ LOG.verbose("Xi cluster extraction only supported for number distances!");
return null;
}
- if(logger.isVerbose()) {
- logger.verbose("Extracting clusters with Xi: " + xi);
+ if(LOG.isVerbose()) {
+ LOG.verbose("Extracting clusters with Xi: " + xi);
}
return extractClusters(opticsresult, relation, 1.0 - xi, optics.getMinPts());
}
@@ -135,7 +136,7 @@ public class OPTICSXi<N extends NumberDistance<N, ?>> extends AbstractAlgorithm<
// TODO: make it configurable to keep this list; this is mostly useful for
// visualization
List<SteepArea> salist = new ArrayList<SteepArea>();
- List<SteepDownArea> sdaset = new java.util.Vector<SteepDownArea>();
+ List<SteepDownArea> sdaset = new ArrayList<SteepDownArea>();
ModifiableHierarchy<Cluster<OPTICSModel>> hier = new HierarchyHashmapList<Cluster<OPTICSModel>>();
HashSet<Cluster<OPTICSModel>> curclusters = new HashSet<Cluster<OPTICSModel>>();
HashSetModifiableDBIDs unclaimedids = DBIDUtil.newHashSet(relation.getDBIDs());
@@ -175,8 +176,8 @@ public class OPTICSXi<N extends NumberDistance<N, ?>> extends AbstractAlgorithm<
}
mib = clusterOrder.get(endsteep).getReachability().doubleValue();
final SteepDownArea sda = new SteepDownArea(startsteep, endsteep, startval, 0);
- if(logger.isDebuggingFinest()) {
- logger.debugFinest("Xi " + sda.toString());
+ if(LOG.isDebuggingFinest()) {
+ LOG.debugFinest("Xi " + sda.toString());
}
sdaset.add(sda);
if(salist != null) {
@@ -220,8 +221,8 @@ public class OPTICSXi<N extends NumberDistance<N, ?>> extends AbstractAlgorithm<
}
}
sua = new SteepUpArea(startsteep, endsteep, esuccr);
- if(logger.isDebuggingFinest()) {
- logger.debugFinest("Xi " + sua.toString());
+ if(LOG.isDebuggingFinest()) {
+ LOG.debugFinest("Xi " + sua.toString());
}
if(salist != null) {
salist.add(sua);
@@ -280,8 +281,8 @@ public class OPTICSXi<N extends NumberDistance<N, ?>> extends AbstractAlgorithm<
dbids.add(dbid);
}
}
- if(logger.isDebuggingFine()) {
- logger.debugFine("Found cluster with " + dbids.size() + " new objects, length " + (cstart - cend + 1));
+ if(LOG.isDebuggingFine()) {
+ LOG.debugFine("Found cluster with " + dbids.size() + " new objects, length " + (cstart - cend + 1));
}
OPTICSModel model = new OPTICSModel(cstart, cend);
Cluster<OPTICSModel> cluster = new Cluster<OPTICSModel>("Cluster_" + cstart + "_" + cend, dbids, model, hier);
@@ -362,7 +363,7 @@ public class OPTICSXi<N extends NumberDistance<N, ?>> extends AbstractAlgorithm<
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -656,9 +657,10 @@ public class OPTICSXi<N extends NumberDistance<N, ?>> extends AbstractAlgorithm<
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
DoubleParameter xiP = new DoubleParameter(XI_ID);
- xiP.addConstraint(new IntervalConstraint(0.0, IntervalConstraint.IntervalBoundary.CLOSE, 1.0, IntervalConstraint.IntervalBoundary.OPEN));
+ xiP.addConstraint(new GreaterEqualConstraint(0.0));
+ xiP.addConstraint(new LessConstraint(1.0));
if(config.grab(xiP)) {
- xi = xiP.getValue();
+ xi = xiP.doubleValue();
}
ClassParameter<OPTICSTypeAlgorithm<D>> opticsP = new ClassParameter<OPTICSTypeAlgorithm<D>>(XIALG_ID, OPTICSTypeAlgorithm.class, OPTICS.class);
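
For other parameterizers migrating off IntervalConstraint: the closed/open interval used here, [0, 1), maps onto a pair of single-sided constraints exactly as in the hunk above:

    DoubleParameter xiP = new DoubleParameter(XI_ID);
    xiP.addConstraint(new GreaterEqualConstraint(0.0)); // closed lower bound: xi >= 0
    xiP.addConstraint(new LessConstraint(1.0));         // open upper bound: xi < 1
    if (config.grab(xiP)) {
      xi = xiP.doubleValue();
    }
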
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SLINK.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SLINK.java
index 2aa38bdd..3e1f0650 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SLINK.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SLINK.java
@@ -23,34 +23,35 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import gnu.trove.list.array.TDoubleArrayList;
+
import java.util.ArrayList;
-import java.util.Collections;
import java.util.Comparator;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
-import de.lmu.ifi.dbs.elki.data.model.ClusterModel;
import de.lmu.ifi.dbs.elki.data.model.DendrogramModel;
-import de.lmu.ifi.dbs.elki.data.model.Model;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DBIDDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.DoubleDistanceDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDBIDDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableRecordStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDistanceDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDMIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDVar;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
@@ -58,7 +59,9 @@ import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.DistanceUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.result.BasicResult;
@@ -73,10 +76,9 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
/**
- * Efficient implementation of the Single-Link Algorithm SLINK of R. Sibson.
+ * Implementation of the efficient Single-Link Algorithm SLINK of R. Sibson.
* <p>
* Reference: R. Sibson: SLINK: An optimally efficient algorithm for the
* single-link cluster method. <br>
@@ -94,35 +96,20 @@ public class SLINK<O, D extends Distance<D>> extends AbstractDistanceBasedAlgori
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(SLINK.class);
-
- /**
- * The minimum number of clusters to extract
- */
- public static final OptionID SLINK_MINCLUSTERS_ID = OptionID.getOrCreateOptionID("slink.minclusters", "The maximum number of clusters to extract.");
-
- /**
- * The values of the function Pi of the pointer representation.
- */
- private WritableDataStore<DBID> pi;
-
- /**
- * The values of the function Lambda of the pointer representation.
- */
- private WritableDataStore<D> lambda;
+ private static final Logging LOG = Logging.getLogger(SLINK.class);
/**
* Minimum number of clusters to extract
*/
- private Integer minclusters;
+ private int minclusters = -1;
/**
* Constructor.
*
* @param distanceFunction Distance function
- * @param minclusters Minimum clusters to extract. Can be null
+ * @param minclusters Minimum clusters to extract. Can be {@code -1}.
*/
- public SLINK(DistanceFunction<? super O, D> distanceFunction, Integer minclusters) {
+ public SLINK(DistanceFunction<? super O, D> distanceFunction, int minclusters) {
super(distanceFunction);
this.minclusters = minclusters;
}
@@ -130,48 +117,71 @@ public class SLINK<O, D extends Distance<D>> extends AbstractDistanceBasedAlgori
/**
* Performs the SLINK algorithm on the given database.
*/
- @SuppressWarnings("unchecked")
public Result run(Database database, Relation<O> relation) {
DistanceQuery<O, D> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
+ @SuppressWarnings("unchecked")
Class<D> distCls = (Class<D>) getDistanceFunction().getDistanceFactory().getClass();
- WritableRecordStore store = DataStoreUtil.makeRecordStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, DBID.class, distCls);
- pi = store.getStorage(0, DBID.class);
- lambda = store.getStorage(1, distCls);
+ WritableDBIDDataStore pi = DataStoreUtil.makeDBIDStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
+ WritableDataStore<D> lambda = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, distCls);
// Temporary storage for m.
WritableDataStore<D> m = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, distCls);
- FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Clustering", relation.size(), logger) : null;
+ FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Running SLINK", relation.size(), LOG) : null;
// has to be an array for monotonicity reasons!
ModifiableDBIDs processedIDs = DBIDUtil.newArray(relation.size());
- // apply the algorithm
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
- step1(id);
- step2(id, processedIDs, distQuery, m);
- step3(id, processedIDs, m);
- step4(id, processedIDs);
-
- processedIDs.add(id);
-
- if(progress != null) {
- progress.incrementProcessed(logger);
+ // Optimized code path for double distances
+ if (getDistanceFunction() instanceof PrimitiveDoubleDistanceFunction && lambda instanceof WritableDoubleDistanceDataStore && m instanceof WritableDoubleDistanceDataStore) {
+ @SuppressWarnings("unchecked")
+ PrimitiveDoubleDistanceFunction<? super O> dist = (PrimitiveDoubleDistanceFunction<? super O>) getDistanceFunction();
+ WritableDoubleDistanceDataStore lambdad = (WritableDoubleDistanceDataStore) lambda;
+ WritableDoubleDistanceDataStore md = (WritableDoubleDistanceDataStore) m;
+ // apply the algorithm
+ for (DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) {
+ step1double(id, pi, lambdad);
+ step2double(id, processedIDs, distQuery.getRelation(), dist, md);
+ step3double(id, pi, lambdad, processedIDs, md);
+ step4double(id, pi, lambdad, processedIDs);
+
+ processedIDs.add(id);
+
+ if (progress != null) {
+ progress.incrementProcessed(LOG);
+ }
+ }
+ } else {
+ // apply the algorithm
+ for (DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) {
+ step1(id, pi, lambda);
+ step2(id, processedIDs, distQuery, m);
+ step3(id, pi, lambda, processedIDs, m);
+ step4(id, pi, lambda, processedIDs);
+
+ processedIDs.add(id);
+
+ if (progress != null) {
+ progress.incrementProcessed(LOG);
+ }
}
}
- if(progress != null) {
- progress.ensureCompleted(logger);
+ if (progress != null) {
+ progress.ensureCompleted(LOG);
}
// We don't need m anymore.
m.destroy();
m = null;
- // build dendrogram
- BasicResult result = null;
-
- // Build clusters identified by their target object
- int minc = minclusters != null ? minclusters : relation.size();
- result = extractClusters(relation.getDBIDs(), pi, lambda, minc);
+    // Build dendrogram clusters identified by their target object
+ if (LOG.isVerbose()) {
+ LOG.verbose("Extracting clusters.");
+ }
+ final BasicResult result;
+ if (lambda instanceof DoubleDistanceDataStore) {
+ result = extractClustersDouble(relation.getDBIDs(), pi, (DoubleDistanceDataStore) lambda, minclusters);
+ } else {
+ result = extractClusters(relation.getDBIDs(), pi, lambda, minclusters);
+ }
result.addChildResult(new MaterializedRelation<DBID>("SLINK pi", "slink-order", TypeUtil.DBID, pi, processedIDs));
result.addChildResult(new MaterializedRelation<D>("SLINK lambda", "slink-order", new SimpleTypeInformation<D>(distCls), lambda, processedIDs));
@@ -182,61 +192,66 @@ public class SLINK<O, D extends Distance<D>> extends AbstractDistanceBasedAlgori
/**
* First step: Initialize P(id) = id, L(id) = infinity.
*
- * @param newID the id of the object to be inserted into the pointer
+ * @param id the id of the object to be inserted into the pointer
* representation
+ * @param pi Pi data store
+ * @param lambda Lambda data store
*/
- private void step1(DBID newID) {
+ private void step1(DBIDRef id, WritableDBIDDataStore pi, WritableDataStore<D> lambda) {
// P(n+1) = n+1:
- pi.put(newID, newID);
+ pi.put(id, id);
// L(n+1) = infinity
- lambda.put(newID, getDistanceFunction().getDistanceFactory().infiniteDistance());
+ lambda.put(id, getDistanceFunction().getDistanceFactory().infiniteDistance());
}
/**
* Second step: Determine the pairwise distances from all objects in the
* pointer representation to the new object with the specified id.
*
- * @param newID the id of the object to be inserted into the pointer
+ * @param id the id of the object to be inserted into the pointer
* representation
* @param processedIDs the already processed ids
+ * @param m Data store
* @param distFunc Distance function to use
*/
- private void step2(DBID newID, DBIDs processedIDs, DistanceQuery<O, D> distFunc, WritableDataStore<D> m) {
- for(DBIDIter it = processedIDs.iter(); it.valid(); it.advance()) {
- DBID id = it.getDBID();
+ private void step2(DBIDRef id, DBIDs processedIDs, DistanceQuery<O, D> distFunc, WritableDataStore<D> m) {
+ O newObj = distFunc.getRelation().get(id);
+ for (DBIDIter it = processedIDs.iter(); it.valid(); it.advance()) {
// M(i) = dist(i, n+1)
- m.put(id, distFunc.distance(id, newID));
+ m.put(it, distFunc.distance(it, newObj));
}
}
/**
* Third step: Determine the values for P and L
*
- * @param newID the id of the object to be inserted into the pointer
+ * @param id the id of the object to be inserted into the pointer
* representation
+ * @param pi Pi data store
+ * @param lambda Lambda data store
* @param processedIDs the already processed ids
+ * @param m Data store
*/
- private void step3(DBID newID, DBIDs processedIDs, WritableDataStore<D> m) {
+ private void step3(DBIDRef id, WritableDBIDDataStore pi, WritableDataStore<D> lambda, DBIDs processedIDs, WritableDataStore<D> m) {
+ DBIDVar p_i = DBIDUtil.newVar();
// for i = 1..n
- for(DBIDIter it = processedIDs.iter(); it.valid(); it.advance()) {
- DBID id = it.getDBID();
- D l_i = lambda.get(id);
- D m_i = m.get(id);
- DBID p_i = pi.get(id);
+ for (DBIDIter it = processedIDs.iter(); it.valid(); it.advance()) {
+ D l_i = lambda.get(it);
+ D m_i = m.get(it);
+ pi.assignVar(it, p_i); // p_i = pi(it)
D mp_i = m.get(p_i);
// if L(i) >= M(i)
- if(l_i.compareTo(m_i) >= 0) {
+ if (l_i.compareTo(m_i) >= 0) {
// M(P(i)) = min { M(P(i)), L(i) }
m.put(p_i, DistanceUtil.min(mp_i, l_i));
// L(i) = M(i)
- lambda.put(id, m_i);
+ lambda.put(it, m_i);
// P(i) = n+1;
- pi.put(id, newID);
- }
- else {
+ pi.put(it, id);
+ } else {
// M(P(i)) = min { M(P(i)), M(i) }
m.put(p_i, DistanceUtil.min(mp_i, m_i));
}
@@ -246,34 +261,119 @@ public class SLINK<O, D extends Distance<D>> extends AbstractDistanceBasedAlgori
/**
* Fourth step: Actualize the clusters if necessary
*
- * @param newID the id of the current object
+ * @param id the id of the current object
+ * @param pi Pi data store
+ * @param lambda Lambda data store
* @param processedIDs the already processed ids
*/
- private void step4(DBID newID, DBIDs processedIDs) {
+ private void step4(DBIDRef id, WritableDBIDDataStore pi, WritableDataStore<D> lambda, DBIDs processedIDs) {
+ DBIDVar p_i = DBIDUtil.newVar();
// for i = 1..n
- for(DBIDIter it = processedIDs.iter(); it.valid(); it.advance()) {
- DBID id = it.getDBID();
- D l_i = lambda.get(id);
- D lp_i = lambda.get(pi.get(id));
+ for (DBIDIter it = processedIDs.iter(); it.valid(); it.advance()) {
+ D l_i = lambda.get(it);
+ pi.assignVar(it, p_i); // p_i = pi(it)
+ D lp_i = lambda.get(p_i);
// if L(i) >= L(P(i))
- if(l_i.compareTo(lp_i) >= 0) {
+ if (l_i.compareTo(lp_i) >= 0) {
// P(i) = n+1
- pi.put(id, newID);
+ pi.put(it, id);
}
}
}
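
Taken together, steps 1-4 are Sibson's SLINK recurrence: each new object starts as its own root (step 1), its distances to all processed objects are materialized in M (step 2), the pointer representation (pi, lambda) is updated (step 3), and stale pointers are redirected (step 4). A compact, self-contained sketch over a precomputed distance matrix, with plain arrays standing in for the ELKI data stores (illustrative only):

    static void slinkSketch(double[][] dist, int[] pi, double[] lambda) {
      final int n = dist.length;
      double[] m = new double[n];
      for (int k = 0; k < n; k++) {
        pi[k] = k;                            // step 1: P(k) = k
        lambda[k] = Double.POSITIVE_INFINITY; // step 1: L(k) = infinity
        for (int i = 0; i < k; i++) {
          m[i] = dist[i][k];                  // step 2: M(i) = dist(i, k)
        }
        for (int i = 0; i < k; i++) {         // step 3: update P and L
          if (lambda[i] >= m[i]) {
            m[pi[i]] = Math.min(m[pi[i]], lambda[i]);
            lambda[i] = m[i];
            pi[i] = k;
          } else {
            m[pi[i]] = Math.min(m[pi[i]], m[i]);
          }
        }
        for (int i = 0; i < k; i++) {         // step 4: redirect stale pointers
          if (lambda[i] >= lambda[pi[i]]) {
            pi[i] = k;
          }
        }
      }
    }
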
- private DBID lastObjectInCluster(DBID id, D stopdist, final DataStore<DBID> pi, final DataStore<D> lambda) {
- if(stopdist == null) {
- return id;
+ /**
+ * First step: Initialize P(id) = id, L(id) = infinity.
+ *
+ * @param id the id of the object to be inserted into the pointer
+ * representation
+ * @param pi Pi data store
+ * @param lambda Lambda data store
+ */
+ private void step1double(DBIDRef id, WritableDBIDDataStore pi, WritableDoubleDistanceDataStore lambda) {
+ // P(n+1) = n+1:
+ pi.put(id, id);
+ // L(n+1) = infinity
+ lambda.putDouble(id, Double.POSITIVE_INFINITY);
+ }
+
+ /**
+ * Second step: Determine the pairwise distances from all objects in the
+ * pointer representation to the new object with the specified id.
+ *
+ * @param id the id of the object to be inserted into the pointer
+ * representation
+ * @param processedIDs the already processed ids
+ * @param m Data store
+ * @param relation Data relation
+ * @param distFunc Distance function to use
+ */
+ private void step2double(DBIDRef id, DBIDs processedIDs, Relation<? extends O> relation, PrimitiveDoubleDistanceFunction<? super O> distFunc, WritableDoubleDistanceDataStore m) {
+ O newObj = relation.get(id);
+ for (DBIDIter it = processedIDs.iter(); it.valid(); it.advance()) {
+ // M(i) = dist(i, n+1)
+ m.putDouble(it, distFunc.doubleDistance(relation.get(it), newObj));
+ }
+ }
+
+ /**
+ * Third step: Determine the values for P and L
+ *
+ * @param id the id of the object to be inserted into the pointer
+ * representation
+ * @param pi Pi data store
+ * @param lambda Lambda data store
+ * @param processedIDs the already processed ids
+ * @param m Data store
+ */
+ private void step3double(DBIDRef id, WritableDBIDDataStore pi, WritableDoubleDistanceDataStore lambda, DBIDs processedIDs, WritableDoubleDistanceDataStore m) {
+ DBIDVar p_i = DBIDUtil.newVar();
+ // for i = 1..n
+ for (DBIDIter it = processedIDs.iter(); it.valid(); it.advance()) {
+ double l_i = lambda.doubleValue(it);
+ double m_i = m.doubleValue(it);
+ pi.assignVar(it, p_i); // p_i = pi(it)
+ double mp_i = m.doubleValue(p_i);
+
+ // if L(i) >= M(i)
+ if (l_i >= m_i) {
+ // M(P(i)) = min { M(P(i)), L(i) }
+ m.putDouble(p_i, Math.min(mp_i, l_i));
+
+ // L(i) = M(i)
+ lambda.putDouble(it, m_i);
+
+ // P(i) = n+1;
+ pi.put(it, id);
+ } else {
+ // M(P(i)) = min { M(P(i)), M(i) }
+ m.putDouble(p_i, Math.min(mp_i, m_i));
+ }
}
+ }
+
+ /**
+ * Fourth step: Actualize the clusters if necessary
+ *
+ * @param id the id of the current object
+ * @param pi Pi data store
+ * @param lambda Lambda data store
+ * @param processedIDs the already processed ids
+ */
+ private void step4double(DBIDRef id, WritableDBIDDataStore pi, WritableDoubleDistanceDataStore lambda, DBIDs processedIDs) {
+ DBIDVar p_i = DBIDUtil.newVar();
+ // for i = 1..n
+ for (DBIDIter it = processedIDs.iter(); it.valid(); it.advance()) {
+ double l_i = lambda.doubleValue(it);
+ pi.assignVar(it, p_i); // p_i = pi(it)
+ double lp_i = lambda.doubleValue(p_i);
- DBID currentID = id;
- while(lambda.get(currentID).compareTo(stopdist) < 1) {
- currentID = pi.get(currentID);
+ // if L(i) >= L(P(i))
+ if (l_i >= lp_i) {
+ // P(i) = n+1
+ pi.put(it, id);
+ }
}
- return currentID;
}
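
The step1double-step4double variants mirror the generic steps above but read and write primitive doubles, so no Distance object is allocated per comparison. The underlying idea, sketched with an int-indexed store (illustrative; ELKI maps DBIDs to array offsets internally):

    final class DoubleStoreSketch {
      private final double[] data;

      DoubleStoreSketch(int size) {
        data = new double[size];
      }

      double doubleValue(int id) {
        return data[id]; // primitive read: no Double boxing
      }

      void putDouble(int id, double value) {
        data[id] = value;
      }
    }
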
/**
@@ -286,167 +386,148 @@ public class SLINK<O, D extends Distance<D>> extends AbstractDistanceBasedAlgori
*
* @return Hierarchical clustering
*/
- private Clustering<DendrogramModel<D>> extractClusters(DBIDs ids, final DataStore<DBID> pi, final DataStore<D> lambda, int minclusters) {
- FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Extracting clusters", ids.size(), logger) : null;
+ private Clustering<DendrogramModel<D>> extractClusters(DBIDs ids, final DBIDDataStore pi, final DataStore<D> lambda, int minclusters) {
+ FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Extracting clusters", ids.size(), LOG) : null;
+ D nulldist = getDistanceFunction().getDistanceFactory().nullDistance();
- // stopdist
- D stopdist = null;
- // sort by lambda
+ // Sort DBIDs by lambda. We need this for two things:
+ // a) to determine the stop distance from "minclusters" parameter
+ // b) to process arrows in decreasing / increasing order
ArrayModifiableDBIDs order = DBIDUtil.newArray(ids);
order.sort(new CompareByLambda<D>(lambda));
- int index = ids.size() - minclusters - 1;
- while(index >= 0) {
- if(lambda.get(order.get(index)).equals(lambda.get(order.get(index + 1)))) {
- index--;
- }
- else {
- stopdist = lambda.get(order.get(index));
- break;
- }
- }
- // extract the child clusters
- Map<DBID, ModifiableDBIDs> cluster_ids = new HashMap<DBID, ModifiableDBIDs>();
- Map<DBID, D> cluster_distances = new HashMap<DBID, D>();
- for(DBIDIter it = ids.iter(); it.valid(); it.advance()) {
- DBID id = it.getDBID();
- DBID lastObjectInCluster = lastObjectInCluster(id, stopdist, pi, lambda);
- ModifiableDBIDs cluster = cluster_ids.get(lastObjectInCluster);
- if(cluster == null) {
- cluster = DBIDUtil.newArray();
- cluster_ids.put(lastObjectInCluster, cluster);
+ // Stop distance:
+ final D stopdist = (minclusters > 0) ? lambda.get(order.get(ids.size() - minclusters)) : null;
+
+ // The initial pass is top-down.
+ DBIDArrayIter it = order.iter();
+ int split = (minclusters > 0) ? Math.max(ids.size() - minclusters, 0) : 0;
+ // Tie handling: decrement split.
+ if (stopdist != null) {
+ while (split > 0) {
+ it.seek(split - 1);
+ if (stopdist.compareTo(lambda.get(it)) == 0) {
+ split--;
+ minclusters++;
+ } else {
+ break;
+ }
}
- cluster.add(id);
+ }
- D lambda_id = lambda.get(id);
- if(stopdist != null && lambda_id.compareTo(stopdist) <= 0 && (cluster_distances.get(lastObjectInCluster) == null || lambda_id.compareTo(cluster_distances.get(lastObjectInCluster)) > 0)) {
- cluster_distances.put(lastObjectInCluster, lambda_id);
+ // Extract the child clusters
+ int cnum = 0;
+ int expcnum = Math.max(0, minclusters);
+ WritableIntegerDataStore cluster_map = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_TEMP, -1);
+ ArrayList<ModifiableDBIDs> cluster_dbids = new ArrayList<ModifiableDBIDs>(expcnum);
+ ArrayList<D> cluster_dist = new ArrayList<D>(expcnum);
+ ArrayModifiableDBIDs cluster_leads = DBIDUtil.newArray(expcnum);
+
+ DBIDVar succ = DBIDUtil.newVar(); // Variable for successor.
+ // Go backwards on the lower part.
+ for (it.seek(split - 1); it.valid(); it.retract()) {
+ D dist = lambda.get(it); // Distance to successor
+ pi.assignVar(it, succ); // succ = pi(it)
+ int clusterid = cluster_map.intValue(succ);
+ // Successor cluster has already been created:
+ if (clusterid >= 0) {
+ cluster_dbids.get(clusterid).add(it);
+ cluster_map.putInt(it, clusterid);
+ // Update distance to maximum encountered:
+ if (cluster_dist.get(clusterid).compareTo(dist) < 0) {
+ cluster_dist.set(clusterid, dist);
+ }
+ } else {
+ // Need to start a new cluster:
+ clusterid = cnum; // next cluster number.
+ ModifiableDBIDs cids = DBIDUtil.newArray();
+ // Add element and successor as initial members:
+ cids.add(succ);
+ cluster_map.putInt(succ, clusterid);
+ cids.add(it);
+ cluster_map.putInt(it, clusterid);
+ // Store new cluster.
+ cluster_dbids.add(cids);
+ cluster_leads.add(succ);
+ cluster_dist.add(dist);
+ cnum++;
}
      // Increment the progress counter
- if(progress != null) {
- progress.incrementProcessed(logger);
+ if (progress != null) {
+ progress.incrementProcessed(LOG);
}
}
- if(progress != null) {
- progress.ensureCompleted(logger);
- }
-
- // build hierarchy
- final Clustering<DendrogramModel<D>> dendrogram = new Clustering<DendrogramModel<D>>("Single-Link-Dendrogram", "slink-dendrogram");
+ // Build a hierarchy out of these clusters.
+ Cluster<DendrogramModel<D>> root = null;
ModifiableHierarchy<Cluster<DendrogramModel<D>>> hier = new HierarchyHashmapList<Cluster<DendrogramModel<D>>>();
- Cluster<DendrogramModel<D>> root = root(cluster_ids, cluster_distances, pi, lambda, hier, progress);
- dendrogram.addCluster(root);
-
- return dendrogram;
- }
-
- private Cluster<DendrogramModel<D>> root(Map<DBID, ModifiableDBIDs> cluster_ids, Map<DBID, D> cluster_distances, final DataStore<DBID> pi, final DataStore<D> lambda, ModifiableHierarchy<Cluster<DendrogramModel<D>>> hier, FiniteProgress progress) {
- if(cluster_ids.size() == 1) {
- DBID id = cluster_ids.keySet().iterator().next();
- String name = "cluster_" + id + "_" + cluster_distances.get(id);
- return new Cluster<DendrogramModel<D>>(name, cluster_ids.get(id), new DendrogramModel<D>(cluster_distances.get(id)), hier);
- }
-
- // sort leafs by lambda
- List<Pair<DBID, D>> leafs = new ArrayList<Pair<DBID, D>>(cluster_ids.size());
- for(DBID id : cluster_ids.keySet()) {
- leafs.add(new Pair<DBID, D>(id, lambda.get(id)));
- }
-
- Collections.sort(leafs, new Comparator<Pair<DBID, D>>() {
- @Override
- public int compare(Pair<DBID, D> o1, Pair<DBID, D> o2) {
- D k1 = lambda.get(o1.first);
- D k2 = lambda.get(o2.first);
- if(k1 == null && k2 == null) {
- return 0;
- }
- else if(k1 == null) {
- return -1;
- }
- else if(k2 == null) {
- return 1;
- }
- else {
- return k1.compareTo(k2);
- }
+ ArrayList<Cluster<DendrogramModel<D>>> clusters = new ArrayList<Cluster<DendrogramModel<D>>>(ids.size() + expcnum - split);
+ // Convert initial clusters to cluster objects
+ {
+ int i = 0;
+ for (DBIDIter it2 = cluster_leads.iter(); it2.valid(); it2.advance(), i++) {
+ clusters.add(makeCluster(it2, cluster_dist.get(i), cluster_dbids.get(i), hier));
}
- });
-
- // create nodes of the dendrogram
- Cluster<DendrogramModel<D>> parent = null;
- Map<DBID, Cluster<DendrogramModel<D>>> nodes = new HashMap<DBID, Cluster<DendrogramModel<D>>>();
- int nodeCount = 0;
- int clusterCount = 0;
- while(!leafs.isEmpty()) {
- // left child
- Pair<DBID, D> leaf = leafs.remove(0);
- DBID leftID = leaf.first;
- Cluster<DendrogramModel<D>> left = nodes.get(leftID);
- if(left == null) {
- // String name = "cluster_" + leftID + "_" +
- // cluster_distances.get(leftID);
- String name = "cluster_" + (++clusterCount);
- left = new Cluster<DendrogramModel<D>>(name, cluster_ids.get(leftID), new DendrogramModel<D>(cluster_distances.get(leftID)), hier);
- nodes.put(leftID, left);
- }
- // right child
- DBID rightID = pi.get(leftID);
- if(leftID.sameDBID(rightID)) {
- break;
+ cluster_dist = null; // Invalidate
+ cluster_dbids = null; // Invalidate
+ }
+ // Process the upper part, bottom-up.
+ for (it.seek(split); it.valid(); it.advance()) {
+ int clusterid = cluster_map.intValue(it);
+ // The current cluster:
+ final Cluster<DendrogramModel<D>> clus;
+ if (clusterid >= 0) {
+ clus = clusters.get(clusterid);
+ } else {
+ ArrayModifiableDBIDs cids = DBIDUtil.newArray(1);
+ cids.add(it);
+ clus = makeCluster(it, nulldist, cids, hier);
+ // No need to store in clusters: cannot have another incoming pi
+ // pointer!
}
- Cluster<DendrogramModel<D>> right = nodes.get(rightID);
- if(right == null) {
- // String name = "cluster_" + rightID + "_" +
- // cluster_distances.get(rightID);
- String name = "cluster_" + (++clusterCount);
- right = new Cluster<DendrogramModel<D>>(name, cluster_ids.get(rightID), new DendrogramModel<D>(cluster_distances.get(rightID)), hier);
- nodes.put(rightID, right);
+ // The successor to join:
+ pi.assignVar(it, succ); // succ = pi(it)
+ if (DBIDUtil.equal(it, succ)) {
+ assert (root == null);
+ root = clus;
+ } else {
+ // Parent cluster:
+ int parentid = cluster_map.intValue(succ);
+ D depth = lambda.get(it);
+ // Parent cluster exists - merge as a new cluster:
+ if (parentid >= 0) {
+ Cluster<DendrogramModel<D>> pclus = makeCluster(succ, depth, DBIDUtil.EMPTYDBIDS, hier);
+ hier.add(pclus, clusters.get(parentid));
+ hier.add(pclus, clus);
+ clusters.set(parentid, pclus); // Replace existing parent cluster
+ } else {
+ // Create a new, one-element, parent cluster.
+ parentid = cnum;
+ cnum++;
+ ArrayModifiableDBIDs cids = DBIDUtil.newArray(1);
+ cids.add(succ);
+ Cluster<DendrogramModel<D>> pclus = makeCluster(succ, depth, cids, hier);
+ hier.add(pclus, clus);
+ assert (clusters.size() == parentid);
+ clusters.add(pclus); // Remember parent cluster
+ cluster_map.putInt(succ, parentid); // Reference
+ }
}
- // parent
- // String name = "node" + (++nodeCount) + "_" + leaf.second;
- String name = "node_" + (++nodeCount);
- parent = createParent(name, lastAncestor(left, hier), lastAncestor(right, hier), leaf.second, hier);
      // Increment the progress counter
- if(progress != null) {
- progress.incrementProcessed(logger);
+ if (progress != null) {
+ progress.incrementProcessed(LOG);
}
}
- // root = parent
- return parent;
- }
- /**
- * Determines recursively the last ancestor of the specified cluster.
- *
- * @param cluster the child
- * @param hier the cluster hierarchy
- * @return the (currently) last ancestor
- */
- private Cluster<DendrogramModel<D>> lastAncestor(Cluster<DendrogramModel<D>> cluster, ModifiableHierarchy<Cluster<DendrogramModel<D>>> hier) {
- List<Cluster<DendrogramModel<D>>> parents = hier.getParents(cluster);
- if(parents.isEmpty()) {
- return cluster;
+ if (progress != null) {
+ progress.ensureCompleted(LOG);
}
- else {
- if(parents.size() > 1) {
- logger.warning("More than one parent in Single-Link dendrogram: " + cluster + " parents: " + parents);
- return null;
- }
- return lastAncestor(parents.get(0), hier);
- }
- }
-
- private Cluster<DendrogramModel<D>> createParent(String name, Cluster<DendrogramModel<D>> leftChild, Cluster<DendrogramModel<D>> rightChild, D distance, ModifiableHierarchy<Cluster<DendrogramModel<D>>> hier) {
- // DBIDs ids = DBIDUtil.union(leftChild.getIDs(), rightChild.getIDs());
- Cluster<DendrogramModel<D>> parent = new Cluster<DendrogramModel<D>>(name, DBIDUtil.EMPTYDBIDS, new DendrogramModel<D>(distance), hier);
-
- hier.add(parent, leftChild);
- hier.add(parent, rightChild);
+ // build hierarchy
+ final Clustering<DendrogramModel<D>> dendrogram = new Clustering<DendrogramModel<D>>("Single-Link-Dendrogram", "slink-dendrogram");
+ dendrogram.addCluster(root);
- return parent;
+ return dendrogram;
}
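
The stop distance and tie handling above (the double variant below uses the same scheme) reduce to: cut the ascending lambda array minclusters entries from the top, then move the cut downwards past ties so that merges at equal height end up on the same side. As a standalone sketch (plain arrays; assumes 0 < minclusters <= n):

    static int splitPoint(double[] sortedLambda, int minclusters) {
      final int n = sortedLambda.length;
      if (minclusters <= 0) {
        return 0; // no cut requested: build the complete dendrogram
      }
      int split = Math.max(n - minclusters, 0);
      final double stopdist = sortedLambda[n - minclusters];
      // Tie handling: merges at exactly stopdist move above the cut.
      while (split > 0 && sortedLambda[split - 1] == stopdist) {
        split--;
      }
      return split;
    }

For example, splitPoint(new double[] { 1, 2, 2, 2, 9 }, 2) returns 1: the tied merges at height 2 are all moved above the cut, so more than minclusters clusters can result, which is what the minclusters++ in the loop above accounts for.
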
/**
@@ -459,119 +540,174 @@ public class SLINK<O, D extends Distance<D>> extends AbstractDistanceBasedAlgori
*
* @return Hierarchical clustering
*/
- @SuppressWarnings("unused")
- private Clustering<Model> extractClusters_erich(DBIDs ids, final DataStore<DBID> pi, final DataStore<D> lambda, int minclusters) {
- // extract a hierarchical clustering
- ArrayModifiableDBIDs order = DBIDUtil.newArray(ids);
- // sort by lambda
- order.sort(new CompareByLambda<D>(lambda));
- D curdist = null;
+ private Clustering<DendrogramModel<D>> extractClustersDouble(DBIDs ids, final DBIDDataStore pi, final DoubleDistanceDataStore lambda, int minclusters) {
+ FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Extracting clusters", ids.size(), LOG) : null;
+ D nulldist = getDistanceFunction().getDistanceFactory().nullDistance();
- D stopdist = null;
- if(minclusters < ids.size()) {
- stopdist = lambda.get(order.get(ids.size() - minclusters));
+ // Sort DBIDs by lambda. We need this for two things:
+ // a) to determine the stop distance from "minclusters" parameter
+ // b) to process arrows in decreasing / increasing order
+ ArrayModifiableDBIDs order = DBIDUtil.newArray(ids);
+ order.sort(new CompareByDoubleLambda(lambda));
+
+ // Stop distance:
+ final double stopdist = (minclusters > 0) ? lambda.doubleValue(order.get(ids.size() - minclusters)) : Double.POSITIVE_INFINITY;
+
+ // The initial pass is top-down.
+ DBIDArrayIter it = order.iter();
+ int split = (minclusters > 0) ? Math.max(ids.size() - minclusters, 0) : 0;
+ // Tie handling: decrement split.
+ if (minclusters > 0) {
+ while (split > 0) {
+ it.seek(split - 1);
+ if (stopdist <= lambda.doubleValue(it)) {
+ split--;
+ minclusters++;
+ } else {
+ break;
+ }
+ }
}
- ModifiableHierarchy<Cluster<Model>> hier = new HierarchyHashmapList<Cluster<Model>>();
- Map<DBID, Cluster<Model>> clusters = new HashMap<DBID, Cluster<Model>>();
- Map<DBID, ModifiableDBIDs> cids = new HashMap<DBID, ModifiableDBIDs>();
-
- FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Extracting clusters", ids.size(), logger) : null;
-
- for(DBIDIter it = order.iter(); it.valid(); it.advance()) {
- DBID dest = pi.get(it);
- D l = lambda.get(it);
- // logger.debugFine("DBID " + cur.toString() + " dist: " + l.toString());
- if(stopdist != null && stopdist.compareTo(l) > 0) {
- DBID cur = it.getDBID();
- ModifiableDBIDs curset = cids.remove(cur);
- ModifiableDBIDs destset = cids.get(dest);
- if(destset == null) {
- if(curset != null) {
- destset = curset;
- }
- else {
- destset = DBIDUtil.newHashSet();
- destset.add(cur);
- }
- destset.add(dest);
- cids.put(dest, destset);
- }
- else {
- if(curset != null) {
- destset.addDBIDs(curset);
- }
- else {
- destset.add(cur);
- }
+ // Extract the child clusters
+ int cnum = 0;
+ int expcnum = Math.max(0, minclusters);
+ WritableIntegerDataStore cluster_map = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_TEMP, -1);
+ ArrayList<ModifiableDBIDs> cluster_dbids = new ArrayList<ModifiableDBIDs>(expcnum);
+ TDoubleArrayList cluster_dist = new TDoubleArrayList(expcnum);
+ ArrayModifiableDBIDs cluster_leads = DBIDUtil.newArray(expcnum);
+
+ DBIDVar succ = DBIDUtil.newVar(); // Variable for successor.
+ // Go backwards on the lower part.
+ for (it.seek(split - 1); it.valid(); it.retract()) {
+ double dist = lambda.doubleValue(it); // Distance to successor
+ pi.assignVar(it, succ); // succ = pi(it)
+ int clusterid = cluster_map.intValue(succ);
+ // Successor cluster has already been created:
+ if (clusterid >= 0) {
+ cluster_dbids.get(clusterid).add(it);
+ cluster_map.putInt(it, clusterid);
+ // Update distance to maximum encountered:
+ if (cluster_dist.get(clusterid) < dist) {
+ cluster_dist.set(clusterid, dist);
}
- curdist = l;
+ } else {
+ // Need to start a new cluster:
+ clusterid = cnum; // next cluster number.
+ ModifiableDBIDs cids = DBIDUtil.newArray();
+ // Add element and successor as initial members:
+ cids.add(succ);
+ cluster_map.putInt(succ, clusterid);
+ cids.add(it);
+ cluster_map.putInt(it, clusterid);
+ // Store new cluster.
+ cluster_dbids.add(cids);
+ cluster_leads.add(succ);
+ cluster_dist.add(dist);
+ cnum++;
}
- else {
- if(curdist == null || l.compareTo(curdist) > 0) {
- // New distance level reached. Post-process the current objects
- for(Entry<DBID, ModifiableDBIDs> ent : cids.entrySet()) {
- DBID key = ent.getKey();
- ModifiableDBIDs clusids = ent.getValue();
- // Make a new cluster
- String cname = "Cluster_" + key.toString() + "_" + curdist.toString();
- Cluster<Model> cluster = new Cluster<Model>(cname, clusids, ClusterModel.CLUSTER, hier);
- // Collect child clusters and clean up the cluster ids, keeping only
- // "new" objects.
- for(DBIDMIter iter = clusids.iter(); iter.valid(); iter.advance()) {
- Cluster<Model> chiclus = clusters.get(iter);
- if(chiclus != null) {
- hier.add(cluster, chiclus);
- clusters.remove(iter);
- iter.remove();
- }
- }
- clusters.put(key, cluster);
- }
- if(logger.isDebuggingFine()) {
- StringBuffer buf = new StringBuffer();
- buf.append("Number of clusters at depth ");
- buf.append((curdist != null ? curdist.toString() : "null"));
- buf.append(": ").append(clusters.size()).append(" ");
- buf.append("last-objects:");
- for(DBID id : clusters.keySet()) {
- buf.append(" ").append(id.toString());
- }
- logger.debugFine(buf.toString());
- }
- cids.clear();
- curdist = l;
- }
- // Add the current object to the destinations cluster
- {
- ModifiableDBIDs destset = cids.get(dest);
- if(destset == null) {
- destset = DBIDUtil.newHashSet();
- cids.put(dest, destset);
- destset.add(dest);
- }
- destset.add(it);
+
+      // Increment the progress counter
+ if (progress != null) {
+ progress.incrementProcessed(LOG);
+ }
+ }
+ // Build a hierarchy out of these clusters.
+ Cluster<DendrogramModel<D>> root = null;
+ ModifiableHierarchy<Cluster<DendrogramModel<D>>> hier = new HierarchyHashmapList<Cluster<DendrogramModel<D>>>();
+ ArrayList<Cluster<DendrogramModel<D>>> clusters = new ArrayList<Cluster<DendrogramModel<D>>>(ids.size() + expcnum - split);
+ // Convert initial clusters to cluster objects
+ {
+ int i = 0;
+ for (DBIDIter it2 = cluster_leads.iter(); it2.valid(); it2.advance(), i++) {
+ @SuppressWarnings("unchecked")
+ D depth = (D) new DoubleDistance(cluster_dist.get(i));
+ clusters.add(makeCluster(it2, depth, cluster_dbids.get(i), hier));
+ }
+ cluster_dist = null; // Invalidate
+ cluster_dbids = null; // Invalidate
+ }
+ // Process the upper part, bottom-up.
+ for (it.seek(split); it.valid(); it.advance()) {
+ int clusterid = cluster_map.intValue(it);
+ // The current cluster:
+ final Cluster<DendrogramModel<D>> clus;
+ if (clusterid >= 0) {
+ clus = clusters.get(clusterid);
+ } else {
+ ArrayModifiableDBIDs cids = DBIDUtil.newArray(1);
+ cids.add(it);
+ clus = makeCluster(it, nulldist, cids, hier);
+ // No need to store in clusters: cannot have another incoming pi
+ // pointer!
+ }
+ // The successor to join:
+ pi.assignVar(it, succ); // succ = pi(it)
+ if (DBIDUtil.equal(it, succ)) {
+ assert (root == null);
+ root = clus;
+ } else {
+ // Parent cluster:
+ int parentid = cluster_map.intValue(succ);
+ @SuppressWarnings("unchecked")
+ D depth = (D) new DoubleDistance(lambda.doubleValue(it));
+ // Parent cluster exists - merge as a new cluster:
+ if (parentid >= 0) {
+ Cluster<DendrogramModel<D>> pclus = makeCluster(succ, depth, DBIDUtil.EMPTYDBIDS, hier);
+ hier.add(pclus, clusters.get(parentid));
+ hier.add(pclus, clus);
+ clusters.set(parentid, pclus); // Replace existing parent cluster
+ } else {
+ // Create a new, one-element, parent cluster.
+ parentid = cnum;
+ cnum++;
+ ArrayModifiableDBIDs cids = DBIDUtil.newArray(1);
+ cids.add(succ);
+ Cluster<DendrogramModel<D>> pclus = makeCluster(succ, depth, cids, hier);
+ hier.add(pclus, clus);
+ assert (clusters.size() == parentid);
+ clusters.add(pclus); // Remember parent cluster
+ cluster_map.putInt(succ, parentid); // Reference
}
}
+
    // Increment the progress counter
- if(progress != null) {
- progress.incrementProcessed(logger);
+ if (progress != null) {
+ progress.incrementProcessed(LOG);
}
}
- if(progress != null) {
- progress.ensureCompleted(logger);
- }
- // There should be one cluster remaining at infinite distance...
- if(clusters.size() != 1) {
- logger.warning("Single-link is expected to have a single cluster at the top level!");
- return null;
+
+ if (progress != null) {
+ progress.ensureCompleted(LOG);
}
- final Clustering<Model> clustering = new Clustering<Model>("Single-Link-Clustering", "slink-clustering");
- // FIXME: validate this is works correctly for a single-object dataset!
- for(Cluster<Model> cluster : clusters.values()) {
- clustering.addCluster(cluster);
+ // build hierarchy
+ final Clustering<DendrogramModel<D>> dendrogram = new Clustering<DendrogramModel<D>>("Single-Link-Dendrogram", "slink-dendrogram");
+ dendrogram.addCluster(root);
+
+ return dendrogram;
+ }
+
+ /**
+ * Make the cluster for the given object
+ *
+ * @param lead Leading object
+ * @param depth Linkage depth
+ * @param members Member objects
+ * @param hier Cluster hierarchy
+ * @return Cluster
+ */
+ private Cluster<DendrogramModel<D>> makeCluster(DBIDRef lead, D depth, DBIDs members, ModifiableHierarchy<Cluster<DendrogramModel<D>>> hier) {
+ final String name;
+ if (members.size() == 0) {
+ name = "merge_" + lead + "_" + depth;
+ } else if (depth.isInfiniteDistance()) {
+ assert (members.contains(lead));
+ name = "object_" + lead;
+ } else {
+ name = "cluster_" + lead + "_" + depth;
}
- return clustering;
+ Cluster<DendrogramModel<D>> cluster = new Cluster<DendrogramModel<D>>(name, members, new DendrogramModel<D>(depth), hier);
+ return cluster;
}
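
makeCluster derives the label from the lead object and the linkage depth; an empty member set marks a virtual merge node, infinite depth marks an unmerged top-level object. Restated standalone (illustrative):

    static String clusterName(String lead, double depth, int numMembers) {
      if (numMembers == 0) {
        return "merge_" + lead + "_" + depth;   // virtual merge node
      } else if (Double.isInfinite(depth)) {
        return "object_" + lead;                // unmerged top-level object
      } else {
        return "cluster_" + lead + "_" + depth; // regular dendrogram cluster
      }
    }
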
@Override
@@ -581,7 +717,7 @@ public class SLINK<O, D extends Distance<D>> extends AbstractDistanceBasedAlgori
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -593,7 +729,7 @@ public class SLINK<O, D extends Distance<D>> extends AbstractDistanceBasedAlgori
*
* @param <D> Distance type
*/
- private static final class CompareByLambda<D extends Distance<D>> implements Comparator<DBID> {
+ private static final class CompareByLambda<D extends Distance<D>> implements Comparator<DBIDRef> {
/**
* Lambda storage
*/
@@ -609,7 +745,7 @@ public class SLINK<O, D extends Distance<D>> extends AbstractDistanceBasedAlgori
}
@Override
- public int compare(DBID id1, DBID id2) {
+ public int compare(DBIDRef id1, DBIDRef id2) {
D k1 = lambda.get(id1);
D k2 = lambda.get(id2);
assert (k1 != null);
@@ -619,6 +755,36 @@ public class SLINK<O, D extends Distance<D>> extends AbstractDistanceBasedAlgori
}
/**
+ * Order a DBID collection by the lambda value.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ private static final class CompareByDoubleLambda implements Comparator<DBIDRef> {
+ /**
+ * Lambda storage
+ */
+ private final DoubleDistanceDataStore lambda;
+
+ /**
+ * Constructor.
+ *
+ * @param lambda Lambda storage
+ */
+ protected CompareByDoubleLambda(DoubleDistanceDataStore lambda) {
+ this.lambda = lambda;
+ }
+
+ @Override
+ public int compare(DBIDRef id1, DBIDRef id2) {
+ double k1 = lambda.doubleValue(id1);
+ double k2 = lambda.doubleValue(id2);
+ return Double.compare(k1, k2);
+ }
+ }
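
CompareByDoubleLambda delegates to Double.compare rather than, say, casting a difference to int: the cast truncates sub-integer gaps to zero and breaks the comparator contract for NaN. A minimal standalone demonstration (illustrative):

    public final class CompareDemo {
      public static void main(String[] args) {
        double k1 = 0.25, k2 = 0.75;
        System.out.println(Double.compare(k1, k2)); // -1: correct ordering
        System.out.println((int) (k1 - k2));        // 0: truncation loses the ordering
      }
    }
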
+
+ /**
* Parameterization class.
*
* @author Erich Schubert
@@ -626,14 +792,21 @@ public class SLINK<O, D extends Distance<D>> extends AbstractDistanceBasedAlgori
* @apiviz.exclude
*/
public static class Parameterizer<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
- protected Integer minclusters = null;
+ /**
+     * The minimum number of clusters to extract.
+     */
+    public static final OptionID SLINK_MINCLUSTERS_ID = new OptionID("slink.minclusters", "The minimum number of clusters to extract.");
+
+ protected int minclusters = -1;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- IntParameter minclustersP = new IntParameter(SLINK_MINCLUSTERS_ID, new GreaterEqualConstraint(1), true);
- if(config.grab(minclustersP)) {
- minclusters = minclustersP.getValue();
+ IntParameter minclustersP = new IntParameter(SLINK_MINCLUSTERS_ID);
+ minclustersP.addConstraint(new GreaterEqualConstraint(1));
+ minclustersP.setOptional(true);
+ if (config.grab(minclustersP)) {
+ minclusters = minclustersP.intValue();
}
}
@@ -642,4 +815,4 @@ public class SLINK<O, D extends Distance<D>> extends AbstractDistanceBasedAlgori
return new SLINK<O, D>(distanceFunction, minclusters);
}
}
-}
\ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SNNClustering.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SNNClustering.java
index ae612b2a..f3b59c42 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SNNClustering.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SNNClustering.java
@@ -81,13 +81,13 @@ public class SNNClustering<O> extends AbstractAlgorithm<Clustering<Model>> imple
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(SNNClustering.class);
+ private static final Logging LOG = Logging.getLogger(SNNClustering.class);
/**
* Parameter to specify the minimum SNN density, must be an integer greater
* than 0.
*/
- public static final OptionID EPSILON_ID = OptionID.getOrCreateOptionID("snn.epsilon", "The minimum SNN density.");
+ public static final OptionID EPSILON_ID = new OptionID("snn.epsilon", "The minimum SNN density.");
/**
* Holds the value of {@link #EPSILON_ID}.
@@ -98,7 +98,7 @@ public class SNNClustering<O> extends AbstractAlgorithm<Clustering<Model>> imple
* Parameter to specify the threshold for minimum number of points in the
* epsilon-SNN-neighborhood of a point, must be an integer greater than 0.
*/
- public static final OptionID MINPTS_ID = OptionID.getOrCreateOptionID("snn.minpts", "Threshold for minimum number of points in " + "the epsilon-SNN-neighborhood of a point.");
+ public static final OptionID MINPTS_ID = new OptionID("snn.minpts", "Threshold for minimum number of points in " + "the epsilon-SNN-neighborhood of a point.");
/**
* Holds the value of {@link #MINPTS_ID}.
@@ -149,22 +149,22 @@ public class SNNClustering<O> extends AbstractAlgorithm<Clustering<Model>> imple
public Clustering<Model> run(Database database, Relation<O> relation) {
SimilarityQuery<O, IntegerDistance> snnInstance = similarityFunction.instantiate(relation);
- FiniteProgress objprog = logger.isVerbose() ? new FiniteProgress("SNNClustering", relation.size(), logger) : null;
- IndefiniteProgress clusprog = logger.isVerbose() ? new IndefiniteProgress("Number of clusters", logger) : null;
+ FiniteProgress objprog = LOG.isVerbose() ? new FiniteProgress("SNNClustering", relation.size(), LOG) : null;
+ IndefiniteProgress clusprog = LOG.isVerbose() ? new IndefiniteProgress("Number of clusters", LOG) : null;
resultList = new ArrayList<ModifiableDBIDs>();
noise = DBIDUtil.newHashSet();
processedIDs = DBIDUtil.newHashSet(relation.size());
if(relation.size() >= minpts) {
for(DBIDIter id = snnInstance.getRelation().iterDBIDs(); id.valid(); id.advance()) {
if(!processedIDs.contains(id)) {
- expandCluster(snnInstance, id.getDBID(), objprog, clusprog);
+ expandCluster(snnInstance, DBIDUtil.deref(id), objprog, clusprog);
if(processedIDs.size() == relation.size() && noise.size() == 0) {
break;
}
}
if(objprog != null && clusprog != null) {
- objprog.setProcessed(processedIDs.size(), logger);
- clusprog.setProcessed(resultList.size(), logger);
+ objprog.setProcessed(processedIDs.size(), LOG);
+ clusprog.setProcessed(resultList.size(), LOG);
}
}
}
@@ -172,15 +172,15 @@ public class SNNClustering<O> extends AbstractAlgorithm<Clustering<Model>> imple
for(DBIDIter id = snnInstance.getRelation().iterDBIDs(); id.valid(); id.advance()) {
noise.add(id);
if(objprog != null && clusprog != null) {
- objprog.setProcessed(noise.size(), logger);
- clusprog.setProcessed(resultList.size(), logger);
+ objprog.setProcessed(noise.size(), LOG);
+ clusprog.setProcessed(resultList.size(), LOG);
}
}
}
// Finish progress logging
if(objprog != null && clusprog != null) {
- objprog.ensureCompleted(logger);
- clusprog.setCompleted(logger);
+ objprog.ensureCompleted(LOG);
+ clusprog.setCompleted(LOG);
}
Clustering<Model> result = new Clustering<Model>("Shared-Nearest-Neighbor Clustering", "snn-clustering");
@@ -230,8 +230,8 @@ public class SNNClustering<O> extends AbstractAlgorithm<Clustering<Model>> imple
noise.add(startObjectID);
processedIDs.add(startObjectID);
if(objprog != null && clusprog != null) {
- objprog.setProcessed(processedIDs.size(), logger);
- clusprog.setProcessed(resultList.size(), logger);
+ objprog.setProcessed(processedIDs.size(), LOG);
+ clusprog.setProcessed(resultList.size(), LOG);
}
return;
}
@@ -255,26 +255,25 @@ public class SNNClustering<O> extends AbstractAlgorithm<Clustering<Model>> imple
if(neighborhood.size() >= minpts) {
for(DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) {
- DBID p = iter.getDBID();
- boolean inNoise = noise.contains(p);
- boolean unclassified = !processedIDs.contains(p);
+ boolean inNoise = noise.contains(iter);
+ boolean unclassified = !processedIDs.contains(iter);
if(inNoise || unclassified) {
if(unclassified) {
- seeds.add(p);
+ seeds.add(iter);
}
- currentCluster.add(p);
- processedIDs.add(p);
+ currentCluster.add(iter);
+ processedIDs.add(iter);
if(inNoise) {
- noise.remove(p);
+ noise.remove(iter);
}
}
}
}
if(objprog != null && clusprog != null) {
- objprog.setProcessed(processedIDs.size(), logger);
+ objprog.setProcessed(processedIDs.size(), LOG);
int numClusters = currentCluster.size() > minpts ? resultList.size() + 1 : resultList.size();
- clusprog.setProcessed(numClusters, logger);
+ clusprog.setProcessed(numClusters, LOG);
}
if(processedIDs.size() == snnInstance.getRelation().size() && noise.size() == 0) {
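
The expandCluster loop above follows the usual density-based expansion: pull a seed, and if it is dense enough, absorb its unclassified neighbors as new seeds and reclaim noise points. The same shape with plain JDK collections, as a sketch under the assumption that the caller already verified the start object is a core point; neighborhood() stands in for the epsilon-SNN query:

    import java.util.ArrayDeque;
    import java.util.Deque;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;

    abstract class ExpandSketch {
      abstract List<Integer> neighborhood(int id);

      Set<Integer> expand(int start, Set<Integer> processed, Set<Integer> noise, int minpts) {
        Set<Integer> cluster = new HashSet<>();
        cluster.add(start);
        processed.add(start);
        Deque<Integer> seeds = new ArrayDeque<>(neighborhood(start));
        while (!seeds.isEmpty()) {
          int p = seeds.poll();
          List<Integer> nb = neighborhood(p);
          if (nb.size() >= minpts) {
            for (int q : nb) {
              boolean inNoise = noise.contains(q);
              boolean unclassified = !processed.contains(q);
              if (inNoise || unclassified) {
                if (unclassified) {
                  seeds.add(q); // expand the frontier
                }
                cluster.add(q);
                processed.add(q);
                if (inNoise) {
                  noise.remove(q); // reclaim former noise
                }
              }
            }
          }
        }
        return cluster;
      }
    }
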
@@ -298,7 +297,7 @@ public class SNNClustering<O> extends AbstractAlgorithm<Clustering<Model>> imple
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -328,9 +327,10 @@ public class SNNClustering<O> extends AbstractAlgorithm<Clustering<Model>> imple
epsilon = epsilonP.getValue();
}
- IntParameter minptsP = new IntParameter(MINPTS_ID, new GreaterConstraint(0));
+ IntParameter minptsP = new IntParameter(MINPTS_ID);
+ minptsP.addConstraint(new GreaterConstraint(0));
if(config.grab(minptsP)) {
- minpts = minptsP.getValue();
+ minpts = minptsP.intValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/CASH.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/CASH.java
index e4c6a123..1cb1eb0d 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/CASH.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/CASH.java
@@ -23,19 +23,20 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import java.util.ArrayList;
import java.util.Arrays;
-import java.util.List;
import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.DependencyDerivator;
import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.cash.CASHInterval;
import de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.cash.CASHIntervalSplit;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.cash.ParameterizationFunction;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.DoubleVector;
import de.lmu.ifi.dbs.elki.data.HyperBoundingBox;
-import de.lmu.ifi.dbs.elki.data.ParameterizationFunction;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.model.ClusterModel;
import de.lmu.ifi.dbs.elki.data.model.CorrelationAnalysisSolution;
import de.lmu.ifi.dbs.elki.data.model.LinearEquationModel;
@@ -65,16 +66,13 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.FirstNEigenPairFilter;
import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredRunner;
import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.IntegerPriorityObject;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
-import de.lmu.ifi.dbs.elki.utilities.exceptions.UnableToComplyException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
@@ -86,10 +84,6 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
 * Provides the CASH algorithm, a subspace clustering algorithm based on the
* Hough transform.
*
- * <b>Note:</b> CASH requires explicitly setting the input vector type to
- * {@link ParameterizationFunction}:
- * (in the MiniGui, set option: parser.vector-type ParameterizationFunction).
- *
* <p>
* Reference: E. Achtert, C. Böhm, J. David, P. Kröger, A. Zimek: Robust
* clustering in arbitrarily oriented subspaces. <br>
@@ -101,16 +95,18 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
* @apiviz.has CASHInterval
* @apiviz.uses ParameterizationFunction
* @apiviz.has LinearEquationModel
+ *
+ * @param <V> Vector type
*/
// todo elke hierarchy (later)
@Title("CASH: Robust clustering in arbitrarily oriented subspaces")
@Description("Subspace clustering algorithm based on the Hough transform.")
@Reference(authors = "E. Achtert, C. Böhm, J. David, P. Kröger, A. Zimek", title = "Robust clustering in arbitrarily oriented subspaces", booktitle = "Proc. 8th SIAM Int. Conf. on Data Mining (SDM'08), Atlanta, GA, 2008", url = "http://www.siam.org/proceedings/datamining/2008/dm08_69_AchtertBoehmDavidKroegerZimek.pdf")
-public class CASH extends AbstractAlgorithm<Clustering<Model>> implements ClusteringAlgorithm<Clustering<Model>> {
+public class CASH<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<Model>> implements ClusteringAlgorithm<Clustering<Model>> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(CASH.class);
+ private static final Logging LOG = Logging.getLogger(CASH.class);
/**
* Parameter to specify the threshold for minimum number of points in a
@@ -119,7 +115,7 @@ public class CASH extends AbstractAlgorithm<Clustering<Model>> implements Cluste
* Key: {@code -cash.minpts}
* </p>
*/
- public static final OptionID MINPTS_ID = OptionID.getOrCreateOptionID("cash.minpts", "Threshold for minimum number of points in a cluster.");
+ public static final OptionID MINPTS_ID = new OptionID("cash.minpts", "Threshold for minimum number of points in a cluster.");
/**
* Parameter to specify the maximum level for splitting the hypercube, must be
@@ -128,7 +124,7 @@ public class CASH extends AbstractAlgorithm<Clustering<Model>> implements Cluste
* Key: {@code -cash.maxlevel}
* </p>
*/
- public static final OptionID MAXLEVEL_ID = OptionID.getOrCreateOptionID("cash.maxlevel", "The maximum level for splitting the hypercube.");
+ public static final OptionID MAXLEVEL_ID = new OptionID("cash.maxlevel", "The maximum level for splitting the hypercube.");
/**
* Parameter to specify the minimum dimensionality of the subspaces to be
@@ -140,7 +136,7 @@ public class CASH extends AbstractAlgorithm<Clustering<Model>> implements Cluste
* Key: {@code -cash.mindim}
* </p>
*/
- public static final OptionID MINDIM_ID = OptionID.getOrCreateOptionID("cash.mindim", "The minimum dimensionality of the subspaces to be found.");
+ public static final OptionID MINDIM_ID = new OptionID("cash.mindim", "The minimum dimensionality of the subspaces to be found.");
/**
* Parameter to specify the maximum jitter for distance values, must be a
@@ -149,7 +145,7 @@ public class CASH extends AbstractAlgorithm<Clustering<Model>> implements Cluste
* Key: {@code -cash.jitter}
* </p>
*/
- public static final OptionID JITTER_ID = OptionID.getOrCreateOptionID("cash.jitter", "The maximum jitter for distance values.");
+ public static final OptionID JITTER_ID = new OptionID("cash.jitter", "The maximum jitter for distance values.");
/**
* Flag to indicate that an adjustment of the applied heuristic for choosing
@@ -158,7 +154,7 @@ public class CASH extends AbstractAlgorithm<Clustering<Model>> implements Cluste
* Key: {@code -cash.adjust}
* </p>
*/
- public static final OptionID ADJUST_ID = OptionID.getOrCreateOptionID("cash.adjust", "Flag to indicate that an adjustment of the applied heuristic for choosing an interval " + "is performed after an interval is selected.");
+ public static final OptionID ADJUST_ID = new OptionID("cash.adjust", "Flag to indicate that an adjustment of the applied heuristic for choosing an interval " + "is performed after an interval is selected.");
/**
* Holds the value of {@link #MINPTS_ID}.
@@ -196,7 +192,7 @@ public class CASH extends AbstractAlgorithm<Clustering<Model>> implements Cluste
private ModifiableDBIDs processedIDs;
/**
- * The entire database
+ * The entire relation.
*/
private Relation<ParameterizationFunction> fulldatabase;
@@ -222,52 +218,61 @@ public class CASH extends AbstractAlgorithm<Clustering<Model>> implements Cluste
* Run CASH on the relation.
*
* @param database Database
- * @param relation Relation
+ * @param vrel Relation
* @return Clustering result
*/
- public Clustering<Model> run(Database database, Relation<ParameterizationFunction> relation) {
- this.fulldatabase = relation;
- if(logger.isVerbose()) {
- StringBuffer msg = new StringBuffer();
- msg.append("DB size: ").append(relation.size());
+ public Clustering<Model> run(Database database, Relation<V> vrel) {
+ this.fulldatabase = preprocess(database, vrel);
+ if (LOG.isVerbose()) {
+ StringBuilder msg = new StringBuilder();
+ msg.append("DB size: ").append(fulldatabase.size());
msg.append("\nmin Dim: ").append(minDim);
- logger.verbose(msg.toString());
+ LOG.verbose(msg.toString());
}
- try {
- processedIDs = DBIDUtil.newHashSet(relation.size());
- noiseDim = DatabaseUtil.dimensionality(relation);
+ processedIDs = DBIDUtil.newHashSet(fulldatabase.size());
+ noiseDim = dimensionality(fulldatabase);
- FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("CASH Clustering", relation.size(), logger) : null;
- Clustering<Model> result = doRun(relation, progress);
- if(progress != null) {
- progress.ensureCompleted(logger);
- }
+ FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("CASH Clustering", fulldatabase.size(), LOG) : null;
+ Clustering<Model> result = doRun(fulldatabase, progress);
+ if (progress != null) {
+ progress.ensureCompleted(LOG);
+ }
- if(logger.isVerbose()) {
- StringBuffer msg = new StringBuffer();
- for(Cluster<Model> c : result.getAllClusters()) {
- if(c.getModel() instanceof LinearEquationModel) {
- LinearEquationModel s = (LinearEquationModel) c.getModel();
- msg.append("\n Cluster: Dim: " + s.getLes().subspacedim() + " size: " + c.size());
- }
- else {
- msg.append("\n Cluster: " + c.getModel().getClass().getName() + " size: " + c.size());
- }
+ if (LOG.isVerbose()) {
+ StringBuilder msg = new StringBuilder();
+ for (Cluster<Model> c : result.getAllClusters()) {
+ if (c.getModel() instanceof LinearEquationModel) {
+ LinearEquationModel s = (LinearEquationModel) c.getModel();
+ msg.append("\n Cluster: Dim: " + s.getLes().subspacedim() + " size: " + c.size());
+ } else {
+ msg.append("\n Cluster: " + c.getModel().getClass().getName() + " size: " + c.size());
}
- logger.verbose(msg.toString());
}
- return result;
- }
- catch(UnableToComplyException e) {
- throw new IllegalStateException(e);
+ LOG.verbose(msg.toString());
}
- catch(ParameterException e) {
- throw new IllegalStateException(e);
- }
- catch(NonNumericFeaturesException e) {
- throw new IllegalStateException(e);
+ return result;
+ }
+
+ /**
+ * Preprocess the dataset, precomputing the parameterization functions.
+ *
+ * @param db Database
+ * @param vrel Vector relation
+ * @return Preprocessed relation
+ */
+ private Relation<ParameterizationFunction> preprocess(Database db, Relation<V> vrel) {
+ DBIDs ids = vrel.getDBIDs();
+ SimpleTypeInformation<ParameterizationFunction> type = new SimpleTypeInformation<ParameterizationFunction>(ParameterizationFunction.class);
+ MaterializedRelation<ParameterizationFunction> prep = new MaterializedRelation<ParameterizationFunction>(db, type, ids);
+
+ // Project
+ for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ ParameterizationFunction f = new ParameterizationFunction(vrel.get(iter));
+ prep.set(iter, f);
}
+
+ return prep;
}
/**
@@ -277,69 +282,62 @@ public class CASH extends AbstractAlgorithm<Clustering<Model>> implements Cluste
* @param relation the Relation to run the CASH algorithm on
* @param progress the progress object for verbose messages
* @return a mapping of subspace dimensionalities to clusters
- * @throws UnableToComplyException if an error according to the database
- * occurs
- * @throws ParameterException if the parameter setting is wrong
- * @throws NonNumericFeaturesException if non numeric feature vectors are used
*/
- private Clustering<Model> doRun(Relation<ParameterizationFunction> relation, FiniteProgress progress) throws UnableToComplyException, ParameterException, NonNumericFeaturesException {
+ private Clustering<Model> doRun(Relation<ParameterizationFunction> relation, FiniteProgress progress) {
Clustering<Model> res = new Clustering<Model>("CASH clustering", "cash-clustering");
- final int dim = DatabaseUtil.dimensionality(relation);
+ final int dim = dimensionality(relation);
// init heap
Heap<IntegerPriorityObject<CASHInterval>> heap = new Heap<IntegerPriorityObject<CASHInterval>>();
ModifiableDBIDs noiseIDs = DBIDUtil.newHashSet(relation.getDBIDs());
initHeap(heap, relation, dim, noiseIDs);
- if(logger.isDebugging()) {
- StringBuffer msg = new StringBuffer();
+ if (LOG.isDebugging()) {
+ StringBuilder msg = new StringBuilder();
msg.append("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
msg.append("\nXXXX dim ").append(dim);
msg.append("\nXXXX database.size ").append(relation.size());
msg.append("\nXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
- logger.debugFine(msg.toString());
- }
- else if(logger.isVerbose()) {
- StringBuffer msg = new StringBuffer();
+ LOG.debugFine(msg.toString());
+ } else if (LOG.isVerbose()) {
+ StringBuilder msg = new StringBuilder();
msg.append("XXXX dim ").append(dim).append(" database.size ").append(relation.size());
- logger.verbose(msg.toString());
+ LOG.verbose(msg.toString());
}
// get the ''best'' d-dimensional intervals at max level
- while(!heap.isEmpty()) {
+ while (!heap.isEmpty()) {
CASHInterval interval = determineNextIntervalAtMaxLevel(heap);
- if(logger.isDebugging()) {
- logger.debugFine("next interval in dim " + dim + ": " + interval);
- }
- else if(logger.isVerbose()) {
- logger.verbose("next interval in dim " + dim + ": " + interval);
+ if (LOG.isDebugging()) {
+ LOG.debugFine("next interval in dim " + dim + ": " + interval);
+ } else if (LOG.isVerbose()) {
+ LOG.verbose("next interval in dim " + dim + ": " + interval);
}
// only noise left
- if(interval == null) {
+ if (interval == null) {
break;
}
// do a dim-1 dimensional run
ModifiableDBIDs clusterIDs = DBIDUtil.newHashSet();
- if(dim > minDim + 1) {
+ if (dim > minDim + 1) {
ModifiableDBIDs ids;
Matrix basis_dim_minus_1;
- if(adjust) {
+ if (adjust) {
ids = DBIDUtil.newHashSet();
basis_dim_minus_1 = runDerivator(relation, dim, interval, ids);
- }
- else {
+ } else {
ids = interval.getIDs();
basis_dim_minus_1 = determineBasis(SpatialUtil.centroid(interval));
}
- if(ids.size() != 0) {
+ if (ids.size() != 0) {
MaterializedRelation<ParameterizationFunction> db = buildDB(dim, basis_dim_minus_1, ids, relation);
// add result of dim-1 to this result
Clustering<Model> res_dim_minus_1 = doRun(db, progress);
- for(Cluster<Model> cluster : res_dim_minus_1.getAllClusters()) {
+ for (Cluster<Model> cluster : res_dim_minus_1.getAllClusters()) {
res.addCluster(cluster);
noiseIDs.removeDBIDs(cluster.getIDs());
clusterIDs.addDBIDs(cluster.getIDs());
@@ -358,28 +356,31 @@ public class CASH extends AbstractAlgorithm<Clustering<Model>> implements Cluste
}
// Rebuild heap
- List<IntegerPriorityObject<CASHInterval>> heapVector = heap.toSortedArrayList();
- for(IntegerPriorityObject<CASHInterval> pair : heapVector) {
+ ArrayList<IntegerPriorityObject<CASHInterval>> heapVector = new ArrayList<IntegerPriorityObject<CASHInterval>>(heap.size());
+ for (IntegerPriorityObject<CASHInterval> obj : heap) {
+ heapVector.add(obj);
+ }
+ heap.clear();
+ for (IntegerPriorityObject<CASHInterval> pair : heapVector) {
CASHInterval currentInterval = pair.getObject();
currentInterval.removeIDs(clusterIDs);
- if(currentInterval.getIDs().size() >= minPts) {
+ if (currentInterval.getIDs().size() >= minPts) {
heap.add(new IntegerPriorityObject<CASHInterval>(currentInterval.priority(), currentInterval));
}
}
- if(progress != null) {
- progress.setProcessed(processedIDs.size(), logger);
+ if (progress != null) {
+ progress.setProcessed(processedIDs.size(), LOG);
}
}
// put noise to clusters
- if(!noiseIDs.isEmpty()) {
- if(dim == noiseDim) {
+ if (!noiseIDs.isEmpty()) {
+ if (dim == noiseDim) {
Cluster<Model> c = new Cluster<Model>(noiseIDs, true, ClusterModel.CLUSTER);
res.addCluster(c);
processedIDs.addDBIDs(noiseIDs);
- }
- else if(noiseIDs.size() >= minPts) {
+ } else if (noiseIDs.size() >= minPts) {
LinearEquationSystem les = runDerivator(fulldatabase, dim - 1, noiseIDs);
Cluster<Model> c = new Cluster<Model>(noiseIDs, true, new LinearEquationModel(les));
res.addCluster(c);
@@ -387,29 +388,38 @@ public class CASH extends AbstractAlgorithm<Clustering<Model>> implements Cluste
}
}
- if(logger.isDebugging()) {
- StringBuffer msg = new StringBuffer();
+ if (LOG.isDebugging()) {
+ StringBuilder msg = new StringBuilder();
msg.append("noise fuer dim ").append(dim).append(": ").append(noiseIDs.size());
- for(Cluster<Model> c : res.getAllClusters()) {
- if(c.getModel() instanceof LinearEquationModel) {
+ for (Cluster<Model> c : res.getAllClusters()) {
+ if (c.getModel() instanceof LinearEquationModel) {
LinearEquationModel s = (LinearEquationModel) c.getModel();
msg.append("\n Cluster: Dim: " + s.getLes().subspacedim() + " size: " + c.size());
- }
- else {
+ } else {
msg.append("\n Cluster: " + c.getModel().getClass().getName() + " size: " + c.size());
}
}
- logger.debugFine(msg.toString());
+ LOG.debugFine(msg.toString());
}
- if(progress != null) {
- progress.setProcessed(processedIDs.size(), logger);
+ if (progress != null) {
+ progress.setProcessed(processedIDs.size(), LOG);
}
return res;
}
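
The heap rebuild above replaces the removed toSortedArrayList(): the heap is drained into a list, cleared, and only intervals that still hold at least minPts points are re-inserted with a freshly computed priority, since removing clustered points changed the interval sizes. The pattern with a JDK PriorityQueue standing in for ELKI's Heap (entry layout illustrative; a real implementation would recompute e[0] from the shrunken interval):

    import java.util.ArrayList;
    import java.util.PriorityQueue;

    final class HeapRebuildSketch {
      // e[0]: priority, e[1]: remaining point count (illustrative layout).
      static void rebuild(PriorityQueue<long[]> heap, int minPts) {
        ArrayList<long[]> entries = new ArrayList<>(heap);
        heap.clear();
        for (long[] e : entries) {
          if (e[1] >= minPts) {
            heap.add(e); // keep only intervals that can still form a cluster
          }
        }
      }
    }
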
/**
+ * Get the dimensionality of a vector field.
+ *
+ * @param relation Relation
+ * @return Dimensionality
+ */
+ private static int dimensionality(Relation<ParameterizationFunction> relation) {
+ return relation.get(relation.iterDBIDs()).getDimensionality();
+ }
+
+ /**
* Initializes the heap with the root intervals.
*
* @param heap the heap to be initialized
@@ -431,21 +441,20 @@ public class CASH extends AbstractAlgorithm<Clustering<Model>> implements Cluste
double[] d_mins = new double[numDIntervals];
double[] d_maxs = new double[numDIntervals];
- if(logger.isDebugging()) {
- StringBuffer msg = new StringBuffer();
+ if (LOG.isDebugging()) {
+ StringBuilder msg = new StringBuilder();
msg.append("d_min ").append(d_min);
msg.append("\nd_max ").append(d_max);
msg.append("\nnumDIntervals ").append(numDIntervals);
msg.append("\ndIntervalSize ").append(dIntervalSize);
- logger.debugFine(msg.toString());
- }
- else if(logger.isVerbose()) {
- StringBuffer msg = new StringBuffer();
+ LOG.debugFine(msg.toString());
+ } else if (LOG.isVerbose()) {
+ StringBuilder msg = new StringBuilder();
msg.append("d_min ").append(d_min);
msg.append("\nd_max ").append(d_max);
msg.append("\nnumDIntervals ").append(numDIntervals);
msg.append("\ndIntervalSize ").append(dIntervalSize);
- logger.verbose(msg.toString());
+ LOG.verbose(msg.toString());
}
// alpha intervals
@@ -453,33 +462,31 @@ public class CASH extends AbstractAlgorithm<Clustering<Model>> implements Cluste
double[] alphaMax = new double[dim - 1];
Arrays.fill(alphaMax, Math.PI);
- for(int i = 0; i < numDIntervals; i++) {
- if(i == 0) {
+ for (int i = 0; i < numDIntervals; i++) {
+ if (i == 0) {
d_mins[i] = d_min;
- }
- else {
+ } else {
d_mins[i] = d_maxs[i - 1];
}
- if(i < numDIntervals - 1) {
+ if (i < numDIntervals - 1) {
d_maxs[i] = d_mins[i] + dIntervalSize;
- }
- else {
+ } else {
d_maxs[i] = d_max - d_mins[i];
}
HyperBoundingBox alphaInterval = new HyperBoundingBox(alphaMin, alphaMax);
ModifiableDBIDs intervalIDs = split.determineIDs(ids, alphaInterval, d_mins[i], d_maxs[i]);
- if(intervalIDs != null && intervalIDs.size() >= minPts) {
- CASHInterval rootInterval = new CASHInterval(alphaMin, alphaMax, split, intervalIDs, 0, 0, d_mins[i], d_maxs[i]);
+ if (intervalIDs != null && intervalIDs.size() >= minPts) {
+ CASHInterval rootInterval = new CASHInterval(alphaMin, alphaMax, split, intervalIDs, -1, 0, d_mins[i], d_maxs[i]);
heap.add(new IntegerPriorityObject<CASHInterval>(rootInterval.priority(), rootInterval));
}
}
- if(logger.isDebuggingFiner()) {
- StringBuffer msg = new StringBuffer();
+ if (LOG.isDebuggingFiner()) {
+ StringBuilder msg = new StringBuilder();
msg.append("heap.size ").append(heap.size());
- logger.debugFiner(msg.toString());
+ LOG.debugFiner(msg.toString());
}
}
@@ -493,23 +500,21 @@ public class CASH extends AbstractAlgorithm<Clustering<Model>> implements Cluste
* @param relation the database storing the parameterization functions
* @return a dim-1 dimensional database where the objects are projected into
* the specified subspace
- * @throws UnableToComplyException if an error according to the database
- * occurs
*/
- private MaterializedRelation<ParameterizationFunction> buildDB(int dim, Matrix basis, DBIDs ids, Relation<ParameterizationFunction> relation) throws UnableToComplyException {
+ private MaterializedRelation<ParameterizationFunction> buildDB(int dim, Matrix basis, DBIDs ids, Relation<ParameterizationFunction> relation) {
ProxyDatabase proxy = new ProxyDatabase(ids);
- VectorFieldTypeInformation<ParameterizationFunction> type = VectorFieldTypeInformation.get(ParameterizationFunction.class, basis.getColumnDimensionality());
+ SimpleTypeInformation<ParameterizationFunction> type = new SimpleTypeInformation<ParameterizationFunction>(ParameterizationFunction.class);
MaterializedRelation<ParameterizationFunction> prep = new MaterializedRelation<ParameterizationFunction>(proxy, type, ids);
proxy.addRelation(prep);
-
+
// Project
- for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
ParameterizationFunction f = project(basis, relation.get(iter));
prep.set(iter, f);
}
- if(logger.isDebugging()) {
- logger.debugFine("db fuer dim " + (dim - 1) + ": " + ids.size());
+ if (LOG.isDebugging()) {
+ LOG.debugFine("db fuer dim " + (dim - 1) + ": " + ids.size());
}
return prep;
@@ -527,7 +532,7 @@ public class CASH extends AbstractAlgorithm<Clustering<Model>> implements Cluste
// Matrix m = new Matrix(new
// double[][]{f.getPointCoordinates()}).times(basis);
Matrix m = f.getColumnVector().transposeTimes(basis);
- ParameterizationFunction f_t = new ParameterizationFunction(m.getColumnPackedCopy());
+ ParameterizationFunction f_t = new ParameterizationFunction(new DoubleVector(m.getColumnPackedCopy()));
return f_t;
}
@@ -540,7 +545,7 @@ public class CASH extends AbstractAlgorithm<Clustering<Model>> implements Cluste
*/
private Matrix determineBasis(double[] alpha) {
double[] nn = new double[alpha.length + 1];
- for(int i = 0; i < nn.length; i++) {
+ for (int i = 0; i < nn.length; i++) {
double alpha_i = i == alpha.length ? 0 : alpha[i];
nn[i] = sinusProduct(0, i, alpha) * StrictMath.cos(alpha_i);
}
@@ -560,7 +565,7 @@ public class CASH extends AbstractAlgorithm<Clustering<Model>> implements Cluste
*/
private double sinusProduct(int start, int end, double[] alpha) {
double result = 1;
- for(int j = start; j < end; j++) {
+ for (int j = start; j < end; j++) {
result *= StrictMath.sin(alpha[j]);
}
return result;
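
determineBasis constructs the subspace normal from the angles by spherical coordinates: n_i = (prod_{j<i} sin alpha_j) * cos alpha_i, with the final angle taken as 0 so the last component is the full sine product. Restated standalone with an incremental sine product (illustrative):

    static double[] normalVector(double[] alpha) {
      double[] nn = new double[alpha.length + 1];
      double sinprod = 1.0; // maintains prod_{j<i} sin(alpha_j)
      for (int i = 0; i < nn.length; i++) {
        double alpha_i = (i == alpha.length) ? 0 : alpha[i];
        nn[i] = sinprod * StrictMath.cos(alpha_i);
        if (i < alpha.length) {
          sinprod *= StrictMath.sin(alpha[i]);
        }
      }
      return nn;
    }
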
@@ -576,8 +581,8 @@ public class CASH extends AbstractAlgorithm<Clustering<Model>> implements Cluste
private CASHInterval determineNextIntervalAtMaxLevel(Heap<IntegerPriorityObject<CASHInterval>> heap) {
CASHInterval next = doDetermineNextIntervalAtMaxLevel(heap);
// noise path was chosen
- while(next == null) {
- if(heap.isEmpty()) {
+ while (next == null) {
+ if (heap.isEmpty()) {
return null;
}
next = doDetermineNextIntervalAtMaxLevel(heap);
@@ -596,48 +601,45 @@ public class CASH extends AbstractAlgorithm<Clustering<Model>> implements Cluste
private CASHInterval doDetermineNextIntervalAtMaxLevel(Heap<IntegerPriorityObject<CASHInterval>> heap) {
CASHInterval interval = heap.poll().getObject();
int dim = interval.getDimensionality();
- while(true) {
+ while (true) {
// max level is reached
- if(interval.getLevel() >= maxLevel && interval.getMaxSplitDimension() == dim) {
+ if (interval.getLevel() >= maxLevel && interval.getMaxSplitDimension() == (dim - 1)) {
return interval;
}
- if(heap.size() % 10000 == 0 && logger.isVerbose()) {
- logger.verbose("heap size " + heap.size());
+ if (heap.size() % 10000 == 0 && LOG.isVerbose()) {
+ LOG.verbose("heap size " + heap.size());
}
- if(heap.size() >= 40000) {
- logger.warning("Heap size > 40.000!!!");
+ if (heap.size() >= 40000) {
+ LOG.warning("Heap size > 40.000!!!");
heap.clear();
return null;
}
- if(logger.isDebuggingFiner()) {
- logger.debugFiner("split " + interval.toString() + " " + interval.getLevel() + "-" + interval.getMaxSplitDimension());
+ if (LOG.isDebuggingFiner()) {
+ LOG.debugFiner("split " + interval.toString() + " " + interval.getLevel() + "-" + interval.getMaxSplitDimension());
}
interval.split();
// noise
- if(!interval.hasChildren()) {
+ if (!interval.hasChildren()) {
return null;
}
CASHInterval bestInterval;
- if(interval.getLeftChild() != null && interval.getRightChild() != null) {
+ if (interval.getLeftChild() != null && interval.getRightChild() != null) {
int comp = interval.getLeftChild().compareTo(interval.getRightChild());
- if(comp < 0) {
+ if (comp < 0) {
bestInterval = interval.getRightChild();
heap.add(new IntegerPriorityObject<CASHInterval>(interval.getLeftChild().priority(), interval.getLeftChild()));
- }
- else {
+ } else {
bestInterval = interval.getLeftChild();
heap.add(new IntegerPriorityObject<CASHInterval>(interval.getRightChild().priority(), interval.getRightChild()));
}
- }
- else if(interval.getLeftChild() == null) {
+ } else if (interval.getLeftChild() == null) {
bestInterval = interval.getRightChild();
- }
- else {
+ } else {
bestInterval = interval.getLeftChild();
}
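The loop above is a best-first refinement: poll the top interval, split it, descend into the child that compares greater, and push the sibling back for later. A generic skeleton of that pattern using only standard Java collections (split and isMaxLevel are hypothetical stand-ins; the noise handling of the outer retry loop is folded in):

  import java.util.Comparator;
  import java.util.List;
  import java.util.PriorityQueue;
  import java.util.function.Function;
  import java.util.function.Predicate;

  static <T extends Comparable<T>> T bestFirst(PriorityQueue<T> heap,
      Function<T, List<T>> split, Predicate<T> isMaxLevel) {
    T current = heap.poll();
    while (current != null && !isMaxLevel.test(current)) {
      List<T> children = split.apply(current);
      if (children.isEmpty()) {
        current = heap.poll(); // noise path: resume with the next-best entry
        continue;
      }
      children.sort(Comparator.reverseOrder()); // descend into the greater child
      current = children.get(0);
      for (T sibling : children.subList(1, children.size())) {
        heap.add(sibling); // keep siblings for later exploration
      }
    }
    return current; // null once the heap is exhausted
  }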
@@ -662,7 +664,7 @@ public class CASH extends AbstractAlgorithm<Clustering<Model>> implements Cluste
double d_min = Double.POSITIVE_INFINITY;
double d_max = Double.NEGATIVE_INFINITY;
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
ParameterizationFunction f = relation.get(iditer);
HyperBoundingBox minMax = f.determineAlphaMinMax(box);
double f_min = f.function(SpatialUtil.getMin(minMax));
@@ -684,11 +686,8 @@ public class CASH extends AbstractAlgorithm<Clustering<Model>> implements Cluste
* @param dim the dimensionality of the database
* @param ids an empty set to assign the ids
* @return a basis of the found subspace
- * @throws UnableToComplyException if an error according to the database
- * occurs
- * @throws ParameterException if the parameter setting is wrong
*/
- private Matrix runDerivator(Relation<ParameterizationFunction> relation, int dim, CASHInterval interval, ModifiableDBIDs ids) throws UnableToComplyException, ParameterException {
+ private Matrix runDerivator(Relation<ParameterizationFunction> relation, int dim, CASHInterval interval, ModifiableDBIDs ids) {
// build database for derivator
Database derivatorDB = buildDerivatorDB(relation, interval);
@@ -705,14 +704,14 @@ public class CASH extends AbstractAlgorithm<Clustering<Model>> implements Cluste
Matrix weightMatrix = model.getSimilarityMatrix();
DoubleVector centroid = new DoubleVector(model.getCentroid());
DistanceQuery<DoubleVector, DoubleDistance> df = QueryUtil.getDistanceQuery(derivatorDB, new WeightedDistanceFunction(weightMatrix));
- DoubleDistance eps = df.getDistanceFactory().parseString("0.25");
+ DoubleDistance eps = new DoubleDistance(0.25);
ids.addDBIDs(interval.getIDs());
// Search for nearby vectors in original database
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
DoubleVector v = new DoubleVector(relation.get(iditer).getColumnVector().getArrayRef());
DoubleDistance d = df.distance(v, centroid);
- if(d.compareTo(eps) < 0) {
+ if (d.compareTo(eps) < 0) {
ids.add(iditer);
}
}
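The eps-test above relies on WeightedDistanceFunction. Assuming the usual quadratic-form definition d(x, c) = sqrt((x - c)^T W (x - c)) with W the model's similarity matrix (an assumption stated here, not spelled out in the patch), a plain-array sketch reads:

  static double weightedDistance(double[] x, double[] c, double[][] w) {
    int d = x.length;
    double[] diff = new double[d];
    for (int i = 0; i < d; i++) {
      diff[i] = x[i] - c[i];
    }
    double sum = 0;
    for (int i = 0; i < d; i++) {
      double row = 0;
      for (int j = 0; j < d; j++) {
        row += w[i][j] * diff[j]; // (W diff)_i
      }
      sum += diff[i] * row; // diff^T W diff
    }
    return Math.sqrt(sum);
  }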
@@ -729,25 +728,23 @@ public class CASH extends AbstractAlgorithm<Clustering<Model>> implements Cluste
* @param interval the interval to build the database from
* @return a database for the derivator consisting of the ids in the specified
* interval
- * @throws UnableToComplyException if an error according to the database
- * occurs
*/
- private Database buildDerivatorDB(Relation<ParameterizationFunction> relation, CASHInterval interval) throws UnableToComplyException {
+ private Database buildDerivatorDB(Relation<ParameterizationFunction> relation, CASHInterval interval) {
DBIDs ids = interval.getIDs();
ProxyDatabase proxy = new ProxyDatabase(ids);
- int dim = DatabaseUtil.dimensionality(relation);
- SimpleTypeInformation<DoubleVector> type = new VectorFieldTypeInformation<DoubleVector>(DoubleVector.class, dim, new DoubleVector(new double[dim]));
+ int dim = dimensionality(relation);
+ SimpleTypeInformation<DoubleVector> type = new VectorFieldTypeInformation<DoubleVector>(DoubleVector.FACTORY, dim);
MaterializedRelation<DoubleVector> prep = new MaterializedRelation<DoubleVector>(proxy, type, ids);
proxy.addRelation(prep);
// Project
- for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
DoubleVector v = new DoubleVector(relation.get(iter).getColumnVector().getArrayRef());
prep.set(iter, v);
}
- if(logger.isDebugging()) {
- logger.debugFine("db fuer derivator : " + prep.size());
+ if (LOG.isDebugging()) {
+ LOG.debugFine("db fuer derivator : " + prep.size());
}
return proxy;
@@ -778,11 +775,7 @@ public class CASH extends AbstractAlgorithm<Clustering<Model>> implements Cluste
CorrelationAnalysisSolution<DoubleVector> model = derivator.run(derivatorDB);
LinearEquationSystem les = model.getNormalizedLinearEquationSystem(null);
return les;
- }
- catch(UnableToComplyException e) {
- throw new IllegalStateException("Initialization of the database for the derivator failed: " + e);
- }
- catch(NonNumericFeaturesException e) {
+ } catch (NonNumericFeaturesException e) {
throw new IllegalStateException("Error during normalization: " + e);
}
}
@@ -795,18 +788,16 @@ public class CASH extends AbstractAlgorithm<Clustering<Model>> implements Cluste
* @param ids the ids to build the database from
* @return a database for the derivator consisting of the ids in the specified
* interval
- * @throws UnableToComplyException if initialization of the database is not
- * possible
*/
- private Database buildDerivatorDB(Relation<ParameterizationFunction> relation, DBIDs ids) throws UnableToComplyException {
+ private Database buildDerivatorDB(Relation<ParameterizationFunction> relation, DBIDs ids) {
ProxyDatabase proxy = new ProxyDatabase(ids);
- int dim = DatabaseUtil.dimensionality(relation);
- SimpleTypeInformation<DoubleVector> type = new VectorFieldTypeInformation<DoubleVector>(DoubleVector.class, dim, new DoubleVector(new double[dim]));
+ int dim = dimensionality(relation);
+ SimpleTypeInformation<DoubleVector> type = new VectorFieldTypeInformation<DoubleVector>(DoubleVector.FACTORY, dim);
MaterializedRelation<DoubleVector> prep = new MaterializedRelation<DoubleVector>(proxy, type, ids);
proxy.addRelation(prep);
// Project
- for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
DoubleVector v = new DoubleVector(relation.get(iter).getColumnVector().getArrayRef());
prep.set(iter, v);
}
@@ -816,12 +807,12 @@ public class CASH extends AbstractAlgorithm<Clustering<Model>> implements Cluste
@Override
public TypeInformation[] getInputTypeRestriction() {
- return TypeUtil.array(VectorFieldTypeInformation.get(ParameterizationFunction.class));
+ return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
}
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -845,31 +836,35 @@ public class CASH extends AbstractAlgorithm<Clustering<Model>> implements Cluste
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- IntParameter minptsP = new IntParameter(MINPTS_ID, new GreaterConstraint(0));
- if(config.grab(minptsP)) {
+ IntParameter minptsP = new IntParameter(MINPTS_ID);
+ minptsP.addConstraint(new GreaterConstraint(0));
+ if (config.grab(minptsP)) {
minpts = minptsP.getValue();
}
- IntParameter maxlevelP = new IntParameter(MAXLEVEL_ID, new GreaterConstraint(0));
- if(config.grab(maxlevelP)) {
+ IntParameter maxlevelP = new IntParameter(MAXLEVEL_ID);
+ maxlevelP.addConstraint(new GreaterConstraint(0));
+ if (config.grab(maxlevelP)) {
maxlevel = maxlevelP.getValue();
}
- IntParameter mindimP = new IntParameter(MINDIM_ID, new GreaterConstraint(0), 1);
- if(config.grab(mindimP)) {
+ IntParameter mindimP = new IntParameter(MINDIM_ID, 1);
+ mindimP.addConstraint(new GreaterConstraint(0));
+ if (config.grab(mindimP)) {
mindim = mindimP.getValue();
}
- DoubleParameter jitterP = new DoubleParameter(JITTER_ID, new GreaterConstraint(0));
- if(config.grab(jitterP)) {
+ DoubleParameter jitterP = new DoubleParameter(JITTER_ID);
+ jitterP.addConstraint(new GreaterConstraint(0));
+ if (config.grab(jitterP)) {
jitter = jitterP.getValue();
}
Flag adjustF = new Flag(ADJUST_ID);
- if(config.grab(adjustF)) {
+ if (config.grab(adjustF)) {
adjust = adjustF.getValue();
}
}
@Override
- protected CASH makeInstance() {
- return new CASH(minpts, maxlevel, mindim, jitter, adjust);
+ protected CASH<NumberVector<?>> makeInstance() {
+ return new CASH<NumberVector<?>>(minpts, maxlevel, mindim, jitter, adjust);
}
}
-} \ No newline at end of file
+}
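A recurring change in this patch is the parameter API migration: range constraints move out of the parameter constructor into addConstraint(), default values stay constructor arguments, and optionality becomes setOptional(). Condensed into one hedged example, inside a Parameterizer's makeOptions(Parameterization config), using names from the hunks above:

  // Old style (removed): new IntParameter(MINPTS_ID, new GreaterConstraint(0));
  // New style (added throughout this patch):
  IntParameter minptsP = new IntParameter(MINPTS_ID);
  minptsP.addConstraint(new GreaterConstraint(0));
  if (config.grab(minptsP)) {
    minpts = minptsP.getValue();
  }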
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/COPAC.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/COPAC.java
index 1d41d37e..ac50559e 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/COPAC.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/COPAC.java
@@ -40,13 +40,13 @@ import de.lmu.ifi.dbs.elki.data.model.Model;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.ProxyDatabase;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.FilteredLocalPCABasedDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.IndexBasedDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.LocallyWeightedDistanceFunction;
@@ -56,7 +56,6 @@ import de.lmu.ifi.dbs.elki.index.preprocessed.LocalProjectionIndex;
import de.lmu.ifi.dbs.elki.index.preprocessed.LocalProjectionIndex.Factory;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -92,11 +91,11 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
@Title("COPAC: COrrelation PArtition Clustering")
@Description("Partitions a database according to the correlation dimension of its objects and performs " + "a clustering algorithm over the partitions.")
@Reference(authors = "E. Achtert, C. Böhm, H.-P. Kriegel, P. Kröger P., A. Zimek", title = "Robust, Complete, and Efficient Correlation Clustering", booktitle = "Proc. 7th SIAM International Conference on Data Mining (SDM'07), Minneapolis, MN, 2007", url = "http://www.siam.org/proceedings/datamining/2007/dm07_037achtert.pdf")
-public class COPAC<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractAlgorithm<Clustering<Model>> implements ClusteringAlgorithm<Clustering<Model>> {
+public class COPAC<V extends NumberVector<?>, D extends Distance<D>> extends AbstractAlgorithm<Clustering<Model>> implements ClusteringAlgorithm<Clustering<Model>> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(COPAC.class);
+ private static final Logging LOG = Logging.getLogger(COPAC.class);
/**
* Parameter to specify the local PCA preprocessor to derive partition
@@ -106,7 +105,7 @@ public class COPAC<V extends NumberVector<V, ?>, D extends Distance<D>> extends
* Key: {@code -copac.preprocessor}
* </p>
*/
- public static final OptionID PREPROCESSOR_ID = OptionID.getOrCreateOptionID("copac.preprocessor", "Local PCA Preprocessor to derive partition criterion.");
+ public static final OptionID PREPROCESSOR_ID = new OptionID("copac.preprocessor", "Local PCA Preprocessor to derive partition criterion.");
/**
* Parameter to specify the distance function to use inside the partitions
@@ -120,7 +119,7 @@ public class COPAC<V extends NumberVector<V, ?>, D extends Distance<D>> extends
* Key: {@code -copac.partitionDistance}
* </p>
*/
- public static final OptionID PARTITION_DISTANCE_ID = OptionID.getOrCreateOptionID("copac.partitionDistance", "Distance to use for the inner algorithms.");
+ public static final OptionID PARTITION_DISTANCE_ID = new OptionID("copac.partitionDistance", "Distance to use for the inner algorithms.");
/**
* Parameter to specify the clustering algorithm to apply to each partition,
@@ -130,7 +129,7 @@ public class COPAC<V extends NumberVector<V, ?>, D extends Distance<D>> extends
* Key: {@code -copac.partitionAlgorithm}
* </p>
*/
- public static final OptionID PARTITION_ALGORITHM_ID = OptionID.getOrCreateOptionID("copac.partitionAlgorithm", "Clustering algorithm to apply to each partition.");
+ public static final OptionID PARTITION_ALGORITHM_ID = new OptionID("copac.partitionAlgorithm", "Clustering algorithm to apply to each partition.");
/**
* Holds the instance of the preprocessed distance function
@@ -178,8 +177,8 @@ public class COPAC<V extends NumberVector<V, ?>, D extends Distance<D>> extends
*/
@SuppressWarnings("unchecked")
public Clustering<Model> run(Relation<V> relation) {
- if(logger.isVerbose()) {
- logger.verbose("Running COPAC on db size = " + relation.size() + " with dimensionality = " + DatabaseUtil.dimensionality(relation));
+ if(LOG.isVerbose()) {
+ LOG.verbose("Running COPAC on db size = " + relation.size() + " with dimensionality = " + RelationUtil.dimensionality(relation));
}
partitionDistanceQuery = (FilteredLocalPCABasedDistanceFunction.Instance<V, LocalProjectionIndex<V, ?>, D>) partitionDistanceFunction.instantiate(relation);
@@ -187,7 +186,7 @@ public class COPAC<V extends NumberVector<V, ?>, D extends Distance<D>> extends
// partitioning
Map<Integer, ModifiableDBIDs> partitionMap = new HashMap<Integer, ModifiableDBIDs>();
- FiniteProgress partitionProgress = logger.isVerbose() ? new FiniteProgress("Partitioning", relation.size(), logger) : null;
+ FiniteProgress partitionProgress = LOG.isVerbose() ? new FiniteProgress("Partitioning", relation.size(), LOG) : null;
int processed = 1;
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
@@ -199,17 +198,17 @@ public class COPAC<V extends NumberVector<V, ?>, D extends Distance<D>> extends
partitionMap.get(corrdim).add(iditer);
if(partitionProgress != null) {
- partitionProgress.setProcessed(processed++, logger);
+ partitionProgress.setProcessed(processed++, LOG);
}
}
if(partitionProgress != null) {
- partitionProgress.ensureCompleted(logger);
+ partitionProgress.ensureCompleted(LOG);
}
- if(logger.isVerbose()) {
+ if(LOG.isVerbose()) {
for(Integer corrDim : partitionMap.keySet()) {
ModifiableDBIDs list = partitionMap.get(corrDim);
- logger.verbose("Partition [corrDim = " + corrDim + "]: " + list.size() + " objects.");
+ LOG.verbose("Partition [corrDim = " + corrDim + "]: " + list.size() + " objects.");
}
}
@@ -236,7 +235,7 @@ public class COPAC<V extends NumberVector<V, ?>, D extends Distance<D>> extends
// TODO: use an extra finite progress for the partitions?
for(Entry<Integer, DBIDs> pair : partitionMap.entrySet()) {
// noise partition
- if(pair.getKey() == DatabaseUtil.dimensionality(relation)) {
+ if(pair.getKey() == RelationUtil.dimensionality(relation)) {
// Make a Noise cluster
result.addCluster(new Cluster<Model>(pair.getValue(), true, ClusterModel.CLUSTER));
}
@@ -245,8 +244,8 @@ public class COPAC<V extends NumberVector<V, ?>, D extends Distance<D>> extends
ProxyDatabase proxy = new ProxyDatabase(partids, relation);
ClusteringAlgorithm<Clustering<Model>> partitionAlgorithm = getPartitionAlgorithm(query);
- if(logger.isVerbose()) {
- logger.verbose("Running " + partitionAlgorithm.getClass().getName() + " on partition [corrDim = " + pair.getKey() + "]...");
+ if(LOG.isVerbose()) {
+ LOG.verbose("Running " + partitionAlgorithm.getClass().getName() + " on partition [corrDim = " + pair.getKey() + "]...");
}
Clustering<Model> p = partitionAlgorithm.run(proxy);
// Re-Wrap resulting Clusters as DimensionModel clusters.
@@ -295,7 +294,7 @@ public class COPAC<V extends NumberVector<V, ?>, D extends Distance<D>> extends
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -305,7 +304,7 @@ public class COPAC<V extends NumberVector<V, ?>, D extends Distance<D>> extends
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector<?>, D extends Distance<D>> extends AbstractParameterizer {
protected LocalProjectionIndex.Factory<V, ?> indexI = null;
protected FilteredLocalPCABasedDistanceFunction<V, ?, D> pdistI = null;
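COPAC's first phase, shown in the run() hunks above, only buckets object ids by their local correlation dimensionality; the bucket whose key equals the full data dimensionality later becomes the noise cluster, and every other bucket is clustered independently. A minimal sketch of the bucketing step (plain Java; corrDim stands in for the preprocessor lookup):

  import java.util.ArrayList;
  import java.util.HashMap;
  import java.util.List;
  import java.util.Map;
  import java.util.function.IntUnaryOperator;

  static Map<Integer, List<Integer>> partitionByCorrDim(int n, IntUnaryOperator corrDim) {
    Map<Integer, List<Integer>> partitions = new HashMap<>();
    for (int i = 0; i < n; i++) {
      int dim = corrDim.applyAsInt(i); // local correlation dimensionality of object i
      partitions.computeIfAbsent(dim, k -> new ArrayList<>()).add(i);
    }
    return partitions;
  }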
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ERiC.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ERiC.java
index b57a6e29..7e7314b4 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ERiC.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ERiC.java
@@ -44,6 +44,7 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.ProxyDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.correlation.ERiCDistanceFunction;
@@ -52,11 +53,11 @@ import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.distance.distancevalue.IntegerDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid;
import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.FirstNEigenPairFilter;
import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredResult;
import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredRunner;
import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -78,11 +79,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz
*
* @author Elke Achtert
*
- * @apiviz.uses COPAC
- * @apiviz.uses DBSCAN
- * @apiviz.uses ERiCDistanceFunction
- * @apiviz.uses FirstNEigenPairFilter
- * @apiviz.uses PCAFilteredRunner
+ * @apiviz.composedOf COPAC
+ * @apiviz.composedOf DBSCAN
+ * @apiviz.composedOf ERiCDistanceFunction
+ * @apiviz.composedOf FirstNEigenPairFilter
+ * @apiviz.composedOf PCAFilteredRunner
* @apiviz.has CorrelationModel
*
* @param <V> the type of NumberVector handled by this Algorithm
@@ -91,11 +92,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz
@Title("ERiC: Exploring Relationships among Correlation Clusters")
@Description("Performs the DBSCAN algorithm on the data using a special distance function taking into account correlations among attributes and builds " + "a hierarchy that allows multiple inheritance from the correlation clustering result.")
@Reference(authors = "E. Achtert, C. Böhm, H.-P. Kriegel, P. Kröger, and A. Zimek", title = "On Exploring Complex Relationships of Correlation Clusters", booktitle = "Proc. 19th International Conference on Scientific and Statistical Database Management (SSDBM 2007), Banff, Canada, 2007", url = "http://dx.doi.org/10.1109/SSDBM.2007.21")
-public class ERiC<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clustering<CorrelationModel<V>>> implements ClusteringAlgorithm<Clustering<CorrelationModel<V>>> {
+public class ERiC<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<CorrelationModel<V>>> implements ClusteringAlgorithm<Clustering<CorrelationModel<V>>> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(ERiC.class);
+ private static final Logging LOG = Logging.getLogger(ERiC.class);
/**
* The COPAC clustering algorithm.
@@ -119,13 +120,13 @@ public class ERiC<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
* @return Clustering result
*/
public Clustering<CorrelationModel<V>> run(Relation<V> relation) {
- final int dimensionality = DatabaseUtil.dimensionality(relation);
+ final int dimensionality = RelationUtil.dimensionality(relation);
- StepProgress stepprog = logger.isVerbose() ? new StepProgress(3) : null;
+ StepProgress stepprog = LOG.isVerbose() ? new StepProgress(3) : null;
// run COPAC
if(stepprog != null) {
- stepprog.beginStep(1, "Preprocessing local correlation dimensionalities and partitioning data", logger);
+ stepprog.beginStep(1, "Preprocessing local correlation dimensionalities and partitioning data", LOG);
}
Clustering<Model> copacResult = copacAlgorithm.run(relation);
@@ -133,11 +134,11 @@ public class ERiC<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
// extract correlation clusters
if(stepprog != null) {
- stepprog.beginStep(2, "Extract correlation clusters", logger);
+ stepprog.beginStep(2, "Extract correlation clusters", LOG);
}
SortedMap<Integer, List<Cluster<CorrelationModel<V>>>> clusterMap = extractCorrelationClusters(copacResult, relation, dimensionality);
- if(logger.isDebugging()) {
- StringBuffer msg = new StringBuffer("Step 2: Extract correlation clusters...");
+ if(LOG.isDebugging()) {
+ StringBuilder msg = new StringBuilder("Step 2: Extract correlation clusters...");
for(Integer corrDim : clusterMap.keySet()) {
List<Cluster<CorrelationModel<V>>> correlationClusters = clusterMap.get(corrDim);
msg.append("\n\ncorrDim ").append(corrDim);
@@ -149,23 +150,23 @@ public class ERiC<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
// " ids " + cluster.getIDs().size());
}
}
- logger.debugFine(msg.toString());
+ LOG.debugFine(msg.toString());
}
- if(logger.isVerbose()) {
+ if(LOG.isVerbose()) {
int clusters = 0;
for(List<Cluster<CorrelationModel<V>>> correlationClusters : clusterMap.values()) {
clusters += correlationClusters.size();
}
- logger.verbose(clusters + " clusters extracted.");
+ LOG.verbose(clusters + " clusters extracted.");
}
// build hierarchy
if(stepprog != null) {
- stepprog.beginStep(3, "Building hierarchy", logger);
+ stepprog.beginStep(3, "Building hierarchy", LOG);
}
buildHierarchy(clusterMap, query);
- if(logger.isDebugging()) {
- StringBuffer msg = new StringBuffer("Step 3: Build hierarchy");
+ if(LOG.isDebugging()) {
+ StringBuilder msg = new StringBuilder("Step 3: Build hierarchy");
for(Integer corrDim : clusterMap.keySet()) {
List<Cluster<CorrelationModel<V>>> correlationClusters = clusterMap.get(corrDim);
for(Cluster<CorrelationModel<V>> cluster : correlationClusters) {
@@ -179,10 +180,10 @@ public class ERiC<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
}
}
}
- logger.debugFine(msg.toString());
+ LOG.debugFine(msg.toString());
}
if(stepprog != null) {
- stepprog.setCompleted(logger);
+ stepprog.setCompleted(LOG);
}
Clustering<CorrelationModel<V>> result = new Clustering<CorrelationModel<V>>("ERiC clustering", "eric-clustering");
@@ -221,7 +222,7 @@ public class ERiC<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
Class<PCAFilteredRunner<V>> cls = ClassGenericsUtil.uglyCastIntoSubclass(PCAFilteredRunner.class);
PCAFilteredRunner<V> pca = parameters.tryInstantiate(cls);
for(ParameterException e : parameters.getErrors()) {
- logger.warning("Error in internal parameterization: " + e.getMessage());
+ LOG.warning("Error in internal parameterization: " + e.getMessage());
}
// get cluster list for this dimension.
@@ -233,7 +234,7 @@ public class ERiC<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
PCAFilteredResult pcares = pca.processIds(group, database);
- V centroid = DatabaseUtil.centroid(database, group);
+ V centroid = Centroid.make(database, group).toVector(database);
Cluster<CorrelationModel<V>> correlationCluster = new Cluster<CorrelationModel<V>>("[" + correlationDimension + "_" + correlationClusters.size() + "]", group, new CorrelationModel<V>(pcares, centroid), new ArrayList<Cluster<CorrelationModel<V>>>(), new ArrayList<Cluster<CorrelationModel<V>>>());
correlationClusters.add(correlationCluster);
}
@@ -264,11 +265,11 @@ public class ERiC<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
Class<PCAFilteredRunner<V>> cls = ClassGenericsUtil.uglyCastIntoSubclass(PCAFilteredRunner.class);
PCAFilteredRunner<V> pca = parameters.tryInstantiate(cls);
for(ParameterException e : parameters.getErrors()) {
- logger.warning("Error in internal parameterization: " + e.getMessage());
+ LOG.warning("Error in internal parameterization: " + e.getMessage());
}
PCAFilteredResult pcares = pca.processIds(noise.getIDs(), database);
- V centroid = DatabaseUtil.centroid(database, noise.getIDs());
+ V centroid = Centroid.make(database, noise.getIDs()).toVector(database);
Cluster<CorrelationModel<V>> correlationCluster = new Cluster<CorrelationModel<V>>("[noise]", noise.getIDs(), new CorrelationModel<V>(pcares, centroid), new ArrayList<Cluster<CorrelationModel<V>>>(), new ArrayList<Cluster<CorrelationModel<V>>>());
correlationClusters.add(correlationCluster);
}
@@ -292,7 +293,7 @@ public class ERiC<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
}
private void buildHierarchy(SortedMap<Integer, List<Cluster<CorrelationModel<V>>>> clusterMap, DistanceQuery<V, IntegerDistance> query) {
- StringBuffer msg = new StringBuffer();
+ StringBuilder msg = new StringBuilder();
DBSCAN<V, DoubleDistance> dbscan = ClassGenericsUtil.castWithGenericsOrNull(DBSCAN.class, copacAlgorithm.getPartitionAlgorithm(query));
if(dbscan == null) {
@@ -310,7 +311,7 @@ public class ERiC<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
for(Integer childCorrDim : clusterMap.keySet()) {
List<Cluster<CorrelationModel<V>>> children = clusterMap.get(childCorrDim);
SortedMap<Integer, List<Cluster<CorrelationModel<V>>>> parentMap = clusterMap.tailMap(childCorrDim + 1);
- if(logger.isDebugging()) {
+ if(LOG.isDebugging()) {
msg.append("\ncorrdim ").append(childCorrDim);
msg.append("\nparents ").append(parentMap.keySet());
}
@@ -323,8 +324,8 @@ public class ERiC<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
if(subspaceDim_parent == lambda_max && child.getParents().isEmpty()) {
parent.getChildren().add(child);
child.getParents().add(parent);
- if(logger.isDebugging()) {
- msg.append("\n").append(parent).append(" is parent of ").append(child);
+ if(LOG.isDebugging()) {
+ msg.append('\n').append(parent).append(" is parent of ").append(child);
}
}
else {
@@ -332,8 +333,8 @@ public class ERiC<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
if(!dist.bitValue() && (child.getParents().isEmpty() || !isParent(distanceFunction, parent, child.getParents()))) {
parent.getChildren().add(child);
child.getParents().add(parent);
- if(logger.isDebugging()) {
- msg.append("\n").append(parent).append(" is parent of ").append(child);
+ if(LOG.isDebugging()) {
+ msg.append('\n').append(parent).append(" is parent of ").append(child);
}
}
}
@@ -341,8 +342,8 @@ public class ERiC<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
}
}
}
- if(logger.isDebugging()) {
- logger.debugFine(msg.toString());
+ if(LOG.isDebugging()) {
+ LOG.debugFine(msg.toString());
}
}
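Stripped of logging, buildHierarchy() above links each cluster to candidates of strictly higher correlation dimensionality when the ERiC distance predicate accepts the pair. A compact restatement (hypothetical C type and isDerived predicate; the special lambda_max case and the subsumption check against existing parents are omitted):

  import java.util.ArrayList;
  import java.util.List;
  import java.util.SortedMap;
  import java.util.function.BiPredicate;

  static class C {
    List<C> parents = new ArrayList<>();
    List<C> children = new ArrayList<>();
  }

  static void linkHierarchy(SortedMap<Integer, List<C>> clusterMap, BiPredicate<C, C> isDerived) {
    for (Integer childDim : clusterMap.keySet()) {
      for (C child : clusterMap.get(childDim)) {
        // candidates: all clusters of strictly larger correlation dimensionality
        for (List<C> candidates : clusterMap.tailMap(childDim + 1).values()) {
          for (C parent : candidates) {
            if (child.parents.isEmpty() && isDerived.test(child, parent)) {
              parent.children.add(child);
              child.parents.add(parent);
            }
          }
        }
      }
    }
  }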
@@ -360,7 +361,7 @@ public class ERiC<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
*/
private boolean isParent(ERiCDistanceFunction distanceFunction, Cluster<CorrelationModel<V>> parent, List<Cluster<CorrelationModel<V>>> children) {
- StringBuffer msg = new StringBuffer();
+ StringBuilder msg = new StringBuilder();
for(Cluster<CorrelationModel<V>> child : children) {
if(parent.getModel().getPCAResult().getCorrelationDimension() == child.getModel().getPCAResult().getCorrelationDimension()) {
@@ -368,19 +369,19 @@ public class ERiC<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
}
BitDistance dist = distanceFunction.distance(parent.getModel().getCentroid(), child.getModel().getCentroid(), parent.getModel().getPCAResult(), child.getModel().getPCAResult());
- if(logger.isDebugging()) {
+ if(LOG.isDebugging()) {
msg.append("\ndist(").append(child).append(" - ").append(parent).append(") = ").append(dist);
}
if(!dist.bitValue()) {
- if(logger.isDebugging()) {
- logger.debugFine(msg.toString());
+ if(LOG.isDebugging()) {
+ LOG.debugFine(msg.toString());
}
return true;
}
}
- if(logger.isDebugging()) {
- logger.debugFine(msg.toString());
+ if(LOG.isDebugging()) {
+ LOG.debugFine(msg.toString());
}
return false;
}
@@ -392,7 +393,7 @@ public class ERiC<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -402,7 +403,7 @@ public class ERiC<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
/**
* The COPAC instance to use
*/
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/FourC.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/FourC.java
index 98761962..f56342e0 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/FourC.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/FourC.java
@@ -57,11 +57,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz
@Title("4C: Computing Correlation Connected Clusters")
@Description("4C identifies local subgroups of data objects sharing a uniform correlation. " + "The algorithm is based on a combination of PCA and density-based clustering (DBSCAN).")
@Reference(authors = "C. Böhm, K. Kailing, P. Kröger, A. Zimek", title = "Computing Clusters of Correlation Connected Objects", booktitle = "Proc. ACM SIGMOD Int. Conf. on Management of Data, Paris, France, 2004, 455-466", url = "http://dx.doi.org/10.1145/1007568.1007620")
-public class FourC<V extends NumberVector<V, ?>> extends AbstractProjectedDBSCAN<Clustering<Model>, V> {
+public class FourC<V extends NumberVector<?>> extends AbstractProjectedDBSCAN<Clustering<Model>, V> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(FourC.class);
+ private static final Logging LOG = Logging.getLogger(FourC.class);
/**
* Constructor.
@@ -92,7 +92,7 @@ public class FourC<V extends NumberVector<V, ?>> extends AbstractProjectedDBSCAN
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -102,7 +102,7 @@ public class FourC<V extends NumberVector<V, ?>> extends AbstractProjectedDBSCAN
*
* @apiviz.exclude
*/
- public static class Parameterizer<O extends NumberVector<O, ?>> extends AbstractProjectedDBSCAN.Parameterizer<O, DoubleDistance> {
+ public static class Parameterizer<O extends NumberVector<?>> extends AbstractProjectedDBSCAN.Parameterizer<O, DoubleDistance> {
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/HiCO.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/HiCO.java
index 1065682c..759e8f59 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/HiCO.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/HiCO.java
@@ -1,26 +1,27 @@
package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation;
-/*
-This file is part of ELKI:
-Environment for Developing KDD-Applications Supported by Index-Structures
-
-Copyright (C) 2012
-Ludwig-Maximilians-Universität München
-Lehr- und Forschungseinheit für Datenbanksysteme
-ELKI Development Team
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
import de.lmu.ifi.dbs.elki.algorithm.clustering.OPTICS;
import de.lmu.ifi.dbs.elki.data.NumberVector;
@@ -37,7 +38,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.IntervalConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ChainedParameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
@@ -64,11 +65,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@Title("Mining Hierarchies of Correlation Clusters")
@Description("Algorithm for detecting hierarchies of correlation clusters.")
@Reference(authors = "E. Achtert, C. Böhm, P. Kröger, A. Zimek", title = "Mining Hierarchies of Correlation Clusterse", booktitle = "Proc. Int. Conf. on Scientific and Statistical Database Management (SSDBM'06), Vienna, Austria, 2006", url = "http://dx.doi.org/10.1109/SSDBM.2006.35")
-public class HiCO<V extends NumberVector<V, ?>> extends OPTICS<V, PCACorrelationDistance> {
+public class HiCO<V extends NumberVector<?>> extends OPTICS<V, PCACorrelationDistance> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(HiCO.class);
+ private static final Logging LOG = Logging.getLogger(HiCO.class);
/**
* Parameter to specify the smoothing factor, must be an integer greater than
@@ -79,7 +80,7 @@ public class HiCO<V extends NumberVector<V, ?>> extends OPTICS<V, PCACorrelation
* Key: {@code -hico.mu}
* </p>
*/
- public static final OptionID MU_ID = OptionID.getOrCreateOptionID("hico.mu", "Specifies the smoothing factor. The mu-nearest neighbor is used to compute the correlation reachability of an object.");
+ public static final OptionID MU_ID = new OptionID("hico.mu", "Specifies the smoothing factor. The mu-nearest neighbor is used to compute the correlation reachability of an object.");
/**
* Optional parameter to specify the number of nearest neighbors considered in
@@ -92,7 +93,7 @@ public class HiCO<V extends NumberVector<V, ?>> extends OPTICS<V, PCACorrelation
* Default value: {@link #MU_ID}
* </p>
*/
- public static final OptionID K_ID = OptionID.getOrCreateOptionID("hico.k", "Optional parameter to specify the number of nearest neighbors considered in the PCA. If this parameter is not set, k is set to the value of parameter mu.");
+ public static final OptionID K_ID = new OptionID("hico.k", "Optional parameter to specify the number of nearest neighbors considered in the PCA. If this parameter is not set, k is set to the value of parameter mu.");
/**
* Parameter to specify the threshold of a distance between a vector q and a
@@ -105,7 +106,7 @@ public class HiCO<V extends NumberVector<V, ?>> extends OPTICS<V, PCACorrelation
* Key: {@code -hico.delta}
* </p>
*/
- public static final OptionID DELTA_ID = OptionID.getOrCreateOptionID("hico.delta", "Threshold of a distance between a vector q and a given space that indicates that " + "q adds a new dimension to the space.");
+ public static final OptionID DELTA_ID = new OptionID("hico.delta", "Threshold of a distance between a vector q and a given space that indicates that " + "q adds a new dimension to the space.");
/**
* The threshold for 'strong' eigenvectors: the 'strong' eigenvectors explain
@@ -117,7 +118,7 @@ public class HiCO<V extends NumberVector<V, ?>> extends OPTICS<V, PCACorrelation
* Key: {@code -hico.alpha}
* </p>
*/
- public static final OptionID ALPHA_ID = OptionID.getOrCreateOptionID("hico.alpha", "The threshold for 'strong' eigenvectors: the 'strong' eigenvectors explain a portion of at least alpha of the total variance.");
+ public static final OptionID ALPHA_ID = new OptionID("hico.alpha", "The threshold for 'strong' eigenvectors: the 'strong' eigenvectors explain a portion of at least alpha of the total variance.");
/**
* The default value for {@link #DELTA_ID}.
@@ -131,7 +132,7 @@ public class HiCO<V extends NumberVector<V, ?>> extends OPTICS<V, PCACorrelation
/**
* Constructor.
- *
+ *
* @param distanceFunction Distance function
* @param mu Mu parameter
*/
@@ -141,31 +142,34 @@ public class HiCO<V extends NumberVector<V, ?>> extends OPTICS<V, PCACorrelation
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
- * Parameterization class.
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
int mu = -1;
-
+
PCABasedCorrelationDistanceFunction distance;
-
+
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- IntParameter muP = new IntParameter(MU_ID, new GreaterConstraint(0));
+ IntParameter muP = new IntParameter(MU_ID);
+ muP.addConstraint(new GreaterConstraint(0));
if (config.grab(muP)) {
mu = muP.getValue();
}
- IntParameter kP = new IntParameter(K_ID, new GreaterConstraint(0), true);
+ IntParameter kP = new IntParameter(K_ID);
+ kP.addConstraint(new GreaterConstraint(0));
+ kP.setOptional(true);
final int k;
if (config.grab(kP)) {
k = kP.getValue();
@@ -173,16 +177,19 @@ public class HiCO<V extends NumberVector<V, ?>> extends OPTICS<V, PCACorrelation
k = mu;
}
- DoubleParameter deltaP = new DoubleParameter(DELTA_ID, new GreaterEqualConstraint(0), DEFAULT_DELTA);
+ DoubleParameter deltaP = new DoubleParameter(DELTA_ID, DEFAULT_DELTA);
+ deltaP.addConstraint(new GreaterEqualConstraint(0));
double delta = DEFAULT_DELTA;
if (config.grab(deltaP)) {
- delta = deltaP.getValue();
+ delta = deltaP.doubleValue();
}
- DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, new IntervalConstraint(0.0, IntervalConstraint.IntervalBoundary.OPEN, 1.0, IntervalConstraint.IntervalBoundary.OPEN), DEFAULT_ALPHA);
+ DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, DEFAULT_ALPHA);
+ alphaP.addConstraint(new GreaterConstraint(0.0));
+ alphaP.addConstraint(new LessConstraint(1.0));
double alpha = DEFAULT_ALPHA;
if (config.grab(alphaP)) {
- alpha = alphaP.getValue();
+ alpha = alphaP.doubleValue();
}
// Configure Distance function
@@ -203,4 +210,4 @@ public class HiCO<V extends NumberVector<V, ?>> extends OPTICS<V, PCACorrelation
return new HiCO<V>(distance, mu);
}
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/LMCLUS.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/LMCLUS.java
index b8942de8..fdea8b35 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/LMCLUS.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/LMCLUS.java
@@ -1,4 +1,5 @@
package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation;
+
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
@@ -23,7 +24,6 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation;
*/
import java.util.ArrayList;
-import java.util.Iterator;
import java.util.List;
import java.util.Random;
@@ -40,24 +40,26 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
import de.lmu.ifi.dbs.elki.math.MeanVariance;
-import de.lmu.ifi.dbs.elki.math.histograms.FlexiHistogram;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.histogram.DoubleDynamicHistogram;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.histogram.DoubleHistogram;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.histogram.DoubleStaticHistogram.Iter;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
-import de.lmu.ifi.dbs.elki.utilities.exceptions.UnableToComplyException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
/**
* Linear manifold clustering in high dimensional spaces by stochastic search.
@@ -83,17 +85,17 @@ public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(LMCLUS.class);
+ private static final Logging LOG = Logging.getLogger(LMCLUS.class);
/**
* Epsilon
*/
- private final static double NOT_FROM_ONE_CLUSTER_PROBABILITY = 0.2;
+ private static final double NOT_FROM_ONE_CLUSTER_PROBABILITY = 0.2;
/**
* Histogram resolution
*/
- private final static int BINS = 50;
+ private static final int BINS = 50;
/**
* The current threshold value calculated by the findSeperation Method.
@@ -114,6 +116,11 @@ public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
* Number of sampling rounds to find a good split
*/
private final int samplingLevel;
+
+ /**
+ * Random factory
+ */
+ private final RandomFactory rnd;
/**
* Constructor.
@@ -122,13 +129,15 @@ public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
* @param minsize Minimum cluster size
* @param samplingLevel Sampling level
* @param sensitivityThreshold Threshold
+ * @param rnd Random factory
*/
- public LMCLUS(int maxdim, int minsize, int samplingLevel, double sensitivityThreshold) {
+ public LMCLUS(int maxdim, int minsize, int samplingLevel, double sensitivityThreshold, RandomFactory rnd) {
super();
this.maxLMDim = maxdim;
this.minsize = minsize;
this.samplingLevel = samplingLevel;
this.sensitivityThreshold = sensitivityThreshold;
+ this.rnd = rnd;
}
/**
@@ -148,40 +157,40 @@ public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
* @param database The database to operate on
* @param relation Relation
* @return Clustering result
- * @throws de.lmu.ifi.dbs.elki.utilities.UnableToComplyException
*/
- public Clustering<Model> run(Database database, Relation<NumberVector<?, ?>> relation) throws UnableToComplyException {
+ public Clustering<Model> run(Database database, Relation<NumberVector<?>> relation) {
Clustering<Model> ret = new Clustering<Model>("LMCLUS Clustering", "lmclus-clustering");
- FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Clustered objects", relation.size(), logger) : null;
- IndefiniteProgress cprogress = logger.isVerbose() ? new IndefiniteProgress("Clusters found", logger) : null;
+ FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Clustered objects", relation.size(), LOG) : null;
+ IndefiniteProgress cprogress = LOG.isVerbose() ? new IndefiniteProgress("Clusters found", LOG) : null;
ModifiableDBIDs unclustered = DBIDUtil.newHashSet(relation.getDBIDs());
+ Random r = rnd.getRandom();
- final int maxdim = Math.min(maxLMDim, DatabaseUtil.dimensionality(relation));
+ final int maxdim = Math.min(maxLMDim, RelationUtil.dimensionality(relation));
int cnum = 0;
- while(unclustered.size() > minsize) {
+ while (unclustered.size() > minsize) {
DBIDs current = unclustered;
int lmDim = 1;
- for(int k = 1; k <= maxdim; k++) {
+ for (int k = 1; k <= maxdim; k++) {
// Implementation note: this while loop is from the original publication
// and the published LMCLUS source code. It doesn't make sense to me -
// it is lacking a stop criterion other than "cluster is too small" and
// "cluster is inseparable"! Additionally, there is good criterion for
// stopping at the appropriate dimensionality either.
- while(true) {
- Separation separation = findSeparation(relation, current, k);
+ while (true) {
+ Separation separation = findSeparation(relation, current, k, r);
// logger.verbose("k: " + k + " goodness: " + separation.goodness +
// " threshold: " + separation.threshold);
- if(separation.goodness <= sensitivityThreshold) {
+ if (separation.goodness <= sensitivityThreshold) {
break;
}
ModifiableDBIDs subset = DBIDUtil.newArray(current.size());
- for(DBIDIter iter = current.iter(); iter.valid(); iter.advance()) {
- if(deviation(relation.get(iter).getColumnVector().minusEquals(separation.originV), separation.basis) < separation.threshold) {
+ for (DBIDIter iter = current.iter(); iter.valid(); iter.advance()) {
+ if (deviation(relation.get(iter).getColumnVector().minusEquals(separation.originV), separation.basis) < separation.threshold) {
subset.add(iter);
}
}
// logger.verbose("size:"+subset.size());
- if(subset.size() < minsize) {
+ if (subset.size() < minsize) {
break;
}
current = subset;
@@ -190,7 +199,7 @@ public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
}
}
// No more clusters found
- if(current.size() < minsize || current == unclustered) {
+ if (current.size() < minsize || current == unclustered) {
break;
}
// New cluster found
@@ -201,23 +210,23 @@ public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
ret.addCluster(cluster);
// Remove from main working set.
unclustered.removeDBIDs(current);
- if(progress != null) {
- progress.setProcessed(relation.size() - unclustered.size(), logger);
+ if (progress != null) {
+ progress.setProcessed(relation.size() - unclustered.size(), LOG);
}
- if(cprogress != null) {
- cprogress.setProcessed(cnum, logger);
+ if (cprogress != null) {
+ cprogress.setProcessed(cnum, LOG);
}
}
// Remaining objects are noise
- if(unclustered.size() > 0) {
+ if (unclustered.size() > 0) {
ret.addCluster(new Cluster<Model>(unclustered, true));
}
- if(progress != null) {
- progress.setProcessed(relation.size(), logger);
- progress.ensureCompleted(logger);
+ if (progress != null) {
+ progress.setProcessed(relation.size(), LOG);
+ progress.ensureCompleted(LOG);
}
- if(cprogress != null) {
- cprogress.setCompleted(logger);
+ if (cprogress != null) {
+ cprogress.setCompleted(LOG);
}
return ret;
}
@@ -251,19 +260,19 @@ public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
* @param relation The vector relation
* @param currentids Current DBIDs
* @param dimension the dimension of the linear manifold to sample.
+ * @param r Random generator
* @return the overall goodness of the separation. The origin, basis and
* threshold are carried in the returned Separation object.
*/
- private Separation findSeparation(Relation<NumberVector<?, ?>> relation, DBIDs currentids, int dimension) {
+ private Separation findSeparation(Relation<NumberVector<?>> relation, DBIDs currentids, int dimension, Random r) {
Separation separation = new Separation();
// determine the number of samples needed to ensure that, with a specified
// probability, at least one sample consists only of points
// from the same cluster.
int samples = (int) Math.min(Math.log(NOT_FROM_ONE_CLUSTER_PROBABILITY) / (Math.log(1 - Math.pow((1.0d / samplingLevel), dimension))), (double) currentids.size());
// System.out.println("Number of samples: " + samples);
- Random r = new Random();
int remaining_retries = 100;
- for(int i = 1; i <= samples; i++) {
+ for (int i = 1; i <= samples; i++) {
DBIDs sample = DBIDUtil.randomSample(currentids, dimension + 1, r.nextLong());
final DBIDIter iter = sample.iter();
// Use first as origin
@@ -273,36 +282,36 @@ public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
Matrix basis;
{
List<Vector> vectors = new ArrayList<Vector>(sample.size() - 1);
- for(;iter.valid(); iter.advance()) {
+ for (; iter.valid(); iter.advance()) {
Vector vec = relation.get(iter).getColumnVector();
vectors.add(vec.minusEquals(originV));
}
// generate orthogonal basis
basis = generateOrthonormalBasis(vectors);
- if(basis == null) {
+ if (basis == null) {
// new sample has to be taken.
i--;
remaining_retries--;
- if(remaining_retries < 0) {
+ if (remaining_retries < 0) {
throw new AbortException("Too many retries in sampling, and always a linear dependant data set.");
}
continue;
}
}
// Generate and fill a histogram.
- FlexiHistogram<Double, Double> histogram = FlexiHistogram.DoubleSumHistogram(BINS);
+ DoubleDynamicHistogram histogram = new DoubleDynamicHistogram(BINS);
double w = 1.0 / currentids.size();
- for(DBIDIter iter2 = currentids.iter(); iter2.valid(); iter2.advance()) {
+ for (DBIDIter iter2 = currentids.iter(); iter2.valid(); iter2.advance()) {
// Skip sampled points
- if(sample.contains(iter2)) {
+ if (sample.contains(iter2)) {
continue;
}
Vector vec = relation.get(iter2).getColumnVector().minusEquals(originV);
final double distance = deviation(vec, basis);
- histogram.aggregate(distance, w);
+ histogram.increment(distance, w);
}
double[] th = findAndEvaluateThreshold(histogram); // evaluate threshold
- if(th[1] > separation.goodness) {
+ if (th[1] > separation.goodness) {
separation.goodness = th[1];
separation.threshold = th[0];
separation.originV = originV;
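The sample count above follows from a simple union argument: if each cluster holds at least a 1/samplingLevel fraction of the points, a (dimension+1)-point draw lands entirely in one cluster with probability roughly (1/samplingLevel)^dimension, so missing in every round with probability at most p needs log(p)/log(1 - (1/samplingLevel)^dimension) rounds. As a standalone helper (hypothetical name; p corresponds to NOT_FROM_ONE_CLUSTER_PROBABILITY):

  static int requiredSamples(int samplingLevel, int dimension, double p, int available) {
    double hit = Math.pow(1.0 / samplingLevel, dimension); // P(one draw is pure)
    double rounds = Math.log(p) / Math.log(1.0 - hit);
    return (int) Math.min(rounds, (double) available); // never more than the data allows
  }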
@@ -332,17 +341,17 @@ public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
first = first.times(1.0 / first.euclideanLength());
Matrix ret = new Matrix(first.getDimensionality(), vectors.size());
ret.setCol(0, first);
- for(int i = 1; i < vectors.size(); i++) {
+ for (int i = 1; i < vectors.size(); i++) {
// System.out.println("Matrix:" + ret);
Vector v_i = vectors.get(i);
Vector u_i = v_i.copy();
// System.out.println("Vector " + i + ":" + partialSol);
- for(int j = 0; j < i; j++) {
+ for (int j = 0; j < i; j++) {
Vector v_j = ret.getCol(j);
double f = v_i.transposeTimes(v_j) / v_j.transposeTimes(v_j);
- if(Double.isNaN(f)) {
- if(logger.isDebuggingFine()) {
- logger.debugFine("Zero vector encountered? " + v_j);
+ if (Double.isNaN(f)) {
+ if (LOG.isDebuggingFine()) {
+ LOG.debugFine("Zero vector encountered? " + v_j);
}
return null;
}
@@ -350,9 +359,9 @@ public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
}
// check if the vectors weren't independent
final double len_u_i = u_i.euclideanLength();
- if(len_u_i == 0.0) {
- if(logger.isDebuggingFine()) {
- logger.debugFine("Points not independent - no orthonormalization.");
+ if (len_u_i == 0.0) {
+ if (LOG.isDebuggingFine()) {
+ LOG.debugFine("Points not independent - no orthonormalization.");
}
return null;
}
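generateOrthonormalBasis() is classical Gram-Schmidt with a bailout when the sample is linearly dependent. The same procedure on plain double[] vectors, as a hedged sketch (dot is a trivial helper defined below; the retry-on-null behavior mirrors the loop above):

  static double[][] gramSchmidt(double[][] v) {
    double[][] u = new double[v.length][];
    for (int i = 0; i < v.length; i++) {
      double[] ui = v[i].clone();
      for (int j = 0; j < i; j++) {
        double f = dot(v[i], u[j]) / dot(u[j], u[j]);
        for (int k = 0; k < ui.length; k++) {
          ui[k] -= f * u[j][k]; // remove the component along u_j
        }
      }
      double len = Math.sqrt(dot(ui, ui));
      if (len == 0.0) {
        return null; // vectors were linearly dependent
      }
      for (int k = 0; k < ui.length; k++) {
        ui[k] /= len; // normalize to unit length
      }
      u[i] = ui;
    }
    return u;
  }

  static double dot(double[] a, double[] b) {
    double s = 0;
    for (int i = 0; i < a.length; i++) {
      s += a[i] * b[i];
    }
    return s;
  }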
@@ -369,7 +378,7 @@ public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
* @param histogram Histogram to evaluate
* @return Position and goodness
*/
- private double[] findAndEvaluateThreshold(FlexiHistogram<Double, Double> histogram) {
+ private double[] findAndEvaluateThreshold(DoubleDynamicHistogram histogram) {
int n = histogram.getNumBins();
double[] p1 = new double[n];
double[] p2 = new double[n];
@@ -381,11 +390,10 @@ public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
// Forward pass
{
MeanVariance mv = new MeanVariance();
- Iterator<DoubleObjPair<Double>> forward = histogram.iterator();
- for(int i = 0; forward.hasNext(); i++) {
- DoubleObjPair<Double> pair = forward.next();
- p1[i] = pair.second + ((i > 0) ? p1[i - 1] : 0);
- mv.put(i, pair.second);
+ DoubleHistogram.Iter forward = histogram.iter();
+ for (int i = 0; forward.valid(); i++, forward.advance()) {
+ p1[i] = forward.getValue() + ((i > 0) ? p1[i - 1] : 0);
+ mv.put(i, forward.getValue());
mu1[i] = mv.getMean();
sigma1[i] = mv.getNaiveStddev();
}
@@ -393,17 +401,18 @@ public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
// Backwards pass
{
MeanVariance mv = new MeanVariance();
- Iterator<DoubleObjPair<Double>> backwards = histogram.reverseIterator();
- for(int j = n - 1; backwards.hasNext(); j--) {
- DoubleObjPair<Double> pair = backwards.next();
- p2[j] = pair.second + ((j + 1 < n) ? p2[j + 1] : 0);
- mv.put(j, pair.second);
+ DoubleHistogram.Iter backwards = histogram.iter();
+ backwards.seek(histogram.getNumBins() - 1); // Seek to last
+
+ for (int j = n - 1; backwards.valid(); j--, backwards.retract()) {
+ p2[j] = backwards.getValue() + ((j + 1 < n) ? p2[j + 1] : 0);
+ mv.put(j, backwards.getValue());
mu2[j] = mv.getMean();
sigma2[j] = mv.getNaiveStddev();
}
}
- for(int i = 0; i < n; i++) {
+ for (int i = 0; i < n; i++) {
jt[i] = 1.0 + 2 * (p1[i] * (Math.log(sigma1[i]) - Math.log(p1[i])) + p2[i] * (Math.log(sigma2[i]) - Math.log(p2[i])));
}
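The two passes above accumulate, for each candidate split bin, the histogram mass on either side together with the weighted running mean and naive standard deviation of the bin index; jt[] then evaluates a Kittler-Illingworth style minimum-error criterion from these. The forward pass in isolation, over a plain weight array (sketch; the backward pass mirrors it from the right, and bins before any mass yield NaN exactly as with MeanVariance):

  static double[][] forwardStats(double[] w) {
    int n = w.length;
    double[] p = new double[n], mu = new double[n], sigma = new double[n];
    double mass = 0, sum = 0, sqsum = 0;
    for (int i = 0; i < n; i++) {
      mass += w[i];
      sum += i * w[i];
      sqsum += (double) i * i * w[i];
      p[i] = mass; // cumulative histogram mass up to bin i
      mu[i] = sum / mass; // weighted mean of the bin index
      sigma[i] = Math.sqrt(Math.max(0, sqsum / mass - mu[i] * mu[i])); // naive stddev
    }
    return new double[][] { p, mu, sigma };
  }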
@@ -411,23 +420,23 @@ public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
double bestgoodness = Double.NEGATIVE_INFINITY;
double devPrev = jt[1] - jt[0];
- for(int i = 1; i < jt.length - 1; i++) {
+ for (int i = 1; i < jt.length - 1; i++) {
double devCur = jt[i + 1] - jt[i];
// System.out.println(p1[i]);
// System.out.println(jt[i + 1]);
// System.out.println(jt[i]);
// System.out.println(devCur);
// Local minimum found - calculate depth
- if(devCur >= 0 && devPrev <= 0) {
+ if (devCur >= 0 && devPrev <= 0) {
double lowestMaxima = Double.POSITIVE_INFINITY;
- for(int j = i - 1; j > 0; j--) {
- if(jt[j - 1] < jt[j]) {
+ for (int j = i - 1; j > 0; j--) {
+ if (jt[j - 1] < jt[j]) {
lowestMaxima = Math.min(lowestMaxima, jt[j]);
break;
}
}
- for(int j = i + 1; j < n - 2; j++) {
- if(jt[j + 1] < jt[j]) {
+ for (int j = i + 1; j < n - 2; j++) {
+ if (jt[j + 1] < jt[j]) {
lowestMaxima = Math.min(lowestMaxima, jt[j]);
break;
}
@@ -436,23 +445,25 @@ public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
final double mud = mu1[i] - mu2[i];
double discriminability = mud * mud / (sigma1[i] * sigma1[i] + sigma2[i] * sigma2[i]);
- if(Double.isNaN(discriminability)) {
+ if (Double.isNaN(discriminability)) {
discriminability = -1;
}
double goodness = localDepth * discriminability;
- if(goodness > bestgoodness) {
+ if (goodness > bestgoodness) {
bestgoodness = goodness;
bestpos = i;
}
}
devPrev = devCur;
}
- return new double[] { histogram.getBinMax(bestpos), bestgoodness };
+ Iter iter = histogram.iter();
+ iter.seek(bestpos);
+ return new double[] { iter.getRight(), bestgoodness };
}
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
@Override
@@ -500,22 +511,27 @@ public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
/**
* Parameter with the maximum dimension to search for
*/
- public static final OptionID MAXDIM_ID = OptionID.getOrCreateOptionID("lmclus.maxdim", "Maximum linear manifold dimension to search.");
+ public static final OptionID MAXDIM_ID = new OptionID("lmclus.maxdim", "Maximum linear manifold dimension to search.");
/**
* Parameter for the minimum cluster size
*/
- public static final OptionID MINSIZE_ID = OptionID.getOrCreateOptionID("lmclus.minsize", "Minimum cluster size to allow.");
+ public static final OptionID MINSIZE_ID = new OptionID("lmclus.minsize", "Minimum cluster size to allow.");
/**
* Sampling intensity level
*/
- public static final OptionID SAMPLINGL_ID = OptionID.getOrCreateOptionID("lmclus.sampling-level", "A number used to determine how many samples are taken in each search.");
+ public static final OptionID SAMPLINGL_ID = new OptionID("lmclus.sampling-level", "A number used to determine how many samples are taken in each search.");
/**
* Global significance threshold
*/
- public static final OptionID THRESHOLD_ID = OptionID.getOrCreateOptionID("lmclus.threshold", "Threshold to determine if a cluster was found.");
+ public static final OptionID THRESHOLD_ID = new OptionID("lmclus.threshold", "Threshold to determine if a cluster was found.");
+
+ /**
+ * Random seeding
+ */
+ public static final OptionID RANDOM_ID = new OptionID("lmclus.seed", "Random generator seed.");
/**
* Maximum dimensionality to search for
@@ -536,31 +552,43 @@ public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
* Threshold
*/
private double threshold;
+
+ /**
+ * Random generator
+ */
+ private RandomFactory rnd;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- IntParameter maxLMDimP = new IntParameter(MAXDIM_ID, new GreaterEqualConstraint(1), true);
- if(config.grab(maxLMDimP)) {
+ IntParameter maxLMDimP = new IntParameter(MAXDIM_ID);
+ maxLMDimP.addConstraint(new GreaterEqualConstraint(1));
+ maxLMDimP.setOptional(true);
+ if (config.grab(maxLMDimP)) {
maxdim = maxLMDimP.getValue();
}
- IntParameter minsizeP = new IntParameter(MINSIZE_ID, new GreaterEqualConstraint(1));
- if(config.grab(minsizeP)) {
+ IntParameter minsizeP = new IntParameter(MINSIZE_ID);
+ minsizeP.addConstraint(new GreaterEqualConstraint(1));
+ if (config.grab(minsizeP)) {
minsize = minsizeP.getValue();
}
IntParameter samplingLevelP = new IntParameter(SAMPLINGL_ID, 100);
- if(config.grab(samplingLevelP)) {
+ if (config.grab(samplingLevelP)) {
samplingLevel = samplingLevelP.getValue();
}
DoubleParameter sensivityThresholdP = new DoubleParameter(THRESHOLD_ID);
- if(config.grab(sensivityThresholdP)) {
+ if (config.grab(sensivityThresholdP)) {
threshold = sensivityThresholdP.getValue();
}
+ RandomParameter rndP = new RandomParameter(RANDOM_ID);
+ if (config.grab(rndP)) {
+ rnd = rndP.getValue();
+ }
}
@Override
protected LMCLUS makeInstance() {
- return new LMCLUS(maxdim, minsize, samplingLevel, threshold);
+ return new LMCLUS(maxdim, minsize, samplingLevel, threshold, rnd);
}
}
-} \ No newline at end of file
+}
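
Note: the reworked findAndEvaluateThreshold above makes one forward and one backward cumulative pass over the histogram before scoring every candidate split with the jt criterion; MeanVariance.put(i, value) accumulates a weighted mean and variance of the bin index, weighted by the bin content. A minimal stand-alone sketch of the two passes on plain arrays, with the running weighted statistics written out explicitly (West's algorithm); the method name and array interface are illustrative, not ELKI API:

    static double[] evaluateSplits(double[] bins) {
      final int n = bins.length;
      double[] p1 = new double[n], mu1 = new double[n], sigma1 = new double[n];
      double[] p2 = new double[n], mu2 = new double[n], sigma2 = new double[n];
      double w = 0, mean = 0, s = 0; // running weighted mean/variance state
      for (int i = 0; i < n; i++) { // forward pass
        p1[i] = bins[i] + ((i > 0) ? p1[i - 1] : 0);
        final double delta = i - mean;
        w += bins[i];
        if (w > 0) {
          mean += delta * bins[i] / w;
          s += bins[i] * delta * (i - mean);
        }
        mu1[i] = mean;
        sigma1[i] = (w > 0) ? Math.sqrt(s / w) : 0;
      }
      w = 0; mean = 0; s = 0;
      for (int j = n - 1; j >= 0; j--) { // backward pass, as iter.retract() above
        p2[j] = bins[j] + ((j + 1 < n) ? p2[j + 1] : 0);
        final double delta = j - mean;
        w += bins[j];
        if (w > 0) {
          mean += delta * bins[j] / w;
          s += bins[j] * delta * (j - mean);
        }
        mu2[j] = mean;
        sigma2[j] = (w > 0) ? Math.sqrt(s / w) : 0;
      }
      double[] jt = new double[n]; // scored exactly as in the patch
      for (int i = 0; i < n; i++) {
        jt[i] = 1.0 + 2 * (p1[i] * (Math.log(sigma1[i]) - Math.log(p1[i])) + p2[i] * (Math.log(sigma2[i]) - Math.log(p2[i])));
      }
      return jt;
    }

The local-minimum scan and depth/discriminability scoring then proceed over jt as shown in the hunk above.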
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ORCLUS.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ORCLUS.java
index 2e9f4a9b..f567098b 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ORCLUS.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ORCLUS.java
@@ -37,33 +37,35 @@ import de.lmu.ifi.dbs.elki.data.model.Model;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
-import de.lmu.ifi.dbs.elki.database.query.GenericDistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.GenericDistanceDBIDList;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
import de.lmu.ifi.dbs.elki.math.linearalgebra.SortedEigenPairs;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAResult;
import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCARunner;
import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.IntervalConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessEqualConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.LongParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
/**
* ORCLUS provides the ORCLUS algorithm, an algorithm to find clusters in high
@@ -83,38 +85,21 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.LongParameter;
@Title("ORCLUS: Arbitrarily ORiented projected CLUSter generation")
@Description("Algorithm to find correlation clusters in high dimensional spaces.")
@Reference(authors = "C. C. Aggarwal, P. S. Yu", title = "Finding Generalized Projected Clusters in High Dimensional Spaces", booktitle = "Proc. ACM SIGMOD Int. Conf. on Management of Data (SIGMOD '00)", url = "http://dx.doi.org/10.1145/342009.335383")
-public class ORCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClustering<Clustering<Model>, V> {
+public class ORCLUS<V extends NumberVector<?>> extends AbstractProjectedClustering<Clustering<Model>, V> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(ORCLUS.class);
+ private static final Logging LOG = Logging.getLogger(ORCLUS.class);
/**
- * Parameter to specify the factor for reducing the number of current clusters
- * in each iteration, must be an integer greater than 0 and less than 1.
- * <p>
- * Default value: {@code 0.5}
- * </p>
- * <p>
- * Key: {@code -orclus.alpha}
- * </p>
- */
- public static final OptionID ALPHA_ID = OptionID.getOrCreateOptionID("orclus.alpha", "The factor for reducing the number of current clusters in each iteration.");
-
- /**
- * Parameter to specify the random generator seed.
- */
- public static final OptionID SEED_ID = OptionID.getOrCreateOptionID("orclus.seed", "The random number generator seed.");
-
- /**
- * Holds the value of {@link #ALPHA_ID}.
+ * Holds the value of {@link Parameterizer#ALPHA_ID}.
*/
private double alpha;
/**
- * Holds the value of {@link #SEED_ID}.
+ * Random generator
*/
- private Long seed;
+ private RandomFactory rnd;
/**
* The PCA utility object.
@@ -128,13 +113,13 @@ public class ORCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClust
* @param k_i k_i Parameter
* @param l l Parameter
* @param alpha Alpha Parameter
- * @param seed Seed parameter
+ * @param rnd Random generator
* @param pca PCA runner
*/
- public ORCLUS(int k, int k_i, int l, double alpha, long seed, PCARunner<V> pca) {
+ public ORCLUS(int k, int k_i, int l, double alpha, RandomFactory rnd, PCARunner<V> pca) {
super(k, k_i, l);
this.alpha = alpha;
- this.seed = seed;
+ this.rnd = rnd;
this.pca = pca;
}
@@ -148,9 +133,9 @@ public class ORCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClust
try {
DistanceQuery<V, DoubleDistance> distFunc = this.getDistanceQuery(database);
// current dimensionality associated with each seed
- int dim_c = DatabaseUtil.dimensionality(relation);
+ int dim_c = RelationUtil.dimensionality(relation);
- if(dim_c < l) {
+ if (dim_c < l) {
throw new IllegalStateException("Dimensionality of data < parameter l! " + "(" + dim_c + " < " + l + ")");
}
@@ -162,19 +147,19 @@ public class ORCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClust
double beta = StrictMath.exp(-StrictMath.log((double) dim_c / (double) l) * StrictMath.log(1 / alpha) / StrictMath.log((double) k_c / (double) k));
- IndefiniteProgress cprogress = logger.isVerbose() ? new IndefiniteProgress("Current number of clusters:", logger) : null;
+ IndefiniteProgress cprogress = LOG.isVerbose() ? new IndefiniteProgress("Current number of clusters:", LOG) : null;
- while(k_c > k) {
- if(cprogress != null) {
- cprogress.setProcessed(clusters.size(), logger);
+ while (k_c > k) {
+ if (cprogress != null) {
+ cprogress.setProcessed(clusters.size(), LOG);
}
// find partitioning induced by the seeds of the clusters
assign(relation, distFunc, clusters);
// determine current subspace associated with each cluster
- for(ORCLUSCluster cluster : clusters) {
- if(cluster.objectIDs.size() > 0) {
+ for (ORCLUSCluster cluster : clusters) {
+ if (cluster.objectIDs.size() > 0) {
cluster.basis = findBasis(relation, distFunc, cluster, dim_c);
}
}
@@ -187,19 +172,18 @@ public class ORCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClust
}
assign(relation, distFunc, clusters);
- if(cprogress != null) {
+ if (cprogress != null) {
cprogress.setProcessed(clusters.size());
- cprogress.setCompleted(logger);
+ cprogress.setCompleted(LOG);
}
// get the result
Clustering<Model> r = new Clustering<Model>("ORCLUS clustering", "orclus-clustering");
- for(ORCLUSCluster c : clusters) {
+ for (ORCLUSCluster c : clusters) {
r.addCluster(new Cluster<Model>(c.objectIDs, ClusterModel.CLUSTER));
}
return r;
- }
- catch(Exception e) {
+ } catch (Exception e) {
throw new IllegalStateException(e);
}
}
@@ -212,11 +196,11 @@ public class ORCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClust
* @return the initial seed list
*/
private List<ORCLUSCluster> initialSeeds(Relation<V> database, int k) {
- DBIDs randomSample = DBIDUtil.randomSample(database.getDBIDs(), k, seed);
- V factory = DatabaseUtil.assumeVectorField(database).getFactory();
+ DBIDs randomSample = DBIDUtil.randomSample(database.getDBIDs(), k, rnd);
+ NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(database);
List<ORCLUSCluster> seeds = new ArrayList<ORCLUSCluster>();
- for(DBIDIter iter = randomSample.iter(); iter.valid(); iter.advance()) {
- seeds.add(new ORCLUSCluster(database.get(iter), iter.getDBID(), factory));
+ for (DBIDIter iter = randomSample.iter(); iter.valid(); iter.advance()) {
+ seeds.add(new ORCLUSCluster(database.get(iter), iter, factory));
}
return seeds;
}
@@ -231,15 +215,15 @@ public class ORCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClust
* assigned to
*/
private void assign(Relation<V> database, DistanceQuery<V, DoubleDistance> distFunc, List<ORCLUSCluster> clusters) {
- V factory = DatabaseUtil.assumeVectorField(database).getFactory();
+ NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(database);
// clear the current clusters
- for(ORCLUSCluster cluster : clusters) {
+ for (ORCLUSCluster cluster : clusters) {
cluster.objectIDs.clear();
}
// projected centroids of the clusters
List<V> projectedCentroids = new ArrayList<V>(clusters.size());
- for(ORCLUSCluster c : clusters) {
+ for (ORCLUSCluster c : clusters) {
projectedCentroids.add(projection(c, c.centroid, factory));
}
@@ -251,11 +235,11 @@ public class ORCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClust
ORCLUSCluster minCluster = null;
// determine projected distance between o and cluster
- for(int i = 0; i < clusters.size(); i++) {
+ for (int i = 0; i < clusters.size(); i++) {
ORCLUSCluster c = clusters.get(i);
V o_proj = projection(c, o, factory);
DoubleDistance dist = distFunc.distance(o_proj, projectedCentroids.get(i));
- if(minDist == null || minDist.compareTo(dist) > 0) {
+ if (minDist == null || minDist.compareTo(dist) > 0) {
minDist = dist;
minCluster = c;
}
@@ -266,9 +250,9 @@ public class ORCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClust
}
// recompute the seed in each clusters
- for(ORCLUSCluster cluster : clusters) {
- if(cluster.objectIDs.size() > 0) {
- cluster.centroid = DatabaseUtil.centroid(database, cluster.objectIDs);
+ for (ORCLUSCluster cluster : clusters) {
+ if (cluster.objectIDs.size() > 0) {
+ cluster.centroid = Centroid.make(database, cluster.objectIDs).toVector(database);
}
}
}
@@ -286,13 +270,12 @@ public class ORCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClust
private Matrix findBasis(Relation<V> database, DistanceQuery<V, DoubleDistance> distFunc, ORCLUSCluster cluster, int dim) {
// covariance matrix of cluster
// Matrix covariance = Util.covarianceMatrix(database, cluster.objectIDs);
- List<DistanceResultPair<DoubleDistance>> results = new ArrayList<DistanceResultPair<DoubleDistance>>(cluster.objectIDs.size());
- for(DBIDIter it = cluster.objectIDs.iter(); it.valid(); it.advance()) {
+ GenericDistanceDBIDList<DoubleDistance> results = new GenericDistanceDBIDList<DoubleDistance>(cluster.objectIDs.size());
+ for (DBIDIter it = cluster.objectIDs.iter(); it.valid(); it.advance()) {
DoubleDistance distance = distFunc.distance(cluster.centroid, database.get(it));
- DistanceResultPair<DoubleDistance> qr = new GenericDistanceResultPair<DoubleDistance>(distance, it.getDBID());
- results.add(qr);
+ results.add(distance, it);
}
- Collections.sort(results);
+ results.sort();
PCAResult pcares = pca.processQueryResult(results, database);
SortedEigenPairs eigenPairs = pcares.getEigenPairs();
return eigenPairs.reverseEigenVectors(dim);
@@ -321,9 +304,9 @@ public class ORCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClust
*/
private void merge(Relation<V> database, DistanceQuery<V, DoubleDistance> distFunc, List<ORCLUSCluster> clusters, int k_new, int d_new, IndefiniteProgress cprogress) {
ArrayList<ProjectedEnergy> projectedEnergies = new ArrayList<ProjectedEnergy>();
- for(int i = 0; i < clusters.size(); i++) {
- for(int j = 0; j < clusters.size(); j++) {
- if(i >= j) {
+ for (int i = 0; i < clusters.size(); i++) {
+ for (int j = 0; j < clusters.size(); j++) {
+ if (i >= j) {
continue;
}
// projected energy of c_ij in subspace e_ij
@@ -335,21 +318,21 @@ public class ORCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClust
}
}
- while(clusters.size() > k_new) {
- if(cprogress != null) {
- cprogress.setProcessed(clusters.size(), logger);
+ while (clusters.size() > k_new) {
+ if (cprogress != null) {
+ cprogress.setProcessed(clusters.size(), LOG);
}
// find the smallest value of r_ij
ProjectedEnergy minPE = Collections.min(projectedEnergies);
// renumber the clusters by replacing cluster c_i with cluster c_ij
// and discarding cluster c_j
- for(int c = 0; c < clusters.size(); c++) {
- if(c == minPE.i) {
+ for (int c = 0; c < clusters.size(); c++) {
+ if (c == minPE.i) {
clusters.remove(c);
clusters.add(c, minPE.cluster);
}
- if(c == minPE.j) {
+ if (c == minPE.j) {
clusters.remove(c);
}
}
@@ -358,16 +341,15 @@ public class ORCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClust
int i = minPE.i;
int j = minPE.j;
Iterator<ProjectedEnergy> it = projectedEnergies.iterator();
- while(it.hasNext()) {
+ while (it.hasNext()) {
ProjectedEnergy pe = it.next();
- if(pe.i == i || pe.i == j || pe.j == i || pe.j == j) {
+ if (pe.i == i || pe.i == j || pe.j == i || pe.j == j) {
it.remove();
- }
- else {
- if(pe.i > j) {
+ } else {
+ if (pe.i > j) {
pe.i -= 1;
}
- if(pe.j > j) {
+ if (pe.j > j) {
pe.j -= 1;
}
}
@@ -375,11 +357,10 @@ public class ORCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClust
// ... and recompute them
ORCLUSCluster c_ij = minPE.cluster;
- for(int c = 0; c < clusters.size(); c++) {
- if(c < i) {
+ for (int c = 0; c < clusters.size(); c++) {
+ if (c < i) {
projectedEnergies.add(projectedEnergy(database, distFunc, clusters.get(c), c_ij, c, i, d_new));
- }
- else if(c > i) {
+ } else if (c > i) {
projectedEnergies.add(projectedEnergy(database, distFunc, clusters.get(c), c_ij, i, c, d_new));
}
}
@@ -404,11 +385,11 @@ public class ORCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClust
private ProjectedEnergy projectedEnergy(Relation<V> database, DistanceQuery<V, DoubleDistance> distFunc, ORCLUSCluster c_i, ORCLUSCluster c_j, int i, int j, int dim) {
// union of cluster c_i and c_j
ORCLUSCluster c_ij = union(database, distFunc, c_i, c_j, dim);
- V factory = DatabaseUtil.assumeVectorField(database).getFactory();
+ NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(database);
DoubleDistance sum = getDistanceFunction().getDistanceFactory().nullDistance();
V c_proj = projection(c_ij, c_ij.centroid, factory);
- for(DBIDIter iter = c_ij.objectIDs.iter(); iter.valid(); iter.advance()) {
+ for (DBIDIter iter = c_ij.objectIDs.iter(); iter.valid(); iter.advance()) {
V o_proj = projection(c_ij, database.get(iter), factory);
DoubleDistance dist = distFunc.distance(o_proj, c_proj);
sum = sum.plus(dist.times(dist));
@@ -436,16 +417,15 @@ public class ORCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClust
// convert into array.
c.objectIDs = DBIDUtil.newArray(c.objectIDs);
- if(c.objectIDs.size() > 0) {
- c.centroid = DatabaseUtil.centroid(relation, c.objectIDs);
+ if (c.objectIDs.size() > 0) {
+ c.centroid = Centroid.make(relation, c.objectIDs).toVector(relation);
c.basis = findBasis(relation, distFunc, c, dim);
- }
- else {
- V factory = DatabaseUtil.assumeVectorField(relation).getFactory();
- Vector cent = c1.centroid.getColumnVector().plusEquals(c2.centroid.getColumnVector()).timesEquals(0.5);
+ } else {
+ NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(relation);
+ Vector cent = c1.centroid.getColumnVector().plusEquals(c2.centroid.getColumnVector()).timesEquals(0.5);
c.centroid = factory.newNumberVector(cent.getArrayRef());
double[][] doubles = new double[c1.basis.getRowDimensionality()][dim];
- for(int i = 0; i < dim; i++) {
+ for (int i = 0; i < dim; i++) {
doubles[i][i] = 1;
}
c.basis = new Matrix(doubles);
@@ -462,7 +442,7 @@ public class ORCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClust
* @param factory Factory object / prototype
* @return the projection of double vector o in the subspace of cluster c
*/
- private V projection(ORCLUSCluster c, V o, V factory) {
+ private V projection(ORCLUSCluster c, V o, NumberVector.Factory<V, ?> factory) {
Matrix o_proj = o.getColumnVector().transposeTimes(c.basis);
double[] values = o_proj.getColumnPackedCopy();
return factory.newNumberVector(values);
@@ -475,7 +455,7 @@ public class ORCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClust
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -511,21 +491,19 @@ public class ORCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClust
* Creates a new cluster containing the specified object o.
*
* @param o the object belonging to this cluster.
+ * @param id Object id
* @param factory Factory object / prototype
*/
- ORCLUSCluster(V o, DBID id, V factory) {
+ ORCLUSCluster(V o, DBIDRef id, NumberVector.Factory<V, ?> factory) {
this.objectIDs.add(id);
// initially the basis is the original axis-system
int dim = o.getDimensionality();
this.basis = Matrix.unitMatrix(dim);
- // TODO: can we replace this with some kind of clone() statement?
// initially the centroid is the value array of o
- double[] values = new double[o.getDimensionality()];
- for(int d = 1; d <= o.getDimensionality(); d++) {
- values[d - 1] = o.doubleValue(d);
- }
+ double[] values = o.getColumnVector().getArrayRef();
+ // FIXME: avoid going through 'values'
this.centroid = factory.newNumberVector(values);
}
}
@@ -571,10 +549,28 @@ public class ORCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClust
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractProjectedClustering.Parameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractProjectedClustering.Parameterizer {
+ /**
+ * Parameter to specify the factor for reducing the number of current
+ * clusters in each iteration, must be an integer greater than 0 and less
+ * than 1.
+ * <p>
+ * Default value: {@code 0.5}
+ * </p>
+ * <p>
+ * Key: {@code -orclus.alpha}
+ * </p>
+ */
+ public static final OptionID ALPHA_ID = new OptionID("orclus.alpha", "The factor for reducing the number of current clusters in each iteration.");
+
+ /**
+ * Parameter to specify the random generator seed.
+ */
+ public static final OptionID SEED_ID = new OptionID("orclus.seed", "The random number generator seed.");
+
protected double alpha = -1;
- protected Long seed = null;
+ protected RandomFactory rnd;
protected PCARunner<V> pca = null;
@@ -593,22 +589,24 @@ public class ORCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClust
}
protected void configAlpha(Parameterization config) {
- DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, new IntervalConstraint(0, IntervalConstraint.IntervalBoundary.OPEN, 1, IntervalConstraint.IntervalBoundary.CLOSE), 0.5);
- if(config.grab(alphaP)) {
- alpha = alphaP.getValue();
+ DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, 0.5);
+ alphaP.addConstraint(new GreaterConstraint(0));
+ alphaP.addConstraint(new LessEqualConstraint(1));
+ if (config.grab(alphaP)) {
+ alpha = alphaP.doubleValue();
}
}
protected void configSeed(Parameterization config) {
- LongParameter seedP = new LongParameter(SEED_ID, true);
- if(config.grab(seedP)) {
- seed = seedP.getValue();
+ RandomParameter rndP = new RandomParameter(SEED_ID);
+ if (config.grab(rndP)) {
+ rnd = rndP.getValue();
}
}
@Override
protected ORCLUS<V> makeInstance() {
- return new ORCLUS<V>(k, k_i, l, alpha, seed, pca);
+ return new ORCLUS<V>(k, k_i, l, alpha, rnd, pca);
}
}
-} \ No newline at end of file
+}
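
Note: the projection helper changed above computes o^T * basis and re-wraps the result through the vector factory. Stripped of ELKI's Matrix and NumberVector.Factory plumbing, the core operation on plain arrays is just a matrix-vector product (a sketch; names are illustrative):

    // basis has one row per input dimension, one column per subspace dimension.
    static double[] project(double[] o, double[][] basis) {
      final int cols = basis[0].length;
      double[] proj = new double[cols];
      for (int j = 0; j < cols; j++) {
        double dot = 0;
        for (int i = 0; i < o.length; i++) {
          dot += o[i] * basis[i][j]; // (o^T * basis)_j
        }
        proj[j] = dot;
      }
      return proj;
    }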
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHInterval.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHInterval.java
index 46112498..0153ddc3 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHInterval.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHInterval.java
@@ -42,7 +42,7 @@ import de.lmu.ifi.dbs.elki.logging.LoggingConfiguration;
*/
public class CASHInterval extends HyperBoundingBox implements Comparable<CASHInterval> {
/**
- * Serial version number
+ * Serial version number.
*/
private static final long serialVersionUID = 1;
@@ -141,14 +141,14 @@ public class CASHInterval extends HyperBoundingBox implements Comparable<CASHInt
/**
* Removes the specified ids from this interval.
*
- * @param ids the set of ids to be removed
+ * @param ids2 the set of ids to be removed
*/
- public void removeIDs(DBIDs ids) {
- this.ids.removeDBIDs(ids);
+ public void removeIDs(DBIDs ids2) {
+ this.ids.removeDBIDs(ids2);
}
/**
- * Returns the number of objects associated with this interval
+ * Returns the number of objects associated with this interval.
*
* @return the number of objects associated with this interval
*/
@@ -157,18 +157,6 @@ public class CASHInterval extends HyperBoundingBox implements Comparable<CASHInt
}
/**
- * Returns true if this interval has already been split in the specified
- * dimension.
- *
- * @param d the dimension to be tested
- * @return true if this interval has already been split in the specified
- * dimension
- */
- public boolean isSplit(int d) {
- return maxSplitDimension >= d;
- }
-
- /**
* Returns a String representation of the HyperBoundingBox.
*
* @return String
@@ -286,9 +274,6 @@ public class CASHInterval extends HyperBoundingBox implements Comparable<CASHInt
}
}
- /**
- * @see Object#equals(Object)
- */
@Override
public boolean equals(Object o) {
if(this == o) {
@@ -305,9 +290,6 @@ public class CASHInterval extends HyperBoundingBox implements Comparable<CASHInt
return super.equals(o);
}
- /**
- * Returns the unique id of this interval as hash code.
- */
@Override
public int hashCode() {
return intervalID.hashCode();
@@ -330,24 +312,23 @@ public class CASHInterval extends HyperBoundingBox implements Comparable<CASHInt
return;
}
- int dim = getDimensionality();
- int childLevel = isSplit(dim) ? level + 1 : level;
-
- int splitDim = isSplit(dim) ? 1 : maxSplitDimension + 1;
- double splitPoint = getMin(splitDim) + (getMax(splitDim) - getMin(splitDim)) / 2;
+ final boolean issplit = (maxSplitDimension >= (getDimensionality() - 1));
+ final int childLevel = issplit ? level + 1 : level;
+ final int splitDim = issplit ? 0 : maxSplitDimension + 1;
+ final double splitPoint = getMin(splitDim) + (getMax(splitDim) - getMin(splitDim)) * .5;
// left and right child
for(int i = 0; i < 2; i++) {
- double[] min = SpatialUtil.getMin(this);
- double[] max = SpatialUtil.getMax(this);
+ double[] min = SpatialUtil.getMin(this); // clone
+ double[] max = SpatialUtil.getMax(this); // clone
// right child
if(i == 0) {
- min[splitDim - 1] = splitPoint;
+ min[splitDim] = splitPoint;
}
// left child
else {
- max[splitDim - 1] = splitPoint;
+ max[splitDim] = splitPoint;
}
ModifiableDBIDs childIDs = split.determineIDs(getIDs(), new HyperBoundingBox(min, max), d_min, d_max);
@@ -364,7 +345,7 @@ public class CASHInterval extends HyperBoundingBox implements Comparable<CASHInt
}
if(LoggingConfiguration.DEBUG) {
- StringBuffer msg = new StringBuffer();
+ StringBuilder msg = new StringBuilder();
msg.append("\nchild level ").append(childLevel).append(", split Dim ").append(splitDim);
if(leftChild != null) {
msg.append("\nleft ").append(leftChild);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHIntervalSplit.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHIntervalSplit.java
index b0a12832..12f10725 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHIntervalSplit.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHIntervalSplit.java
@@ -27,7 +27,6 @@ import java.util.HashMap;
import java.util.Map;
import de.lmu.ifi.dbs.elki.data.HyperBoundingBox;
-import de.lmu.ifi.dbs.elki.data.ParameterizationFunction;
import de.lmu.ifi.dbs.elki.data.spatial.SpatialUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
@@ -69,7 +68,7 @@ public class CASHIntervalSplit {
/**
* The logger of the class.
*/
- private final static Logging logger = Logging.getLogger(CASHIntervalSplit.class);
+ private static final Logging LOG = Logging.getLogger(CASHIntervalSplit.class);
/**
* Initializes the logger and sets the debug status to the given value.
@@ -99,8 +98,8 @@ public class CASHIntervalSplit {
* exceeds minPts, null otherwise
*/
public ModifiableDBIDs determineIDs(DBIDs superSetIDs, HyperBoundingBox interval, double d_min, double d_max) {
- StringBuffer msg = new StringBuffer();
- if(logger.isDebugging()) {
+ StringBuilder msg = LOG.isDebugging() ? new StringBuilder() : null;
+ if(msg != null) {
msg.append("interval ").append(interval);
}
@@ -116,7 +115,7 @@ public class CASHIntervalSplit {
}
for(DBIDIter iter = superSetIDs.iter(); iter.valid(); iter.advance()) {
- DBID id = iter.getDBID();
+ DBID id = DBIDUtil.deref(iter);
Double f_min = minima.get(id);
Double f_max = maxima.get(id);
@@ -129,7 +128,7 @@ public class CASHIntervalSplit {
maxima.put(id, f_max);
}
- if(logger.isDebugging()) {
+ if(msg != null) {
msg.append("\n\nf_min ").append(f_min);
msg.append("\nf_max ").append(f_max);
msg.append("\nd_min ").append(d_min);
@@ -142,21 +141,21 @@ public class CASHIntervalSplit {
if(f_min <= d_max && f_max >= d_min) {
childIDs.add(id);
- if(logger.isDebugging()) {
+ if(msg != null) {
msg.append("\nid ").append(id).append(" appended");
}
}
else {
- if(logger.isDebugging()) {
+ if(msg != null) {
msg.append("\nid ").append(id).append(" NOT appended");
}
}
}
- if(logger.isDebugging()) {
+ if(msg != null) {
msg.append("\nchildIds ").append(childIDs.size());
- logger.debugFine(msg.toString());
+ LOG.debugFine(msg.toString());
}
if(childIDs.size() < minPts) {
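
Note: the logging change above allocates the debug StringBuilder only when debug output is enabled and then reuses the null check as the guard, so no string building happens on the hot path. The same pattern with plain java.util.logging (a generic sketch, not ELKI code; the appended predicate is a stand-in):

    import java.util.logging.Level;
    import java.util.logging.Logger;

    static void logDecisions(Logger log, int[] ids) {
      StringBuilder msg = log.isLoggable(Level.FINE) ? new StringBuilder() : null;
      for (int id : ids) {
        boolean appended = (id % 2 == 0); // stand-in for the real containment test
        if (msg != null) {
          msg.append('\n').append(id).append(appended ? " appended" : " NOT appended");
        }
      }
      if (msg != null) {
        log.fine(msg.toString()); // one log record, built only when enabled
      }
    }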
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/ParameterizationFunction.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/ParameterizationFunction.java
new file mode 100644
index 00000000..56e68bfe
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/ParameterizationFunction.java
@@ -0,0 +1,530 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.cash;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.data.HyperBoundingBox;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.spatial.SpatialUtil;
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
+
+/**
+ * A parameterization function describes all lines in a d-dimensional feature
+ * space intersecting in one point p. A single line in d-dimensional space is
+ * uniquely determined by a translation vector p and (d-1) angles alpha_i
+ * belonging to the normal vector n.
+ *
+ * @author Elke Achtert
+ */
+public class ParameterizationFunction {
+ /**
+ * Available types for the global extremum.
+ *
+ * @apiviz.exclude
+ */
+ public enum ExtremumType {
+ /**
+ * Minimum.
+ */
+ MINIMUM,
+ /**
+ * Maximum.
+ */
+ MAXIMUM,
+ /**
+ * Constant.
+ */
+ CONSTANT
+ }
+
+ /**
+ * A small number to handle numbers near 0 as 0.
+ */
+ public static final double DELTA = 1E-10;
+
+ /**
+ * Holds the alpha values of the global extremum.
+ */
+ private double[] alphaExtremum;
+
+ /**
+ * Holds the type of the global extremum.
+ */
+ private ExtremumType extremumType;
+
+ /**
+ * The actual vector.
+ */
+ private NumberVector<?> vec;
+
+ /**
+ * Provides a new parameterization function describing all lines in a
+ * d-dimensional feature space intersecting in one point p.
+ *
+ * @param vec Existing vector
+ */
+ public ParameterizationFunction(NumberVector<?> vec) {
+ super();
+ this.vec = vec;
+ determineGlobalExtremum();
+ }
+
+ /**
+ * Computes the function value at <code>alpha</code>.
+ *
+ * @param alpha the values of the d-1 angles
+ * @return the function value at alpha
+ */
+ public double function(double[] alpha) {
+ final int d = vec.getDimensionality();
+ if(alpha.length != d - 1) {
+ throw new IllegalArgumentException("Parameter alpha must have a " + "dimensionality of " + (d - 1) + ", read: " + alpha.length);
+ }
+
+ double result = 0;
+ for(int i = 0; i < d; i++) {
+ double alpha_i = i == d - 1 ? 0 : alpha[i];
+ result += vec.doubleValue(i) * sinusProduct(0, i, alpha) * Math.cos(alpha_i);
+ }
+ return result;
+ }
+
+ /**
+ * Determines the alpha values where this function has a minimum and maximum
+ * value in the given interval.
+ *
+ * @param interval the hyper bounding box defining the interval
+ * @return the alpha values where this function has a minimum and maximum
+ * value in the given interval
+ */
+ public HyperBoundingBox determineAlphaMinMax(HyperBoundingBox interval) {
+ final int dim = vec.getDimensionality();
+ if(interval.getDimensionality() != dim - 1) {
+ throw new IllegalArgumentException("Interval needs to have dimensionality d=" + (dim - 1) + ", read: " + interval.getDimensionality());
+ }
+
+ if(extremumType.equals(ExtremumType.CONSTANT)) {
+ double[] centroid = SpatialUtil.centroid(interval);
+ return new HyperBoundingBox(centroid, centroid);
+ }
+
+ double[] alpha_min = new double[dim - 1];
+ double[] alpha_max = new double[dim - 1];
+
+ if(SpatialUtil.contains(interval, alphaExtremum)) {
+ if(extremumType.equals(ExtremumType.MINIMUM)) {
+ alpha_min = alphaExtremum;
+ for(int d = dim - 2; d >= 0; d--) {
+ alpha_max[d] = determineAlphaMax(d, alpha_max, interval);
+ }
+ }
+ else {
+ alpha_max = alphaExtremum;
+ for(int d = dim - 2; d >= 0; d--) {
+ alpha_min[d] = determineAlphaMin(d, alpha_min, interval);
+ }
+ }
+ }
+ else {
+ for(int d = dim - 2; d >= 0; d--) {
+ alpha_min[d] = determineAlphaMin(d, alpha_min, interval);
+ alpha_max[d] = determineAlphaMax(d, alpha_max, interval);
+ }
+ }
+
+ return new HyperBoundingBox(alpha_min, alpha_max);
+ }
+
+ /**
+ * Returns the type of the extremum at the specified alpha values.
+ *
+ * @param n the index until the alpha values are computed
+ * @param alpha_extreme the already computed alpha values
+ * @param interval the hyper bounding box defining the interval in which the
+ * extremum occurs
+ * @return the type of the extremum at the specified alpha_values
+ */
+ private ExtremumType extremumType(int n, double[] alpha_extreme, HyperBoundingBox interval) {
+ // return the type of the global extremum
+ if(n == alpha_extreme.length - 1) {
+ return extremumType;
+ }
+
+ // create random alpha values
+ double[] alpha_extreme_l = new double[alpha_extreme.length];
+ double[] alpha_extreme_r = new double[alpha_extreme.length];
+ double[] alpha_extreme_c = new double[alpha_extreme.length];
+
+ System.arraycopy(alpha_extreme, 0, alpha_extreme_l, 0, alpha_extreme.length);
+ System.arraycopy(alpha_extreme, 0, alpha_extreme_r, 0, alpha_extreme.length);
+ System.arraycopy(alpha_extreme, 0, alpha_extreme_c, 0, alpha_extreme.length);
+
+ double[] centroid = SpatialUtil.centroid(interval);
+ for(int i = 0; i < n; i++) {
+ alpha_extreme_l[i] = centroid[i];
+ alpha_extreme_r[i] = centroid[i];
+ alpha_extreme_c[i] = centroid[i];
+ }
+
+ double intervalLength = interval.getMax(n) - interval.getMin(n);
+ alpha_extreme_l[n] = Math.random() * intervalLength + interval.getMin(n);
+ alpha_extreme_r[n] = Math.random() * intervalLength + interval.getMin(n);
+
+ double f_c = function(alpha_extreme_c);
+ double f_l = function(alpha_extreme_l);
+ double f_r = function(alpha_extreme_r);
+
+ if(f_l < f_c) {
+ if(f_r < f_c || Math.abs(f_r - f_c) < DELTA) {
+ return ExtremumType.MAXIMUM;
+ }
+ }
+ if(f_r < f_c) {
+ if(f_l < f_c || Math.abs(f_l - f_c) < DELTA) {
+ return ExtremumType.MAXIMUM;
+ }
+ }
+
+ if(f_l > f_c) {
+ if(f_r > f_c || Math.abs(f_r - f_c) < DELTA) {
+ return ExtremumType.MINIMUM;
+ }
+ }
+ if(f_r > f_c) {
+ if(f_l > f_c || Math.abs(f_l - f_c) < DELTA) {
+ return ExtremumType.MINIMUM;
+ }
+ }
+
+ if(Math.abs(f_l - f_c) < DELTA && Math.abs(f_r - f_c) < DELTA) {
+ return ExtremumType.CONSTANT;
+ }
+
+ throw new IllegalArgumentException("Houston, we have a problem!\n" + this + "\n" + "f_l " + f_l + "\n" + "f_c " + f_c + "\n" + "f_r " + f_r + "\n" + "p " + vec.getColumnVector() + "\n" + "alpha " + FormatUtil.format(alpha_extreme_c) + "\n" + "alpha_l " + FormatUtil.format(alpha_extreme_l) + "\n" + "alpha_r " + FormatUtil.format(alpha_extreme_r) + "\n" + "n " + n);
+ // + "box min " + FormatUtil.format(interval.getMin()) + "\n"
+ // + "box max " + FormatUtil.format(interval.getMax()) + "\n"
+ }
+
+ /**
+ * Determines the n-th alpha value where this function has a minimum in the
+ * specified interval.
+ *
+ * @param n the index of the alpha value to be determined
+ * @param alpha_min the already computed alpha values
+ * @param interval the hyper bounding box defining the interval
+ * @return the n-th alpha value where this function has a minimum in the
+ * specified interval
+ */
+ private double determineAlphaMin(int n, double[] alpha_min, HyperBoundingBox interval) {
+ double alpha_n = extremum_alpha_n(n, alpha_min);
+ double lower = interval.getMin(n);
+ double upper = interval.getMax(n);
+
+ double[] alpha_extreme = new double[alpha_min.length];
+ System.arraycopy(alpha_min, n, alpha_extreme, n, alpha_extreme.length - n);
+ alpha_extreme[n] = alpha_n;
+
+ ExtremumType type = extremumType(n, alpha_extreme, interval);
+ if(type.equals(ExtremumType.MINIMUM) || type.equals(ExtremumType.CONSTANT)) {
+ // A) lower <= alpha_n <= upper
+ if(lower <= alpha_n && alpha_n <= upper) {
+ return alpha_n;
+ }
+ // B) alpha_n < upper
+ else if(alpha_n < lower) {
+ return lower;
+ }
+ // C) alpha_n > max
+ else {
+ if(alpha_n <= upper) {
+ throw new IllegalStateException("Should never happen!");
+ }
+ return upper;
+ }
+ }
+ // extremum is maximum
+ else {
+ if(lower <= alpha_n && alpha_n <= upper) {
+ // A1) min <= alpha_n <= max && alpha_n - min <= max - alpha_n
+ if(alpha_n - lower <= upper - alpha_n) {
+ return upper;
+ }
+ // A2) min <= alpha_n <= max && alpha_n - min > max - alpha_n
+ else {
+ return lower;
+ }
+ }
+ // B) alpha_n < min
+ else if(alpha_n < lower) {
+ return upper;
+ }
+ // C) alpha_n > max
+ else {
+ if(alpha_n <= upper) {
+ throw new IllegalStateException("Should never happen!");
+ }
+ return lower;
+ }
+ }
+ }
+
+ /**
+ * Determines the n-th alpha value where this function has a maximum in the
+ * specified interval.
+ *
+ * @param n the index of the alpha value to be determined
+ * @param alpha_max the already computed alpha values
+ * @param interval the hyper bounding box defining the interval
+ * @return the n-th alpha value where this function has a maximum in the
+ * specified interval
+ */
+ private double determineAlphaMax(int n, double[] alpha_max, HyperBoundingBox interval) {
+ double alpha_n = extremum_alpha_n(n, alpha_max);
+ double lower = interval.getMin(n);
+ double upper = interval.getMax(n);
+
+ double[] alpha_extreme = new double[alpha_max.length];
+ System.arraycopy(alpha_max, n, alpha_extreme, n, alpha_extreme.length - n);
+ alpha_extreme[n] = alpha_n;
+
+ ExtremumType type = extremumType(n, alpha_extreme, interval);
+ if(type.equals(ExtremumType.MINIMUM) || type.equals(ExtremumType.CONSTANT)) {
+ if(lower <= alpha_n && alpha_n <= upper) {
+ // A1) min <= alpha_n <= max && alpha_n - min <= max - alpha_n
+ if(alpha_n - lower <= upper - alpha_n) {
+ return upper;
+ }
+ // A2) min <= alpha_n <= max && alpha_n - min > max - alpha_n
+ else {
+ return lower;
+ }
+ }
+ // B) alpha_n < min
+ else if(alpha_n < lower) {
+ return upper;
+ }
+ // C) alpha_n > max
+ else {
+ if(alpha_n <= upper) {
+ throw new IllegalStateException("Should never happen!");
+ }
+ return lower;
+ }
+ }
+ // extremum is maximum
+ else {
+ // A) min <= alpha_n <= max
+ if(lower <= alpha_n && alpha_n <= upper) {
+ return alpha_n;
+ }
+ // B) alpha_n < min
+ else if(alpha_n < lower) {
+ return lower;
+ }
+ // C) alpha_n > max
+ else {
+ if(alpha_n <= upper) {
+ throw new IllegalStateException("Should never happen!");
+ }
+ return upper;
+ }
+ }
+ }
+
+ /**
+ * Returns the alpha values of the extremum point in interval [(0,...,0),
+ * (Pi,...,Pi)].
+ *
+ * @return the alpha values of the extremum
+ */
+ public double[] getGlobalAlphaExtremum() {
+ return alphaExtremum;
+ }
+
+ /**
+ * Returns the global extremum of this function in interval [0,...,Pi)^d-1.
+ *
+ * @return the global extremum
+ */
+ public double getGlobalExtremum() {
+ return function(alphaExtremum);
+ }
+
+ /**
+ * Returns the type of the global extremum in interval [0,...,Pi)^d-1.
+ *
+ * @return the type of the global extremum
+ */
+ public ExtremumType getGlobalExtremumType() {
+ return extremumType;
+ }
+
+ /**
+ * Returns a string representation of the object.
+ *
+ * @return a string representation of the object.
+ */
+ @Override
+ public String toString() {
+ return toString(0);
+ }
+
+ /**
+ * Returns a string representation of the object with the specified offset.
+ *
+ * @param offset the offset of the string representation
+ * @return a string representation of the object.
+ */
+ public String toString(int offset) {
+ StringBuilder result = new StringBuilder();
+ for(int d = 0; d < vec.getDimensionality(); d++) {
+ if(d != 0) {
+ result.append(" + \n").append(FormatUtil.whitespace(offset));
+ }
+ result.append(FormatUtil.format(vec.doubleValue(d)));
+ for(int j = 0; j < d; j++) {
+ result.append(" * sin(a_").append(j + 1).append(')');
+ }
+ if(d != vec.getDimensionality() - 1) {
+ result.append(" * cos(a_").append(d + 1).append(')');
+ }
+ }
+ return result.toString();
+ }
+
+ /**
+ * Computes the product of all sine values of the specified angles from start
+ * to end index.
+ *
+ * @param start the index to start
+ * @param end the index to end
+ * @param alpha the array of angles
+ * @return the product of all sine values of the specified angles from start
+ * to end index
+ */
+ private double sinusProduct(int start, int end, double[] alpha) {
+ double result = 1;
+ for(int j = start; j < end; j++) {
+ result *= Math.sin(alpha[j]);
+ }
+ return result;
+ }
+
+ /**
+ * Determines the global extremum of this parameterization function.
+ */
+ private void determineGlobalExtremum() {
+ alphaExtremum = new double[vec.getDimensionality() - 1];
+ for(int n = alphaExtremum.length - 1; n >= 0; n--) {
+ alphaExtremum[n] = extremum_alpha_n(n, alphaExtremum);
+ if(Double.isNaN(alphaExtremum[n])) {
+ throw new IllegalStateException("Houston, we have a problem!" + "\n" + this + "\n" + vec.getColumnVector() + "\n" + FormatUtil.format(alphaExtremum));
+ }
+ }
+
+ determineGlobalExtremumType();
+ }
+
+ /**
+ * Determines the type of the global extremum.
+ */
+ private void determineGlobalExtremumType() {
+ final double f = function(alphaExtremum);
+
+ // create random alpha values
+ double[] alpha_1 = new double[alphaExtremum.length];
+ double[] alpha_2 = new double[alphaExtremum.length];
+ for(int i = 0; i < alphaExtremum.length; i++) {
+ alpha_1[i] = Math.random() * Math.PI;
+ alpha_2[i] = Math.random() * Math.PI;
+ }
+
+ // look if f1 and f2 are less, greater or equal to f
+ double f1 = function(alpha_1);
+ double f2 = function(alpha_2);
+
+ if(f1 < f && f2 < f) {
+ extremumType = ExtremumType.MAXIMUM;
+ }
+ else if(f1 > f && f2 > f) {
+ extremumType = ExtremumType.MINIMUM;
+ }
+ else if(Math.abs(f1 - f) < DELTA && Math.abs(f2 - f) < DELTA) {
+ extremumType = ExtremumType.CONSTANT;
+ }
+ else {
+ throw new IllegalStateException("Houston, we have a problem:" + "\n" + this + "\nextremum at " + FormatUtil.format(alphaExtremum) + "\nf " + f + "\nf1 " + f1 + "\nf2 " + f2);
+ }
+ }
+
+ /**
+ * Determines the value for alpha_n where this function has a (local)
+ * extremum.
+ *
+ * @param n the index of the angle
+ * @param alpha the already determined alpha_values for the extremum
+ * @return the value for alpha_n where this function has a (local) extremum
+ */
+ private double extremum_alpha_n(int n, double[] alpha) {
+ // arctan(infinity) = PI/2
+ if(vec.doubleValue(n) == 0) {
+ return MathUtil.HALFPI;
+ }
+
+ double tan = 0;
+ for(int j = n + 1; j < vec.getDimensionality(); j++) {
+ double alpha_j = j == vec.getDimensionality() - 1 ? 0 : alpha[j];
+ tan += vec.doubleValue(j) * sinusProduct(n + 1, j, alpha) * Math.cos(alpha_j);
+ }
+ tan /= vec.doubleValue(n);
+
+ // if (debug) {
+ // debugFiner("tan alpha_" + (n + 1) + " = " + tan);
+ // }
+ double alpha_n = Math.atan(tan);
+ if(alpha_n < 0) {
+ alpha_n = Math.PI + alpha_n;
+ }
+ return alpha_n;
+ }
+
+ /**
+ * Get the actual vector used.
+ *
+ * @return Vector, for projection
+ */
+ public Vector getColumnVector() {
+ return vec.getColumnVector();
+ }
+
+ /**
+ * Get the vector dimensionality.
+ *
+ * @return Vector dimensionality
+ */
+ public int getDimensionality() {
+ return vec.getDimensionality();
+ }
+} \ No newline at end of file
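
Note: for reference, function() in the new file evaluates f(alpha) = sum_{i=0..d-1} v_i * (prod_{j<i} sin alpha_j) * cos alpha_i, with the last angle fixed to 0. A self-contained sketch over plain arrays, folding the repeated sinusProduct calls into one accumulated product:

    static double function(double[] v, double[] alpha) {
      final int d = v.length; // alpha must have length d - 1
      double result = 0, sinProd = 1;
      for (int i = 0; i < d; i++) {
        final double alpha_i = (i == d - 1) ? 0 : alpha[i]; // last angle is 0
        result += v[i] * sinProd * Math.cos(alpha_i);
        sinProd *= Math.sin(alpha_i); // running product sin(alpha_0)..sin(alpha_i)
      }
      return result;
    }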
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/CorePredicate.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/CorePredicate.java
index e75a89dc..a4440a29 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/CorePredicate.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/CorePredicate.java
@@ -39,11 +39,6 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
*/
public interface CorePredicate {
/**
- * Constant for the generic type {@code List<? extends DistanceResultPair<?>>}
- */
- public static final String NEIGHBOR_LIST = "neighborhood-list";
-
- /**
* Instantiate for a database.
*
* @param database Database to instantiate for
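
Note: with the NEIGHBOR_LIST type constant removed, a core predicate instance boils down to a size test on whatever neighborhood object the neighbor predicate produced. In sketch form, with java.util.Collection standing in for the generic neighborhood type:

    static boolean isCorePoint(java.util.Collection<?> neighbors, int minpts) {
      return neighbors.size() >= minpts; // MinPts-style core test
    }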
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/EpsilonNeighborPredicate.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/EpsilonNeighborPredicate.java
index cb24e8f1..2b946f1c 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/EpsilonNeighborPredicate.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/EpsilonNeighborPredicate.java
@@ -23,8 +23,6 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.List;
-
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
@@ -32,18 +30,15 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.QueryUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.query.DistanceDBIDResult;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
-import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DistanceParameter;
@@ -63,6 +58,8 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
*
* @author Erich Schubert
*
+ * @apiviz.has Instance
+ *
* @param <D> Distance type
*/
@Reference(authors = "M. Ester, H.-P. Kriegel, J. Sander, and X. Xu", title = "A Density-Based Algorithm for Discovering Clusters in Large Spatial Databases with Noise", booktitle = "Proc. 2nd Int. Conf. on Knowledge Discovery and Data Mining (KDD '96), Portland, OR, 1996", url = "http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.71.1980")
@@ -91,18 +88,10 @@ public class EpsilonNeighborPredicate<O, D extends Distance<D>> implements Neigh
@SuppressWarnings("unchecked")
@Override
- public <T> Instance<T> instantiate(Database database, SimpleTypeInformation<?> type) {
- if(TypeUtil.DBIDS.isAssignableFromType(type)) {
- DistanceQuery<O, D> dq = QueryUtil.getDistanceQuery(database, distFunc);
- RangeQuery<O, D> rq = database.getRangeQuery(dq);
- return (Instance<T>) new DBIDInstance<D>(epsilon, rq, dq.getRelation().getDBIDs());
- }
- if(TypeUtil.NEIGHBORLIST.isAssignableFromType(type)) {
- DistanceQuery<O, D> dq = QueryUtil.getDistanceQuery(database, distFunc);
- RangeQuery<O, D> rq = database.getRangeQuery(dq);
- return (Instance<T>) new NeighborListInstance<D>(epsilon, rq, dq.getRelation().getDBIDs());
- }
- throw new AbortException("Incompatible predicate types");
+ public <T> NeighborPredicate.Instance<T> instantiate(Database database, SimpleTypeInformation<?> type) {
+ DistanceQuery<O, D> dq = QueryUtil.getDistanceQuery(database, distFunc);
+ RangeQuery<O, D> rq = database.getRangeQuery(dq);
+ return (NeighborPredicate.Instance<T>) new Instance<D>(epsilon, rq, dq.getRelation().getDBIDs());
}
@Override
@@ -120,7 +109,7 @@ public class EpsilonNeighborPredicate<O, D extends Distance<D>> implements Neigh
*
* @author Erich Schubert
*/
- public static class DBIDInstance<D extends Distance<D>> implements NeighborPredicate.Instance<DBIDs> {
+ public static class Instance<D extends Distance<D>> implements NeighborPredicate.Instance<DistanceDBIDResult<D>> {
/**
* Range to query with
*/
@@ -143,64 +132,7 @@ public class EpsilonNeighborPredicate<O, D extends Distance<D>> implements Neigh
* @param rq Range query to use
* @param ids DBIDs to process
*/
- public DBIDInstance(D epsilon, RangeQuery<?, D> rq, DBIDs ids) {
- super();
- this.epsilon = epsilon;
- this.rq = rq;
- this.ids = ids;
- }
-
- @Override
- public DBIDs getIDs() {
- return ids;
- }
-
- @Override
- public DBIDs getNeighbors(DBIDRef reference) {
- List<DistanceResultPair<D>> res = rq.getRangeForDBID(reference, epsilon);
- // Throw away the actual distance values ...
- ModifiableDBIDs neighbors = DBIDUtil.newHashSet(res.size());
- for(DistanceResultPair<D> dr : res) {
- neighbors.add(dr);
- }
- return neighbors;
- }
-
- @Override
- public void addDBIDs(ModifiableDBIDs ids, DBIDs neighbors) {
- ids.addDBIDs(neighbors);
- }
- }
-
- /**
- * Instance for a particular data set.
- *
- * @author Erich Schubert
- */
- public static class NeighborListInstance<D extends Distance<D>> implements NeighborPredicate.Instance<DistanceDBIDResult<D>> {
- /**
- * Range to query with
- */
- D epsilon;
-
- /**
- * Range query to use on the database.
- */
- RangeQuery<?, D> rq;
-
- /**
- * DBIDs to process
- */
- DBIDs ids;
-
- /**
- * Constructor.
- *
- * @param epsilon Epsilon
- * @param rq Range query to use
- * @param ids DBIDs to process
- */
- public NeighborListInstance(D epsilon, RangeQuery<?, D> rq, DBIDs ids) {
+ public Instance(D epsilon, RangeQuery<?, D> rq, DBIDs ids) {
super();
this.epsilon = epsilon;
this.rq = rq;
@@ -219,9 +151,7 @@ public class EpsilonNeighborPredicate<O, D extends Distance<D>> implements Neigh
@Override
public void addDBIDs(ModifiableDBIDs ids, DistanceDBIDResult<D> neighbors) {
- for(DistanceResultPair<D> neighbor : neighbors) {
- ids.add(neighbor);
- }
+ ids.addDBIDs(neighbors);
}
}
@@ -265,4 +195,4 @@ public class EpsilonNeighborPredicate<O, D extends Distance<D>> implements Neigh
return new EpsilonNeighborPredicate<O, D>(epsilon, distfun);
}
}
-} \ No newline at end of file
+}
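
Note: after the consolidation above, the single Instance hands the range query result through unchanged, so addDBIDs becomes a bulk copy instead of a per-pair loop. As a toy picture of what the range query supplies, a linear-scan epsilon-neighborhood over raw double[] points (hypothetical helper, not ELKI's RangeQuery):

    static java.util.List<Integer> epsNeighbors(double[][] data, int query, double eps) {
      java.util.List<Integer> result = new java.util.ArrayList<Integer>();
      for (int i = 0; i < data.length; i++) {
        double sq = 0;
        for (int d = 0; d < data[query].length; d++) {
          final double diff = data[query][d] - data[i][d];
          sq += diff * diff;
        }
        if (Math.sqrt(sq) <= eps) {
          result.add(i); // includes the query point itself, as in DBSCAN
        }
      }
      return result;
    }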
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/GeneralizedDBSCAN.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/GeneralizedDBSCAN.java
index 2e1c2093..ef1cb0dc 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/GeneralizedDBSCAN.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/GeneralizedDBSCAN.java
@@ -67,15 +67,19 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
*
* @author Erich Schubert
* @author Arthur Zimek
+ *
+ * @apiviz.landmark
*
* @apiviz.has Instance
+ * @apiviz.composedOf CorePredicate
+ * @apiviz.composedOf NeighborPredicate
*/
@Reference(authors = "Jörg Sander, Martin Ester, Hans-Peter Kriegel, Xiaowei Xu", title = "Density-Based Clustering in Spatial Databases: The Algorithm GDBSCAN and Its Applications", booktitle = "Data Mining and Knowledge Discovery", url = "http://dx.doi.org/10.1023/A:1009745219419")
public class GeneralizedDBSCAN extends AbstractAlgorithm<Clustering<Model>> implements ClusteringAlgorithm<Clustering<Model>> {
/**
* Get a logger for this algorithm
*/
- final static Logging logger = Logging.getLogger(GeneralizedDBSCAN.class);
+ private static final Logging LOG = Logging.getLogger(GeneralizedDBSCAN.class);
/**
* The neighborhood predicate factory.
@@ -116,16 +120,34 @@ public class GeneralizedDBSCAN extends AbstractAlgorithm<Clustering<Model>> impl
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
* Instance for a particular data set.
*
* @author Erich Schubert
+ *
+ * @apiviz.composedOf CorePredicate.Instance
+ * @apiviz.composedOf NeighborPredicate.Instance
*/
public class Instance<T> {
/**
+ * Unprocessed IDs
+ */
+ private static final int UNPROCESSED = -2;
+
+ /**
+ * Noise IDs
+ */
+ private static final int NOISE = -1;
+
+ /**
+ * First cluster ID
+ */
+ private static final int FIRST_CLUSTER = 0;
+
+ /**
* The neighborhood predicate
*/
final NeighborPredicate.Instance<T> npred;
@@ -148,30 +170,29 @@ public class GeneralizedDBSCAN extends AbstractAlgorithm<Clustering<Model>> impl
}
/**
- * Run the actual DBSCAN algorithm.
+ * Run the actual GDBSCAN algorithm.
*
* @return Clustering result
*/
public Clustering<Model> run() {
final DBIDs ids = npred.getIDs();
// Setup progress logging
- final FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Clustering", ids.size(), logger) : null;
- final IndefiniteProgress clusprogress = logger.isVerbose() ? new IndefiniteProgress("Clusters", logger) : null;
+ final FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Clustering", ids.size(), LOG) : null;
+ final IndefiniteProgress clusprogress = LOG.isVerbose() ? new IndefiniteProgress("Clusters", LOG) : null;
// (Temporary) store the cluster ID assigned.
- final WritableIntegerDataStore clusterids = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_TEMP, -2);
+ final WritableIntegerDataStore clusterids = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_TEMP, UNPROCESSED);
// Note: these are not exact!
final TIntArrayList clustersizes = new TIntArrayList();
// Implementation Note: using Integer objects should result in
// reduced memory use in the HashMap!
- final int noiseid = -1;
- int clusterid = 0;
+ int clusterid = FIRST_CLUSTER;
int clustersize = 0;
int noisesize = 0;
// Iterate over all objects in the database.
for(DBIDIter id = ids.iter(); id.valid(); id.advance()) {
// Skip already processed ids.
- if(clusterids.intValue(id) > -2) {
+ if(clusterids.intValue(id) != UNPROCESSED) {
continue;
}
// Evaluate Neighborhood predicate
@@ -185,25 +206,25 @@ public class GeneralizedDBSCAN extends AbstractAlgorithm<Clustering<Model>> impl
clustersize = 0;
clusterid += 1;
if(clusprogress != null) {
- clusprogress.setProcessed(clusterid, logger);
+ clusprogress.setProcessed(clusterid, LOG);
}
}
else {
// otherwise, it's a noise point
- clusterids.putInt(id, noiseid);
+ clusterids.putInt(id, NOISE);
noisesize += 1;
}
// We've completed this element
if(progress != null) {
- progress.incrementProcessed(logger);
+ progress.incrementProcessed(LOG);
}
}
// Finish progress logging.
if(progress != null) {
- progress.ensureCompleted(logger);
+ progress.ensureCompleted(LOG);
}
if(clusprogress != null) {
- clusprogress.setCompleted(logger);
+ clusprogress.setCompleted(LOG);
}
// Transform cluster ID mapping into a clustering result:
@@ -240,7 +261,7 @@ public class GeneralizedDBSCAN extends AbstractAlgorithm<Clustering<Model>> impl
* @param neighbors Neighbors acquired by initial getNeighbors call.
* @param progress Progress logging
*
- * @return cluster size;
+ * @return cluster size
*/
protected int setbasedExpandCluster(final int clusterid, final WritableIntegerDataStore clusterids, final T neighbors, final FiniteProgress progress) {
int clustersize = 0;
@@ -264,7 +285,7 @@ public class GeneralizedDBSCAN extends AbstractAlgorithm<Clustering<Model>> impl
npred.addDBIDs(activeSet, newneighbors);
}
if(progress != null) {
- progress.incrementProcessed(logger);
+ progress.incrementProcessed(LOG);
}
}
}
@@ -293,12 +314,12 @@ public class GeneralizedDBSCAN extends AbstractAlgorithm<Clustering<Model>> impl
/**
* Parameter for neighborhood predicate
*/
- public final static OptionID NEIGHBORHOODPRED_ID = OptionID.getOrCreateOptionID("gdbscan.neighborhood", "Neighborhood predicate for GDBSCAN");
+ public static final OptionID NEIGHBORHOODPRED_ID = new OptionID("gdbscan.neighborhood", "Neighborhood predicate for GDBSCAN");
/**
* Parameter for core predicate
*/
- public final static OptionID COREPRED_ID = OptionID.getOrCreateOptionID("gdbscan.core", "Core point predicate for GDBSCAN");
+ public static final OptionID COREPRED_ID = new OptionID("gdbscan.core", "Core point predicate for GDBSCAN");
@Override
protected void makeOptions(Parameterization config) {
@@ -320,4 +341,4 @@ public class GeneralizedDBSCAN extends AbstractAlgorithm<Clustering<Model>> impl
return new GeneralizedDBSCAN(npred, corepred);
}
}
-} \ No newline at end of file
+}
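
With this refactoring, GeneralizedDBSCAN is driven entirely by the two predicate factories passed to its constructor (see makeInstance above). As a minimal sketch of wiring it up to emulate classic DBSCAN, assuming EpsilonNeighborPredicate takes an epsilon and a distance function (its exact constructor is not part of this diff):

    // Hypothetical wiring; only the GeneralizedDBSCAN(npred, corepred)
    // constructor is confirmed by the hunk above.
    NeighborPredicate npred = new EpsilonNeighborPredicate<DoubleDistance>(
        new DoubleDistance(0.5), EuclideanDistanceFunction.STATIC);
    CorePredicate cpred = new MinPtsCorePredicate(5); // minpts = 5
    GeneralizedDBSCAN gdbscan = new GeneralizedDBSCAN(npred, cpred);
    Clustering<Model> result = gdbscan.run(database);
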
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/MinPtsCorePredicate.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/MinPtsCorePredicate.java
index b9852eca..47097f9b 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/MinPtsCorePredicate.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/MinPtsCorePredicate.java
@@ -23,16 +23,12 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.List;
-
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
-import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -72,14 +68,8 @@ public class MinPtsCorePredicate implements CorePredicate {
@SuppressWarnings("unchecked")
@Override
- public <T> Instance<T> instantiate(Database database, SimpleTypeInformation<?> type) {
- if(TypeUtil.DBIDS.isAssignableFromType(type)) {
- return (Instance<T>) new DBIDsInstance(minpts);
- }
- if(TypeUtil.NEIGHBORLIST.isAssignableFromType(type)) {
- return (Instance<T>) new NeighborListInstance(minpts);
- }
- throw new AbortException("Incompatible predicate types");
+ public <T> CorePredicate.Instance<T> instantiate(Database database, SimpleTypeInformation<?> type) {
+ return (CorePredicate.Instance<T>) new Instance(minpts);
}
@Override
@@ -98,7 +88,7 @@ public class MinPtsCorePredicate implements CorePredicate {
*
* @author Erich Schubert
*/
- public static class DBIDsInstance implements CorePredicate.Instance<DBIDs> {
+ public static class Instance implements CorePredicate.Instance<DBIDs> {
/**
* The minpts parameter.
*/
@@ -109,7 +99,7 @@ public class MinPtsCorePredicate implements CorePredicate {
*
* @param minpts MinPts parameter
*/
- public DBIDsInstance(int minpts) {
+ public Instance(int minpts) {
super();
this.minpts = minpts;
}
@@ -121,33 +111,6 @@ public class MinPtsCorePredicate implements CorePredicate {
}
/**
- * Instance for a particular data set.
- *
- * @author Erich Schubert
- */
- public static class NeighborListInstance implements CorePredicate.Instance<List<? extends DistanceResultPair<?>>> {
- /**
- * The minpts parameter.
- */
- int minpts;
-
- /**
- * Constructor for this predicate.
- *
- * @param minpts MinPts parameter
- */
- public NeighborListInstance(int minpts) {
- super();
- this.minpts = minpts;
- }
-
- @Override
- public boolean isCorePoint(DBIDRef point, List<? extends DistanceResultPair<?>> neighbors) {
- return neighbors.size() >= minpts;
- }
- }
-
- /**
* Parameterization class
*
* @author Erich Schubert
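
With the NeighborList variant removed, a core predicate only needs to handle the DBIDs neighborhood type, so a custom predicate instance reduces to one method. A sketch against the CorePredicate.Instance<DBIDs> contract used above:

    // Sketch of a custom core predicate instance; the isCorePoint
    // signature mirrors the simplified Instance class above.
    public static class AtLeastOneNeighbor implements CorePredicate.Instance<DBIDs> {
      @Override
      public boolean isCorePoint(DBIDRef point, DBIDs neighbors) {
        // Core if the neighborhood contains more than the point itself.
        return neighbors.size() > 1;
      }
    }
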
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/NeighborPredicate.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/NeighborPredicate.java
index 4f9eca27..ed927696 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/NeighborPredicate.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/NeighborPredicate.java
@@ -91,4 +91,4 @@ public interface NeighborPredicate {
*/
public void addDBIDs(ModifiableDBIDs ids, T neighbors);
}
-}
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeans.java
index 92862909..47855aad 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeans.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeans.java
@@ -27,10 +27,12 @@ import java.util.ArrayList;
import java.util.List;
import de.lmu.ifi.dbs.elki.algorithm.AbstractPrimitiveDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension;
import de.lmu.ifi.dbs.elki.data.model.MeanModel;
+import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
@@ -50,12 +52,14 @@ import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect;
*
* @author Erich Schubert
*
+ * @apiviz.has MeanModel
* @apiviz.composedOf KMeansInitialization
*
* @param <V> Vector type
* @param <D> Distance type
+ * @param <M> Cluster model type
*/
-public abstract class AbstractKMeans<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractPrimitiveDistanceBasedAlgorithm<NumberVector<?, ?>, D, Clustering<MeanModel<V>>> implements KMeans {
+public abstract class AbstractKMeans<V extends NumberVector<?>, D extends Distance<D>, M extends MeanModel<V>> extends AbstractPrimitiveDistanceBasedAlgorithm<NumberVector<?>, D, Clustering<M>> implements KMeans, ClusteringAlgorithm<Clustering<M>> {
/**
* Holds the value of {@link #K_ID}.
*/
@@ -79,7 +83,7 @@ public abstract class AbstractKMeans<V extends NumberVector<V, ?>, D extends Dis
* @param maxiter Maxiter parameter
* @param initializer Function to generate the initial means
*/
- public AbstractKMeans(PrimitiveDistanceFunction<NumberVector<?, ?>, D> distanceFunction, int k, int maxiter, KMeansInitialization<V> initializer) {
+ public AbstractKMeans(PrimitiveDistanceFunction<? super NumberVector<?>, D> distanceFunction, int k, int maxiter, KMeansInitialization<V> initializer) {
super(distanceFunction);
this.k = k;
this.maxiter = maxiter;
@@ -95,12 +99,12 @@ public abstract class AbstractKMeans<V extends NumberVector<V, ?>, D extends Dis
* @param clusters cluster assignment
* @return true when the object was reassigned
*/
- protected boolean assignToNearestCluster(Relation<V> relation, List<? extends NumberVector<?, ?>> means, List<? extends ModifiableDBIDs> clusters) {
+ protected boolean assignToNearestCluster(Relation<V> relation, List<? extends NumberVector<?>> means, List<? extends ModifiableDBIDs> clusters) {
boolean changed = false;
if(getDistanceFunction() instanceof PrimitiveDoubleDistanceFunction) {
@SuppressWarnings("unchecked")
- final PrimitiveDoubleDistanceFunction<? super NumberVector<?, ?>> df = (PrimitiveDoubleDistanceFunction<? super NumberVector<?, ?>>) getDistanceFunction();
+ final PrimitiveDoubleDistanceFunction<? super NumberVector<?>> df = (PrimitiveDoubleDistanceFunction<? super NumberVector<?>>) getDistanceFunction();
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double mindist = Double.POSITIVE_INFINITY;
V fv = relation.get(iditer);
@@ -127,7 +131,7 @@ public abstract class AbstractKMeans<V extends NumberVector<V, ?>, D extends Dis
}
}
else {
- final PrimitiveDistanceFunction<? super NumberVector<?, ?>, D> df = getDistanceFunction();
+ final PrimitiveDistanceFunction<? super NumberVector<?>, D> df = getDistanceFunction();
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
D mindist = df.getDistanceFactory().infiniteDistance();
V fv = relation.get(iditer);
@@ -158,7 +162,7 @@ public abstract class AbstractKMeans<V extends NumberVector<V, ?>, D extends Dis
@Override
public TypeInformation[] getInputTypeRestriction() {
- return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
+ return TypeUtil.array(new CombinedTypeInformation(TypeUtil.NUMBER_VECTOR_FIELD, getDistanceFunction().getInputTypeRestriction()));
}
/**
@@ -169,7 +173,7 @@ public abstract class AbstractKMeans<V extends NumberVector<V, ?>, D extends Dis
* @param database the database containing the vectors
* @return the mean vectors of the given clusters in the given database
*/
- protected List<Vector> means(List<? extends ModifiableDBIDs> clusters, List<? extends NumberVector<?, ?>> means, Relation<V> database) {
+ protected List<Vector> means(List<? extends ModifiableDBIDs> clusters, List<? extends NumberVector<?>> means, Relation<V> database) {
List<Vector> newMeans = new ArrayList<Vector>(k);
for(int i = 0; i < k; i++) {
ModifiableDBIDs list = clusters.get(i);
@@ -200,30 +204,30 @@ public abstract class AbstractKMeans<V extends NumberVector<V, ?>, D extends Dis
* @param database the database containing the vectors
* @return the mean vectors of the given clusters in the given database
*/
- protected List<NumberVector<?, ?>> medians(List<? extends ModifiableDBIDs> clusters, List<? extends NumberVector<?, ?>> medians, Relation<V> database) {
+ protected List<NumberVector<?>> medians(List<? extends ModifiableDBIDs> clusters, List<? extends NumberVector<?>> medians, Relation<V> database) {
final int dim = medians.get(0).getDimensionality();
final SortDBIDsBySingleDimension sorter = new SortDBIDsBySingleDimension(database);
- List<NumberVector<?, ?>> newMedians = new ArrayList<NumberVector<?, ?>>(k);
+ List<NumberVector<?>> newMedians = new ArrayList<NumberVector<?>>(k);
for(int i = 0; i < k; i++) {
ArrayModifiableDBIDs list = DBIDUtil.newArray(clusters.get(i));
if(list.size() > 0) {
Vector mean = new Vector(dim);
for(int d = 0; d < dim; d++) {
- sorter.setDimension(d + 1);
+ sorter.setDimension(d);
DBID id = QuickSelect.median(list, sorter);
- mean.set(d, database.get(id).doubleValue(d + 1));
+ mean.set(d, database.get(id).doubleValue(d));
}
newMedians.add(mean);
}
else {
- newMedians.add((NumberVector<?, ?>) medians.get(i));
+ newMedians.add((NumberVector<?>) medians.get(i));
}
}
return newMedians;
}
/**
- * Compute an incremental update for the mean
+ * Compute an incremental update for the mean.
*
* @param mean Mean to update
* @param vec Object vector
@@ -255,7 +259,7 @@ public abstract class AbstractKMeans<V extends NumberVector<V, ?>, D extends Dis
if(getDistanceFunction() instanceof PrimitiveDoubleDistanceFunction) {
// Raw distance function
@SuppressWarnings("unchecked")
- final PrimitiveDoubleDistanceFunction<? super NumberVector<?, ?>> df = (PrimitiveDoubleDistanceFunction<? super NumberVector<?, ?>>) getDistanceFunction();
+ final PrimitiveDoubleDistanceFunction<? super NumberVector<?>> df = (PrimitiveDoubleDistanceFunction<? super NumberVector<?>>) getDistanceFunction();
// Incremental update
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
@@ -287,7 +291,7 @@ public abstract class AbstractKMeans<V extends NumberVector<V, ?>, D extends Dis
}
else {
// Raw distance function
- final PrimitiveDistanceFunction<? super NumberVector<?, ?>, D> df = getDistanceFunction();
+ final PrimitiveDistanceFunction<? super NumberVector<?>, D> df = getDistanceFunction();
// Incremental update
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
@@ -319,4 +323,4 @@ public abstract class AbstractKMeans<V extends NumberVector<V, ?>, D extends Dis
}
return changed;
}
-} \ No newline at end of file
+}
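
The incrementalUpdateMean helper used by macQueenIterate implements the standard running-mean update. Detached from the Vector API, over plain double arrays (a worked sketch, not ELKI code):

    // After a point x enters a cluster whose new size is n:
    //   mean' = mean + (x - mean) / n       (op = +1)
    // After x leaves a cluster whose new size is n:
    //   mean' = mean - (x - mean) / n       (op = -1)
    static void incrementalUpdate(double[] mean, double[] x, int newsize, double op) {
      for (int d = 0; d < mean.length; d++) {
        mean[d] += op * (x[d] - mean[d]) / newsize;
      }
    }
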
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeansInitialization.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeansInitialization.java
index a8effecd..3a69c806 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeansInitialization.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeansInitialization.java
@@ -22,9 +22,10 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.LongParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
/**
* Abstract base class for common k-means initializations.
@@ -35,17 +36,17 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.LongParameter;
*/
public abstract class AbstractKMeansInitialization<V> implements KMeansInitialization<V> {
/**
- * Holds the value of {@link KMeans#SEED_ID}.
+ * Random number generator
*/
- protected Long seed;
+ protected RandomFactory rnd;
/**
* Constructor.
*
- * @param seed Random seed.
+ * @param rnd Random number generator.
*/
- public AbstractKMeansInitialization(Long seed) {
- this.seed = seed;
+ public AbstractKMeansInitialization(RandomFactory rnd) {
+ this.rnd = rnd;
}
/**
@@ -56,14 +57,17 @@ public abstract class AbstractKMeansInitialization<V> implements KMeansInitializ
* @apiviz.exclude
*/
public abstract static class Parameterizer<V> extends AbstractParameterizer {
- protected Long seed;
+ /**
+ * Random generator
+ */
+ protected RandomFactory rnd;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- LongParameter seedP = new LongParameter(KMeans.SEED_ID, true);
- if(config.grab(seedP)) {
- seed = seedP.getValue();
+ RandomParameter rndP = new RandomParameter(KMeans.SEED_ID);
+ if(config.grab(rndP)) {
+ rnd = rndP.getValue();
}
}
}
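
The switch from a raw Long seed to RandomFactory moves generator construction to the point of use. A minimal consuming sketch, using only the getRandom() call that appears in the KMeansPlusPlusInitialMeans hunk below:

    // Inside an initializer that received `rnd` from the constructor above:
    java.util.Random random = rnd.getRandom(); // honors a configured seed, if any
    long subseed = random.nextLong();          // e.g. derive per-phase seeds
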
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FirstKInitialMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FirstKInitialMeans.java
index 7a7f2867..1e51f4d6 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FirstKInitialMeans.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FirstKInitialMeans.java
@@ -77,7 +77,7 @@ public class FirstKInitialMeans<V> implements KMeansInitialization<V>, KMedoidsI
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
@Override
protected FirstKInitialMeans<V> makeInstance() {
return new FirstKInitialMeans<V>();
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeans.java
index 37171d4a..68fc4e48 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeans.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeans.java
@@ -34,22 +34,22 @@ public interface KMeans {
/**
* Parameter to specify the initialization method
*/
- public static final OptionID INIT_ID = OptionID.getOrCreateOptionID("kmeans.initialization", "Method to choose the initial means.");
+ public static final OptionID INIT_ID = new OptionID("kmeans.initialization", "Method to choose the initial means.");
/**
* Parameter to specify the number of clusters to find, must be an integer
* greater than 0.
*/
- public static final OptionID K_ID = OptionID.getOrCreateOptionID("kmeans.k", "The number of clusters to find.");
+ public static final OptionID K_ID = new OptionID("kmeans.k", "The number of clusters to find.");
/**
* Parameter to specify the number of clusters to find, must be an integer
* greater or equal to 0, where 0 means no limit.
*/
- public static final OptionID MAXITER_ID = OptionID.getOrCreateOptionID("kmeans.maxiter", "The maximum number of iterations to do. 0 means no limit.");
+ public static final OptionID MAXITER_ID = new OptionID("kmeans.maxiter", "The maximum number of iterations to do. 0 means no limit.");
/**
* Parameter to specify the random generator seed.
*/
- public static final OptionID SEED_ID = OptionID.getOrCreateOptionID("kmeans.seed", "The random number generator seed.");
+ public static final OptionID SEED_ID = new OptionID("kmeans.seed", "The random number generator seed.");
} \ No newline at end of file
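
These OptionIDs surface as command-line flags. An illustrative invocation (the kmeans.* flag names come from the constants above; the application class and the remaining flags are assumptions, not part of this diff):

    java -cp elki.jar de.lmu.ifi.dbs.elki.application.KDDCLIApplication \
      -dbc.in data.csv \
      -algorithm clustering.kmeans.KMeansLloyd \
      -kmeans.k 10 -kmeans.maxiter 100 -kmeans.seed 0
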
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansInitialization.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansInitialization.java
index 9e5d69f0..54b3a2ce 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansInitialization.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansInitialization.java
@@ -31,6 +31,8 @@ import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
* Interface for initializing K-Means
*
* @author Erich Schubert
+ *
+ * @apiviz.landmark
*
* @param <V> Object type
*/
@@ -44,4 +46,4 @@ public interface KMeansInitialization<V> {
* @return List of chosen means for k-means
*/
public abstract List<V> chooseInitialMeans(Relation<V> relation, int k, PrimitiveDistanceFunction<? super V, ?> distanceFunction);
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java
index b1b40632..f43c2277 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java
@@ -27,19 +27,20 @@ import java.util.ArrayList;
import java.util.List;
import de.lmu.ifi.dbs.elki.algorithm.AbstractPrimitiveDistanceBasedAlgorithm;
-import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.NumberVector;
-import de.lmu.ifi.dbs.elki.data.model.MeanModel;
+import de.lmu.ifi.dbs.elki.data.model.KMeansModel;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.SquaredEuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -62,7 +63,8 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
*
* @author Arthur Zimek
*
- * @apiviz.has MeanModel
+ * @apiviz.landmark
+ * @apiviz.has KMeansModel
*
* @param <V> vector datatype
* @param <D> distance value type
@@ -70,11 +72,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
@Title("K-Means")
@Description("Finds a partitioning into k clusters.")
@Reference(authors = "S. Lloyd", title = "Least squares quantization in PCM", booktitle = "IEEE Transactions on Information Theory 28 (2): 129–137.", url = "http://dx.doi.org/10.1109/TIT.1982.1056489")
-public class KMeansLloyd<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractKMeans<V, D> implements ClusteringAlgorithm<Clustering<MeanModel<V>>> {
+public class KMeansLloyd<V extends NumberVector<?>, D extends Distance<D>> extends AbstractKMeans<V, D, KMeansModel<V>> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(KMeansLloyd.class);
+ private static final Logging LOG = Logging.getLogger(KMeansLloyd.class);
/**
* Constructor.
@@ -82,55 +84,56 @@ public class KMeansLloyd<V extends NumberVector<V, ?>, D extends Distance<D>> ex
* @param distanceFunction distance function
* @param k k parameter
* @param maxiter Maxiter parameter
+ * @param initializer Initialization method
*/
- public KMeansLloyd(PrimitiveDistanceFunction<NumberVector<?, ?>, D> distanceFunction, int k, int maxiter, KMeansInitialization<V> initializer) {
+ public KMeansLloyd(PrimitiveDistanceFunction<NumberVector<?>, D> distanceFunction, int k, int maxiter, KMeansInitialization<V> initializer) {
super(distanceFunction, k, maxiter, initializer);
}
/**
- * Run k-means
+ * Run k-means.
*
* @param database Database
* @param relation relation to use
* @return result
*/
- public Clustering<MeanModel<V>> run(Database database, Relation<V> relation) {
- if(relation.size() <= 0) {
- return new Clustering<MeanModel<V>>("k-Means Clustering", "kmeans-clustering");
+ public Clustering<KMeansModel<V>> run(Database database, Relation<V> relation) {
+ if (relation.size() <= 0) {
+ return new Clustering<KMeansModel<V>>("k-Means Clustering", "kmeans-clustering");
}
// Choose initial means
- List<? extends NumberVector<?, ?>> means = initializer.chooseInitialMeans(relation, k, getDistanceFunction());
+ List<? extends NumberVector<?>> means = initializer.chooseInitialMeans(relation, k, getDistanceFunction());
// Setup cluster assignment store
List<ModifiableDBIDs> clusters = new ArrayList<ModifiableDBIDs>();
- for(int i = 0; i < k; i++) {
+ for (int i = 0; i < k; i++) {
clusters.add(DBIDUtil.newHashSet(relation.size() / k));
}
- for(int iteration = 0; maxiter <= 0 || iteration < maxiter; iteration++) {
- if(logger.isVerbose()) {
- logger.verbose("K-Means iteration " + (iteration + 1));
+ for (int iteration = 0; maxiter <= 0 || iteration < maxiter; iteration++) {
+ if (LOG.isVerbose()) {
+ LOG.verbose("K-Means iteration " + (iteration + 1));
}
boolean changed = assignToNearestCluster(relation, means, clusters);
// Stop if no cluster assignment changed.
- if(!changed) {
+ if (!changed) {
break;
}
// Recompute means.
means = means(clusters, means, relation);
}
// Wrap result
- final V factory = DatabaseUtil.assumeVectorField(relation).getFactory();
- Clustering<MeanModel<V>> result = new Clustering<MeanModel<V>>("k-Means Clustering", "kmeans-clustering");
- for(int i = 0; i < clusters.size(); i++) {
- MeanModel<V> model = new MeanModel<V>(factory.newNumberVector(means.get(i).getColumnVector().getArrayRef()));
- result.addCluster(new Cluster<MeanModel<V>>(clusters.get(i), model));
+ final NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(relation);
+ Clustering<KMeansModel<V>> result = new Clustering<KMeansModel<V>>("k-Means Clustering", "kmeans-clustering");
+ for (int i = 0; i < clusters.size(); i++) {
+ KMeansModel<V> model = new KMeansModel<V>(factory.newNumberVector(means.get(i).getColumnVector().getArrayRef()));
+ result.addCluster(new Cluster<KMeansModel<V>>(clusters.get(i), model));
}
return result;
}
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -140,35 +143,53 @@ public class KMeansLloyd<V extends NumberVector<V, ?>, D extends Distance<D>> ex
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractPrimitiveDistanceBasedAlgorithm.Parameterizer<NumberVector<?, ?>, D> {
+ public static class Parameterizer<V extends NumberVector<?>, D extends Distance<D>> extends AbstractPrimitiveDistanceBasedAlgorithm.Parameterizer<NumberVector<?>, D> {
+ /**
+ * k Parameter.
+ */
protected int k;
+ /**
+ * Maximum number of iterations.
+ */
protected int maxiter;
+ /**
+ * Initialization method.
+ */
protected KMeansInitialization<V> initializer;
@Override
protected void makeOptions(Parameterization config) {
- super.makeOptions(config);
- IntParameter kP = new IntParameter(K_ID, new GreaterConstraint(0));
- if(config.grab(kP)) {
+ ObjectParameter<PrimitiveDistanceFunction<NumberVector<?>, D>> distanceFunctionP = makeParameterDistanceFunction(SquaredEuclideanDistanceFunction.class, PrimitiveDistanceFunction.class);
+ if(config.grab(distanceFunctionP)) {
+ distanceFunction = distanceFunctionP.instantiateClass(config);
+ if (!(distanceFunction instanceof EuclideanDistanceFunction) && !(distanceFunction instanceof SquaredEuclideanDistanceFunction)) {
+ LOG.warning("k-means optimizes the sum of squares - it should be used with squared Euclidean distance, and may not converge otherwise!");
+ }
+ }
+
+ IntParameter kP = new IntParameter(K_ID);
+ kP.addConstraint(new GreaterConstraint(0));
+ if (config.grab(kP)) {
k = kP.getValue();
}
ObjectParameter<KMeansInitialization<V>> initialP = new ObjectParameter<KMeansInitialization<V>>(INIT_ID, KMeansInitialization.class, RandomlyGeneratedInitialMeans.class);
- if(config.grab(initialP)) {
+ if (config.grab(initialP)) {
initializer = initialP.instantiateClass(config);
}
- IntParameter maxiterP = new IntParameter(MAXITER_ID, new GreaterEqualConstraint(0), 0);
- if(config.grab(maxiterP)) {
- maxiter = maxiterP.getValue();
+ IntParameter maxiterP = new IntParameter(MAXITER_ID, 0);
+ maxiterP.addConstraint(new GreaterEqualConstraint(0));
+ if (config.grab(maxiterP)) {
+ maxiter = maxiterP.intValue();
}
}
@Override
- protected AbstractKMeans<V, D> makeInstance() {
+ protected KMeansLloyd<V, D> makeInstance() {
return new KMeansLloyd<V, D>(distanceFunction, k, maxiter, initializer);
}
}
-} \ No newline at end of file
+}
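
Since the parameterizer now defaults the distance function to squared Euclidean, direct construction follows the same pattern. A hedged sketch (the STATIC instance and the RandomlyGeneratedInitialMeans constructor are assumed; only the KMeansLloyd constructor is confirmed above):

    // k = 3 clusters, at most 50 iterations, randomly generated initial means.
    KMeansLloyd<DoubleVector, DoubleDistance> km =
        new KMeansLloyd<DoubleVector, DoubleDistance>(
            SquaredEuclideanDistanceFunction.STATIC, 3, 50,
            new RandomlyGeneratedInitialMeans<DoubleVector>(RandomFactory.DEFAULT));
    Clustering<KMeansModel<DoubleVector>> result = km.run(database, relation);
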
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansMacQueen.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansMacQueen.java
index c729eb10..0cc7c363 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansMacQueen.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansMacQueen.java
@@ -27,21 +27,22 @@ import java.util.ArrayList;
import java.util.List;
import de.lmu.ifi.dbs.elki.algorithm.AbstractPrimitiveDistanceBasedAlgorithm;
-import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.NumberVector;
-import de.lmu.ifi.dbs.elki.data.model.MeanModel;
+import de.lmu.ifi.dbs.elki.data.model.KMeansModel;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.SquaredEuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -62,8 +63,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
* </p>
*
* @author Erich Schubert
- *
- * @apiviz.has MeanModel
+ * @apiviz.has KMeansModel
*
* @param <V> vector type to use
* @param <D> distance function value type
@@ -71,11 +71,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
@Title("K-Means")
@Description("Finds a partitioning into k clusters.")
@Reference(authors = "J. MacQueen", title = "Some Methods for Classification and Analysis of Multivariate Observations", booktitle = "5th Berkeley Symp. Math. Statist. Prob., Vol. 1, 1967, pp 281-297", url = "http://projecteuclid.org/euclid.bsmsp/1200512992")
-public class KMeansMacQueen<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractKMeans<V, D> implements ClusteringAlgorithm<Clustering<MeanModel<V>>> {
+public class KMeansMacQueen<V extends NumberVector<?>, D extends Distance<D>> extends AbstractKMeans<V, D, KMeansModel<V>> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(KMeansMacQueen.class);
+ private static final Logging LOG = Logging.getLogger(KMeansMacQueen.class);
/**
* Constructor.
@@ -83,30 +83,31 @@ public class KMeansMacQueen<V extends NumberVector<V, ?>, D extends Distance<D>>
* @param distanceFunction distance function
* @param k k parameter
* @param maxiter Maxiter parameter
+ * @param initializer Initialization method
*/
- public KMeansMacQueen(PrimitiveDistanceFunction<NumberVector<?, ?>, D> distanceFunction, int k, int maxiter, KMeansInitialization<V> initializer) {
+ public KMeansMacQueen(PrimitiveDistanceFunction<NumberVector<?>, D> distanceFunction, int k, int maxiter, KMeansInitialization<V> initializer) {
super(distanceFunction, k, maxiter, initializer);
}
/**
- * Run k-means
+ * Run k-means.
*
* @param database Database
* @param relation relation to use
* @return Clustering result
*/
- public Clustering<MeanModel<V>> run(Database database, Relation<V> relation) {
- if(relation.size() <= 0) {
- return new Clustering<MeanModel<V>>("k-Means Clustering", "kmeans-clustering");
+ public Clustering<KMeansModel<V>> run(Database database, Relation<V> relation) {
+ if (relation.size() <= 0) {
+ return new Clustering<KMeansModel<V>>("k-Means Clustering", "kmeans-clustering");
}
// Choose initial means
List<Vector> means = new ArrayList<Vector>(k);
- for(NumberVector<?, ?> nv : initializer.chooseInitialMeans(relation, k, getDistanceFunction())) {
+ for (NumberVector<?> nv : initializer.chooseInitialMeans(relation, k, getDistanceFunction())) {
means.add(nv.getColumnVector());
}
// Initialize cluster and assign objects
List<ModifiableDBIDs> clusters = new ArrayList<ModifiableDBIDs>();
- for(int i = 0; i < k; i++) {
+ for (int i = 0; i < k; i++) {
clusters.add(DBIDUtil.newHashSet(relation.size() / k));
}
assignToNearestCluster(relation, means, clusters);
@@ -114,28 +115,28 @@ public class KMeansMacQueen<V extends NumberVector<V, ?>, D extends Distance<D>>
means = means(clusters, means, relation);
// Refine result
- for(int iteration = 0; maxiter <= 0 || iteration < maxiter; iteration++) {
- if(logger.isVerbose()) {
- logger.verbose("K-Means iteration " + (iteration + 1));
+ for (int iteration = 0; maxiter <= 0 || iteration < maxiter; iteration++) {
+ if (LOG.isVerbose()) {
+ LOG.verbose("K-Means iteration " + (iteration + 1));
}
boolean changed = macQueenIterate(relation, means, clusters);
- if(!changed) {
+ if (!changed) {
break;
}
}
- final V factory = DatabaseUtil.assumeVectorField(relation).getFactory();
- Clustering<MeanModel<V>> result = new Clustering<MeanModel<V>>("k-Means Clustering", "kmeans-clustering");
- for(int i = 0; i < clusters.size(); i++) {
+ final NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(relation);
+ Clustering<KMeansModel<V>> result = new Clustering<KMeansModel<V>>("k-Means Clustering", "kmeans-clustering");
+ for (int i = 0; i < clusters.size(); i++) {
DBIDs ids = clusters.get(i);
- MeanModel<V> model = new MeanModel<V>(factory.newNumberVector(means.get(i).getArrayRef()));
- result.addCluster(new Cluster<MeanModel<V>>(ids, model));
+ KMeansModel<V> model = new KMeansModel<V>(factory.newNumberVector(means.get(i).getArrayRef()));
+ result.addCluster(new Cluster<KMeansModel<V>>(ids, model));
}
return result;
}
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -145,35 +146,53 @@ public class KMeansMacQueen<V extends NumberVector<V, ?>, D extends Distance<D>>
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractPrimitiveDistanceBasedAlgorithm.Parameterizer<NumberVector<?, ?>, D> {
+ public static class Parameterizer<V extends NumberVector<?>, D extends Distance<D>> extends AbstractPrimitiveDistanceBasedAlgorithm.Parameterizer<NumberVector<?>, D> {
+ /**
+ * k Parameter.
+ */
protected int k;
+ /**
+ * Maximum number of iterations.
+ */
protected int maxiter;
+ /**
+ * Initialization method.
+ */
protected KMeansInitialization<V> initializer;
@Override
protected void makeOptions(Parameterization config) {
- super.makeOptions(config);
- IntParameter kP = new IntParameter(K_ID, new GreaterConstraint(0));
- if(config.grab(kP)) {
+ ObjectParameter<PrimitiveDistanceFunction<NumberVector<?>, D>> distanceFunctionP = makeParameterDistanceFunction(SquaredEuclideanDistanceFunction.class, PrimitiveDistanceFunction.class);
+ if (config.grab(distanceFunctionP)) {
+ distanceFunction = distanceFunctionP.instantiateClass(config);
+ if (!(distanceFunction instanceof EuclideanDistanceFunction) && !(distanceFunction instanceof SquaredEuclideanDistanceFunction)) {
+ LOG.warning("k-means optimizes the sum of squares - it should be used with squared Euclidean distance, and may not converge otherwise!");
+ }
+ }
+
+ IntParameter kP = new IntParameter(K_ID);
+ kP.addConstraint(new GreaterConstraint(0));
+ if (config.grab(kP)) {
k = kP.getValue();
}
ObjectParameter<KMeansInitialization<V>> initialP = new ObjectParameter<KMeansInitialization<V>>(INIT_ID, KMeansInitialization.class, RandomlyGeneratedInitialMeans.class);
- if(config.grab(initialP)) {
+ if (config.grab(initialP)) {
initializer = initialP.instantiateClass(config);
}
- IntParameter maxiterP = new IntParameter(MAXITER_ID, new GreaterEqualConstraint(0), 0);
- if(config.grab(maxiterP)) {
+ IntParameter maxiterP = new IntParameter(MAXITER_ID, 0);
+ maxiterP.addConstraint(new GreaterEqualConstraint(0));
+ if (config.grab(maxiterP)) {
maxiter = maxiterP.getValue();
}
}
@Override
- protected AbstractKMeans<V, D> makeInstance() {
+ protected KMeansMacQueen<V, D> makeInstance() {
return new KMeansMacQueen<V, D>(distanceFunction, k, maxiter, initializer);
}
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansPlusPlusInitialMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansPlusPlusInitialMeans.java
index 9afeff6c..a07953da 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansPlusPlusInitialMeans.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansPlusPlusInitialMeans.java
@@ -38,6 +38,7 @@ import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.LoggingUtil;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
@@ -62,10 +63,10 @@ public class KMeansPlusPlusInitialMeans<V, D extends NumberDistance<D, ?>> exten
/**
* Constructor.
*
- * @param seed Random seed.
+ * @param rnd Random generator.
*/
- public KMeansPlusPlusInitialMeans(Long seed) {
- super(seed);
+ public KMeansPlusPlusInitialMeans(RandomFactory rnd) {
+ super(rnd);
}
@Override
@@ -81,8 +82,8 @@ public class KMeansPlusPlusInitialMeans<V, D extends NumberDistance<D, ?>> exten
// Chose first mean
List<V> means = new ArrayList<V>(k);
- Random random = (seed != null) ? new Random(seed) : new Random();
- DBID first = DBIDUtil.randomSample(relation.getDBIDs(), 1, random.nextLong()).iter().getDBID();
+ Random random = rnd.getRandom();
+ DBID first = DBIDUtil.deref(DBIDUtil.randomSample(relation.getDBIDs(), 1, new Random(random.nextLong())).iter());
means.add(relation.get(first));
ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
@@ -131,8 +132,8 @@ public class KMeansPlusPlusInitialMeans<V, D extends NumberDistance<D, ?>> exten
// Chose first mean
ArrayModifiableDBIDs means = DBIDUtil.newArray(k);
- Random random = (seed != null) ? new Random(seed) : new Random();
- DBID first = DBIDUtil.randomSample(distQ.getRelation().getDBIDs(), 1, random.nextLong()).iter().getDBID();
+ Random random = rnd.getRandom();
+ DBID first = DBIDUtil.deref(DBIDUtil.randomSample(distQ.getRelation().getDBIDs(), 1, new Random(random.nextLong())).iter());
means.add(first);
ArrayDBIDs ids = DBIDUtil.ensureArray(distQ.getRelation().getDBIDs());
@@ -176,7 +177,7 @@ public class KMeansPlusPlusInitialMeans<V, D extends NumberDistance<D, ?>> exten
double weightsum = 0.0;
DBIDIter it = ids.iter();
for(int i = 0; i < weights.length; i++, it.advance()) {
- if(latest.sameDBID(it)) {
+ if(DBIDUtil.equal(latest, it)) {
weights[i] = 0.0;
}
else {
@@ -243,7 +244,7 @@ public class KMeansPlusPlusInitialMeans<V, D extends NumberDistance<D, ?>> exten
public static class Parameterizer<V, D extends NumberDistance<D, ?>> extends AbstractKMeansInitialization.Parameterizer<V> {
@Override
protected KMeansPlusPlusInitialMeans<V, D> makeInstance() {
- return new KMeansPlusPlusInitialMeans<V, D>(seed);
+ return new KMeansPlusPlusInitialMeans<V, D>(rnd);
}
}
} \ No newline at end of file
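
The weight update above zeroes the just-chosen point and keeps squared distances for the rest, which is the D²-sampling step of k-means++. The corresponding draw, self-contained over plain arrays:

    import java.util.Random;

    // Pick index i with probability weights[i] / sum(weights).
    static int sampleIndex(double[] weights, Random random) {
      double total = 0.0;
      for (double w : weights) {
        total += w;
      }
      double r = random.nextDouble() * total;
      for (int i = 0; i < weights.length; i++) {
        r -= weights[i];
        if (r <= 0) {
          return i;
        }
      }
      return weights.length - 1; // guard against rounding error
    }
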
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMediansLloyd.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMediansLloyd.java
index 8c284981..9917337e 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMediansLloyd.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMediansLloyd.java
@@ -27,7 +27,6 @@ import java.util.ArrayList;
import java.util.List;
import de.lmu.ifi.dbs.elki.algorithm.AbstractPrimitiveDistanceBasedAlgorithm;
-import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.NumberVector;
@@ -36,10 +35,10 @@ import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
@@ -61,18 +60,16 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
*
* @author Erich Schubert
*
- * @apiviz.has MeanModel
- *
* @param <V> vector datatype
* @param <D> distance value type
*/
@Title("K-Medians")
-@Reference(title = "Clustering via Concave Minimization", authors = "P. S. Bradley, O. L. Mangasarian, W. N. Street", booktitle = "Advances in neural information processing systems", url="http://nips.djvuzone.org/djvu/nips09/0368.djvu")
-public class KMediansLloyd<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractKMeans<V, D> implements ClusteringAlgorithm<Clustering<MeanModel<V>>> {
+@Reference(title = "Clustering via Concave Minimization", authors = "P. S. Bradley, O. L. Mangasarian, W. N. Street", booktitle = "Advances in neural information processing systems", url = "http://nips.djvuzone.org/djvu/nips09/0368.djvu")
+public class KMediansLloyd<V extends NumberVector<?>, D extends Distance<D>> extends AbstractKMeans<V, D, MeanModel<V>> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(KMediansLloyd.class);
+ private static final Logging LOG = Logging.getLogger(KMediansLloyd.class);
/**
* Constructor.
@@ -80,46 +77,47 @@ public class KMediansLloyd<V extends NumberVector<V, ?>, D extends Distance<D>>
* @param distanceFunction distance function
* @param k k parameter
* @param maxiter Maxiter parameter
+ * @param initializer Initialization method
*/
- public KMediansLloyd(PrimitiveDistanceFunction<NumberVector<?, ?>, D> distanceFunction, int k, int maxiter, KMeansInitialization<V> initializer) {
+ public KMediansLloyd(PrimitiveDistanceFunction<NumberVector<?>, D> distanceFunction, int k, int maxiter, KMeansInitialization<V> initializer) {
super(distanceFunction, k, maxiter, initializer);
}
/**
- * Run k-medians
+ * Run k-medians.
*
* @param database Database
* @param relation relation to use
* @return result
*/
public Clustering<MeanModel<V>> run(Database database, Relation<V> relation) {
- if(relation.size() <= 0) {
+ if (relation.size() <= 0) {
return new Clustering<MeanModel<V>>("k-Medians Clustering", "kmedians-clustering");
}
// Choose initial medians
- List<? extends NumberVector<?, ?>> medians = initializer.chooseInitialMeans(relation, k, getDistanceFunction());
+ List<? extends NumberVector<?>> medians = initializer.chooseInitialMeans(relation, k, getDistanceFunction());
// Setup cluster assignment store
List<ModifiableDBIDs> clusters = new ArrayList<ModifiableDBIDs>();
- for(int i = 0; i < k; i++) {
+ for (int i = 0; i < k; i++) {
clusters.add(DBIDUtil.newHashSet(relation.size() / k));
}
- for(int iteration = 0; maxiter <= 0 || iteration < maxiter; iteration++) {
- if(logger.isVerbose()) {
- logger.verbose("K-Medians iteration " + (iteration + 1));
+ for (int iteration = 0; maxiter <= 0 || iteration < maxiter; iteration++) {
+ if (LOG.isVerbose()) {
+ LOG.verbose("K-Medians iteration " + (iteration + 1));
}
boolean changed = assignToNearestCluster(relation, medians, clusters);
// Stop if no cluster assignment changed.
- if(!changed) {
+ if (!changed) {
break;
}
// Recompute medians.
medians = medians(clusters, medians, relation);
}
// Wrap result
- final V factory = DatabaseUtil.assumeVectorField(relation).getFactory();
+ final NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(relation);
Clustering<MeanModel<V>> result = new Clustering<MeanModel<V>>("k-Medians Clustering", "kmedians-clustering");
- for(int i = 0; i < clusters.size(); i++) {
+ for (int i = 0; i < clusters.size(); i++) {
MeanModel<V> model = new MeanModel<V>(factory.newNumberVector(medians.get(i).getColumnVector().getArrayRef()));
result.addCluster(new Cluster<MeanModel<V>>(clusters.get(i), model));
}
@@ -128,7 +126,7 @@ public class KMediansLloyd<V extends NumberVector<V, ?>, D extends Distance<D>>
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -138,35 +136,46 @@ public class KMediansLloyd<V extends NumberVector<V, ?>, D extends Distance<D>>
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractPrimitiveDistanceBasedAlgorithm.Parameterizer<NumberVector<?, ?>, D> {
+ public static class Parameterizer<V extends NumberVector<?>, D extends Distance<D>> extends AbstractPrimitiveDistanceBasedAlgorithm.Parameterizer<NumberVector<?>, D> {
+ /**
+ * k Parameter.
+ */
protected int k;
+ /**
+ * Maximum number of iterations.
+ */
protected int maxiter;
+ /**
+ * Initialization method.
+ */
protected KMeansInitialization<V> initializer;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- IntParameter kP = new IntParameter(K_ID, new GreaterConstraint(0));
- if(config.grab(kP)) {
- k = kP.getValue();
+ IntParameter kP = new IntParameter(K_ID);
+ kP.addConstraint(new GreaterConstraint(0));
+ if (config.grab(kP)) {
+ k = kP.intValue();
}
ObjectParameter<KMeansInitialization<V>> initialP = new ObjectParameter<KMeansInitialization<V>>(INIT_ID, KMeansInitialization.class, RandomlyGeneratedInitialMeans.class);
- if(config.grab(initialP)) {
+ if (config.grab(initialP)) {
initializer = initialP.instantiateClass(config);
}
- IntParameter maxiterP = new IntParameter(MAXITER_ID, new GreaterEqualConstraint(0), 0);
- if(config.grab(maxiterP)) {
- maxiter = maxiterP.getValue();
+ IntParameter maxiterP = new IntParameter(MAXITER_ID, 0);
+ maxiterP.addConstraint(new GreaterEqualConstraint(0));
+ if (config.grab(maxiterP)) {
+ maxiter = maxiterP.intValue();
}
}
@Override
- protected AbstractKMeans<V, D> makeInstance() {
+ protected KMediansLloyd<V, D> makeInstance() {
return new KMediansLloyd<V, D>(distanceFunction, k, maxiter, initializer);
}
}
-} \ No newline at end of file
+}
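
K-medians replaces the mean recomputation with a per-dimension median; the medians() hunk in AbstractKMeans above now does this with 0-based dimensions and QuickSelect. A plain-array sketch of the coordinate-wise median:

    import java.util.Arrays;

    // Coordinate-wise median: each dimension is handled independently.
    static double[] coordinateWiseMedian(double[][] points, int dim) {
      double[] median = new double[dim];
      double[] column = new double[points.length];
      for (int d = 0; d < dim; d++) {
        for (int i = 0; i < points.length; i++) {
          column[i] = points[i][d];
        }
        Arrays.sort(column); // QuickSelect would avoid the full sort
        median[d] = column[column.length / 2];
      }
      return median;
    }
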
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsEM.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsEM.java
index a5c3d675..f4398458 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsEM.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsEM.java
@@ -78,7 +78,7 @@ public class KMedoidsEM<V, D extends NumberDistance<D, ?>> extends AbstractDista
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(KMedoidsEM.class);
+ private static final Logging LOG = Logging.getLogger(KMedoidsEM.class);
/**
* Holds the value of {@link AbstractKMeans#K_ID}.
@@ -118,7 +118,7 @@ public class KMedoidsEM<V, D extends NumberDistance<D, ?>> extends AbstractDista
* @return result
*/
public Clustering<MedoidModel> run(Database database, Relation<V> relation) {
- if(relation.size() <= 0) {
+ if (relation.size() <= 0) {
return new Clustering<MedoidModel>("k-Medoids Clustering", "kmedoids-clustering");
}
DistanceQuery<V, D> distQ = database.getDistanceQuery(relation, getDistanceFunction());
@@ -126,7 +126,7 @@ public class KMedoidsEM<V, D extends NumberDistance<D, ?>> extends AbstractDista
ArrayModifiableDBIDs medoids = DBIDUtil.newArray(initializer.chooseInitialMedoids(k, distQ));
// Setup cluster assignment store
List<ModifiableDBIDs> clusters = new ArrayList<ModifiableDBIDs>();
- for(int i = 0; i < k; i++) {
+ for (int i = 0; i < k; i++) {
clusters.add(DBIDUtil.newHashSet(relation.size() / k));
}
Mean[] mdists = Mean.newArray(k);
@@ -137,41 +137,41 @@ public class KMedoidsEM<V, D extends NumberDistance<D, ?>> extends AbstractDista
// Swap phase
boolean changed = true;
- while(changed) {
+ while (changed) {
changed = false;
// Try to swap the medoid with a better cluster member:
- for(int i = 0; i < k; i++) {
- DBID med = medoids.get(i);
+ int i = 0;
+ for (DBIDIter miter = medoids.iter(); miter.valid(); miter.advance(), i++) {
DBID best = null;
Mean bestm = mdists[i];
- for(DBIDIter iter = clusters.get(i).iter(); iter.valid(); iter.advance()) {
- if(med.sameDBID(iter)) {
+ for (DBIDIter iter = clusters.get(i).iter(); iter.valid(); iter.advance()) {
+ if (DBIDUtil.equal(miter, iter)) {
continue;
}
Mean mdist = new Mean();
- for(DBIDIter iter2 = clusters.get(i).iter(); iter2.valid(); iter2.advance()) {
+ for (DBIDIter iter2 = clusters.get(i).iter(); iter2.valid(); iter2.advance()) {
mdist.put(distQ.distance(iter, iter2).doubleValue());
}
- if(mdist.getMean() < bestm.getMean()) {
- best = iter.getDBID();
+ if (mdist.getMean() < bestm.getMean()) {
+ best = DBIDUtil.deref(iter);
bestm = mdist;
}
}
- if(best != null && !med.sameDBID(best)) {
+ if (best != null && !DBIDUtil.equal(miter, best)) {
changed = true;
medoids.set(i, best);
mdists[i] = bestm;
}
}
// Reassign
- if(changed) {
+ if (changed) {
assignToNearestCluster(medoids, mdists, clusters, distQ);
}
}
// Wrap result
Clustering<MedoidModel> result = new Clustering<MedoidModel>("k-Medoids Clustering", "kmedoids-clustering");
- for(int i = 0; i < clusters.size(); i++) {
+ for (int i = 0; i < clusters.size(); i++) {
MedoidModel model = new MedoidModel(medoids.get(i));
result.addCluster(new Cluster<MedoidModel>(clusters.get(i), model));
}
@@ -192,24 +192,27 @@ public class KMedoidsEM<V, D extends NumberDistance<D, ?>> extends AbstractDista
boolean changed = false;
double[] dists = new double[k];
- for(DBIDIter iditer = distQ.getRelation().iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for (DBIDIter iditer = distQ.getRelation().iterDBIDs(); iditer.valid(); iditer.advance()) {
int minIndex = 0;
double mindist = Double.POSITIVE_INFINITY;
- for(int i = 0; i < k; i++) {
- dists[i] = distQ.distance(iditer, means.get(i)).doubleValue();
- if(dists[i] < mindist) {
- minIndex = i;
- mindist = dists[i];
+ {
+ int i = 0;
+ for (DBIDIter miter = means.iter(); miter.valid(); miter.advance(), i++) {
+ dists[i] = distQ.distance(iditer, miter).doubleValue();
+ if (dists[i] < mindist) {
+ minIndex = i;
+ mindist = dists[i];
+ }
}
}
- if(clusters.get(minIndex).add(iditer)) {
+ if (clusters.get(minIndex).add(iditer)) {
changed = true;
mdist[minIndex].put(mindist);
// Remove from previous cluster
// TODO: keep a list of cluster assignments to save this search?
- for(int i = 0; i < k; i++) {
- if(i != minIndex) {
- if(clusters.get(i).remove(iditer)) {
+ for (int i = 0; i < k; i++) {
+ if (i != minIndex) {
+ if (clusters.get(i).remove(iditer)) {
mdist[minIndex].put(dists[i], -1);
break;
}
@@ -227,7 +230,7 @@ public class KMedoidsEM<V, D extends NumberDistance<D, ?>> extends AbstractDista
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -247,19 +250,21 @@ public class KMedoidsEM<V, D extends NumberDistance<D, ?>> extends AbstractDista
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- IntParameter kP = new IntParameter(KMeans.K_ID, new GreaterConstraint(0));
- if(config.grab(kP)) {
- k = kP.getValue();
+ IntParameter kP = new IntParameter(KMeans.K_ID);
+ kP.addConstraint(new GreaterConstraint(0));
+ if (config.grab(kP)) {
+ k = kP.intValue();
}
ObjectParameter<KMedoidsInitialization<V>> initialP = new ObjectParameter<KMedoidsInitialization<V>>(KMeans.INIT_ID, KMedoidsInitialization.class, PAMInitialMeans.class);
- if(config.grab(initialP)) {
+ if (config.grab(initialP)) {
initializer = initialP.instantiateClass(config);
}
- IntParameter maxiterP = new IntParameter(KMeans.MAXITER_ID, new GreaterEqualConstraint(0), 0);
- if(config.grab(maxiterP)) {
- maxiter = maxiterP.getValue();
+ IntParameter maxiterP = new IntParameter(KMeans.MAXITER_ID, 0);
+ maxiterP.addConstraint(new GreaterEqualConstraint(0));
+ if (config.grab(maxiterP)) {
+ maxiter = maxiterP.intValue();
}
}
@@ -268,4 +273,4 @@ public class KMedoidsEM<V, D extends NumberDistance<D, ?>> extends AbstractDista
return new KMedoidsEM<V, D>(distanceFunction, k, maxiter, initializer);
}
}
-} \ No newline at end of file
+}
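
The swap phase above checks, per cluster, whether some member has a smaller mean distance to the other members than the current medoid. The same test over a plain distance matrix (a sketch, not the ELKI API):

    // Best medoid of one cluster: the member minimizing the average
    // distance to all members. dist is a symmetric distance matrix,
    // member holds the indices of this cluster's points.
    static int bestMedoid(double[][] dist, int[] member) {
      int best = -1;
      double bestMean = Double.POSITIVE_INFINITY;
      for (int i : member) {
        double sum = 0.0;
        for (int j : member) {
          sum += dist[i][j];
        }
        double mean = sum / member.length;
        if (mean < bestMean) {
          bestMean = mean;
          best = i;
        }
      }
      return best;
    }
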
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsPAM.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsPAM.java
index 30c80084..906501e4 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsPAM.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsPAM.java
@@ -83,7 +83,7 @@ public class KMedoidsPAM<V, D extends NumberDistance<D, ?>> extends AbstractDist
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(KMedoidsPAM.class);
+ private static final Logging LOG = Logging.getLogger(KMedoidsPAM.class);
/**
* Holds the value of {@link AbstractKMeans#K_ID}.
@@ -123,7 +123,7 @@ public class KMedoidsPAM<V, D extends NumberDistance<D, ?>> extends AbstractDist
* @return result
*/
public Clustering<MedoidModel> run(Database database, Relation<V> relation) {
- if(relation.size() <= 0) {
+ if (relation.size() <= 0) {
return new Clustering<MedoidModel>("k-Medoids Clustering", "kmedoids-clustering");
}
DistanceQuery<V, D> distQ = database.getDistanceQuery(relation, getDistanceFunction());
@@ -132,7 +132,7 @@ public class KMedoidsPAM<V, D extends NumberDistance<D, ?>> extends AbstractDist
ArrayModifiableDBIDs medoids = DBIDUtil.newArray(initializer.chooseInitialMedoids(k, distQ));
// Setup cluster assignment store
List<ModifiableDBIDs> clusters = new ArrayList<ModifiableDBIDs>();
- for(int i = 0; i < k; i++) {
+ for (int i = 0; i < k; i++) {
clusters.add(DBIDUtil.newHashSet(relation.size() / k));
}
@@ -143,36 +143,35 @@ public class KMedoidsPAM<V, D extends NumberDistance<D, ?>> extends AbstractDist
// Swap phase
boolean changed = true;
- while(changed) {
+ while (changed) {
changed = false;
// Try to swap the medoid with a better cluster member:
double best = 0;
DBID bestid = null;
int bestcluster = -1;
- for(int i = 0; i < k; i++) {
- DBID med = medoids.get(i);
- for(DBIDIter iter = clusters.get(i).iter(); iter.valid(); iter.advance()) {
- if(med.sameDBID(iter)) {
+ int i = 0;
+ for (DBIDIter miter = medoids.iter(); miter.valid(); miter.advance(), i++) {
+ for (DBIDIter iter = clusters.get(i).iter(); iter.valid(); iter.advance()) {
+ if (DBIDUtil.equal(miter, iter)) {
continue;
}
// double disti = distQ.distance(id, med).doubleValue();
double cost = 0;
- for(int j = 0; j < k; j++) {
- for(DBIDIter iter2 = clusters.get(j).iter(); iter2.valid(); iter2.advance()) {
- double distcur = distQ.distance(iter2, medoids.get(j)).doubleValue();
+ DBIDIter olditer = medoids.iter();
+ for (int j = 0; j < k; j++, olditer.advance()) {
+ for (DBIDIter iter2 = clusters.get(j).iter(); iter2.valid(); iter2.advance()) {
+ double distcur = distQ.distance(iter2, olditer).doubleValue();
double distnew = distQ.distance(iter2, iter).doubleValue();
- if(j == i) {
+ if (j == i) {
// Cases 1 and 2.
double distsec = second.doubleValue(iter2);
- if(distcur > distsec) {
+ if (distcur > distsec) {
// Case 1, other would switch to a third medoid
cost += distsec - distcur; // Always positive!
- }
- else { // Would remain with the candidate
+ } else { // Would remain with the candidate
cost += distnew - distcur; // Could be negative
}
- }
- else {
+ } else {
// Cases 3-4: objects from other clusters
if (distcur < distnew) {
// Case 3: no change
@@ -185,20 +184,20 @@ public class KMedoidsPAM<V, D extends NumberDistance<D, ?>> extends AbstractDist
}
if (cost < best) {
best = cost;
- bestid = iter.getDBID();
+ bestid = DBIDUtil.deref(iter);
bestcluster = i;
}
}
}
- if(logger.isDebugging()) {
- logger.debug("Best cost: " + best);
+ if (LOG.isDebugging()) {
+ LOG.debug("Best cost: " + best);
}
- if(bestid != null) {
+ if (bestid != null) {
changed = true;
medoids.set(bestcluster, bestid);
}
// Reassign
- if(changed) {
+ if (changed) {
// TODO: can we save some of these recomputations?
assignToNearestCluster(medoids, ids, second, clusters, distQ);
}
@@ -206,7 +205,7 @@ public class KMedoidsPAM<V, D extends NumberDistance<D, ?>> extends AbstractDist
// Wrap result
Clustering<MedoidModel> result = new Clustering<MedoidModel>("k-Medoids Clustering", "kmedoids-clustering");
- for(int i = 0; i < clusters.size(); i++) {
+ for (int i = 0; i < clusters.size(); i++) {
MedoidModel model = new MedoidModel(medoids.get(i));
result.addCluster(new Cluster<MedoidModel>(clusters.get(i), model));
}
@@ -227,28 +226,30 @@ public class KMedoidsPAM<V, D extends NumberDistance<D, ?>> extends AbstractDist
protected boolean assignToNearestCluster(ArrayDBIDs means, DBIDs ids, WritableDoubleDataStore second, List<? extends ModifiableDBIDs> clusters, DistanceQuery<V, D> distQ) {
boolean changed = false;
- for(DBIDIter iditer = distQ.getRelation().iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for (DBIDIter iditer = distQ.getRelation().iterDBIDs(); iditer.valid(); iditer.advance()) {
int minIndex = 0;
double mindist = Double.POSITIVE_INFINITY;
double mindist2 = Double.POSITIVE_INFINITY;
- for(int i = 0; i < k; i++) {
- double dist = distQ.distance(iditer, means.get(i)).doubleValue();
- if(dist < mindist) {
- minIndex = i;
- mindist2 = mindist;
- mindist = dist;
- }
- else if(dist < mindist2) {
- mindist2 = dist;
+ {
+ int i = 0;
+ for (DBIDIter miter = means.iter(); miter.valid(); miter.advance(), i++) {
+ double dist = distQ.distance(iditer, miter).doubleValue();
+ if (dist < mindist) {
+ minIndex = i;
+ mindist2 = mindist;
+ mindist = dist;
+ } else if (dist < mindist2) {
+ mindist2 = dist;
+ }
}
}
- if(clusters.get(minIndex).add(iditer)) {
+ if (clusters.get(minIndex).add(iditer)) {
changed = true;
// Remove from previous cluster
// TODO: keep a list of cluster assignments to save this search?
- for(int i = 0; i < k; i++) {
- if(i != minIndex) {
- if(clusters.get(i).remove(iditer)) {
+ for (int i = 0; i < k; i++) {
+ if (i != minIndex) {
+ if (clusters.get(i).remove(iditer)) {
break;
}
}
@@ -266,7 +267,7 @@ public class KMedoidsPAM<V, D extends NumberDistance<D, ?>> extends AbstractDist
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -286,19 +287,21 @@ public class KMedoidsPAM<V, D extends NumberDistance<D, ?>> extends AbstractDist
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- IntParameter kP = new IntParameter(KMeans.K_ID, new GreaterConstraint(0));
- if(config.grab(kP)) {
- k = kP.getValue();
+ IntParameter kP = new IntParameter(KMeans.K_ID);
+ kP.addConstraint(new GreaterConstraint(0));
+ if (config.grab(kP)) {
+ k = kP.intValue();
}
ObjectParameter<KMedoidsInitialization<V>> initialP = new ObjectParameter<KMedoidsInitialization<V>>(KMeans.INIT_ID, KMedoidsInitialization.class, PAMInitialMeans.class);
- if(config.grab(initialP)) {
+ if (config.grab(initialP)) {
initializer = initialP.instantiateClass(config);
}
- IntParameter maxiterP = new IntParameter(KMeans.MAXITER_ID, new GreaterEqualConstraint(0), 0);
- if(config.grab(maxiterP)) {
- maxiter = maxiterP.getValue();
+ IntParameter maxiterP = new IntParameter(KMeans.MAXITER_ID, 0);
+ maxiterP.addConstraint(new GreaterEqualConstraint(0));
+ if (config.grab(maxiterP)) {
+ maxiter = maxiterP.intValue();
}
}
@@ -307,4 +310,4 @@ public class KMedoidsPAM<V, D extends NumberDistance<D, ?>> extends AbstractDist
return new KMedoidsPAM<V, D>(distanceFunction, k, maxiter, initializer);
}
}
-}
\ No newline at end of file
+}
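
Note on the swap phase above: PAM evaluates, for each pair of a current medoid and a non-medoid candidate, how the total deviation would change if the two were swapped, distinguishing the classic cases — members of the affected cluster either escape to their second-nearest medoid or stay with the candidate, while members of other clusters either keep their medoid or defect to the candidate. A minimal sketch of that cost computation using plain arrays instead of ELKI's DBID/DistanceQuery types (all names here are hypothetical, not ELKI's API):

    class PamSwapSketch {
      // dist[x][y] is a precomputed distance matrix; nearest[x]/second[x] are
      // the distances of x to its closest and second-closest current medoid;
      // assign[x] is the index of the cluster x currently belongs to.
      static double swapCost(double[][] dist, double[] nearest, double[] second,
          int[] assign, int medoidCluster, int candidate) {
        double cost = 0;
        for (int x = 0; x < dist.length; x++) {
          double dnew = dist[x][candidate];
          if (assign[x] == medoidCluster) {
            // Cases 1+2: x loses its medoid; it moves to its second-nearest
            // medoid or to the candidate, whichever is closer.
            cost += Math.min(dnew, second[x]) - nearest[x];
          } else if (dnew < nearest[x]) {
            // Case 4: x defects from another cluster to the candidate.
            cost += dnew - nearest[x];
          } // Case 3: x keeps its current medoid; no contribution.
        }
        return cost; // negative values mean the swap improves the clustering
      }
    }

A swap is accepted only when the best cost found is negative, which is the `cost < best` bookkeeping (with `best` initialized to 0) in the hunk above.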
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/PAMInitialMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/PAMInitialMeans.java
index 094c37bb..1fc7160e 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/PAMInitialMeans.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/PAMInitialMeans.java
@@ -113,7 +113,7 @@ public class PAMInitialMeans<V, D extends NumberDistance<D, ?>> implements KMean
}
if(mean.getMean() < best) {
best = mean.getMean();
- bestid = iter.getDBID();
+ bestid = DBIDUtil.deref(iter);
if(bestd != null) {
bestd.destroy();
}
@@ -133,23 +133,21 @@ public class PAMInitialMeans<V, D extends NumberDistance<D, ?>> implements KMean
DBID bestid = null;
WritableDoubleDataStore bestd = null;
for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
- DBID id = iter.getDBID();
- if(medids.contains(id)) {
+ if(medids.contains(iter)) {
continue;
}
WritableDoubleDataStore newd = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
mean.reset();
for(DBIDIter iter2 = ids.iter(); iter2.valid(); iter2.advance()) {
- DBID other = iter2.getDBID();
- double dn = distQ.distance(id, other).doubleValue();
- double v = Math.min(dn, mindist.doubleValue(other));
+ double dn = distQ.distance(iter, iter2).doubleValue();
+ double v = Math.min(dn, mindist.doubleValue(iter2));
mean.put(v);
- newd.put(other, v);
+ newd.put(iter2, v);
}
assert (mean.getCount() == ids.size());
if(mean.getMean() < best) {
best = mean.getMean();
- bestid = id;
+ bestid = DBIDUtil.deref(iter);
if(bestd != null) {
bestd.destroy();
}
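
The hunk above is the greedy BUILD phase of PAM initialization: each remaining candidate is scored by the mean of min(d(candidate, x), mindist(x)) over all objects, and the best-scoring candidate becomes the next medoid. A compact sketch with plain arrays (hypothetical names, not ELKI's API):

    class PamBuildSketch {
      // One BUILD step: pick the non-medoid that most reduces the average
      // distance to the nearest already-chosen medoid.
      static int nextMedoid(double[][] dist, double[] mindist, boolean[] isMedoid) {
        int bestId = -1;
        double best = Double.POSITIVE_INFINITY;
        for (int cand = 0; cand < dist.length; cand++) {
          if (isMedoid[cand]) {
            continue; // skip chosen medoids, like the contains() test above
          }
          double sum = 0;
          for (int x = 0; x < dist.length; x++) {
            sum += Math.min(dist[cand][x], mindist[x]);
          }
          double mean = sum / dist.length;
          if (mean < best) {
            best = mean;
            bestId = cand;
          }
        }
        return bestId; // the caller then lowers mindist[] using the winner
      }
    }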
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyChosenInitialMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyChosenInitialMeans.java
index 5b9da923..78e59be7 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyChosenInitialMeans.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyChosenInitialMeans.java
@@ -31,6 +31,7 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
/**
 * Initialize K-means by randomly choosing k existing elements as cluster
@@ -44,15 +45,15 @@ public class RandomlyChosenInitialMeans<V> extends AbstractKMeansInitialization<
/**
* Constructor.
*
- * @param seed Random seed.
+ * @param rnd Random generator.
*/
- public RandomlyChosenInitialMeans(Long seed) {
- super(seed);
+ public RandomlyChosenInitialMeans(RandomFactory rnd) {
+ super(rnd);
}
@Override
public List<V> chooseInitialMeans(Relation<V> relation, int k, PrimitiveDistanceFunction<? super V, ?> distanceFunction) {
- DBIDs ids = DBIDUtil.randomSample(relation.getDBIDs(), k, seed);
+ DBIDs ids = DBIDUtil.randomSample(relation.getDBIDs(), k, rnd);
List<V> means = new ArrayList<V>(k);
for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
means.add(relation.get(iter));
@@ -62,7 +63,7 @@ public class RandomlyChosenInitialMeans<V> extends AbstractKMeansInitialization<
@Override
public DBIDs chooseInitialMedoids(int k, DistanceQuery<? super V, ?> distanceFunction) {
- return DBIDUtil.randomSample(distanceFunction.getRelation().getDBIDs(), k, seed);
+ return DBIDUtil.randomSample(distanceFunction.getRelation().getDBIDs(), k, rnd);
}
/**
@@ -76,7 +77,7 @@ public class RandomlyChosenInitialMeans<V> extends AbstractKMeansInitialization<
@Override
protected RandomlyChosenInitialMeans<V> makeInstance() {
- return new RandomlyChosenInitialMeans<V>(seed);
+ return new RandomlyChosenInitialMeans<V>(rnd);
}
}
}
\ No newline at end of file
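
Both initializers in this area now take a RandomFactory instead of a nullable Long seed, so seeding policy lives in one place and the sampling call simply receives a generator. The sampling itself is an ordinary draw of k distinct elements; a self-contained sketch via shuffle-and-take-prefix (java.util only, not ELKI's DBIDUtil.randomSample):

    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.List;
    import java.util.Random;

    class SampleSketch {
      // Choose k distinct indices out of n, reproducibly via the injected rnd.
      static int[] randomSample(int n, int k, Random rnd) {
        List<Integer> pool = new ArrayList<Integer>(n);
        for (int i = 0; i < n; i++) {
          pool.add(Integer.valueOf(i));
        }
        Collections.shuffle(pool, rnd); // Fisher-Yates under the hood
        int[] sample = new int[k];
        for (int i = 0; i < k; i++) {
          sample[i] = pool.get(i).intValue();
        }
        return sample;
      }
    }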
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyGeneratedInitialMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyGeneratedInitialMeans.java
index 00ed08c4..300f5cb0 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyGeneratedInitialMeans.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyGeneratedInitialMeans.java
@@ -28,9 +28,11 @@ import java.util.Random;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
import de.lmu.ifi.dbs.elki.math.MathUtil;
import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
/**
@@ -41,29 +43,30 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
*
* @param <V> Vector type
*/
-public class RandomlyGeneratedInitialMeans<V extends NumberVector<V, ?>> extends AbstractKMeansInitialization<V> {
+public class RandomlyGeneratedInitialMeans<V extends NumberVector<?>> extends AbstractKMeansInitialization<V> {
/**
* Constructor.
*
- * @param seed Random seed.
+ * @param rnd Random generator.
*/
- public RandomlyGeneratedInitialMeans(Long seed) {
- super(seed);
+ public RandomlyGeneratedInitialMeans(RandomFactory rnd) {
+ super(rnd);
}
@Override
public List<V> chooseInitialMeans(Relation<V> relation, int k, PrimitiveDistanceFunction<? super V, ?> distanceFunction) {
- final int dim = DatabaseUtil.dimensionality(relation);
+ final int dim = RelationUtil.dimensionality(relation);
+ NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(relation);
Pair<V, V> minmax = DatabaseUtil.computeMinMax(relation);
List<V> means = new ArrayList<V>(k);
- final Random random = (this.seed != null) ? new Random(this.seed) : new Random();
+ final Random random = rnd.getRandom();
for(int i = 0; i < k; i++) {
double[] r = MathUtil.randomDoubleArray(dim, random);
// Rescale
for(int d = 0; d < dim; d++) {
- r[d] = minmax.first.doubleValue(d + 1) + (minmax.second.doubleValue(d + 1) - minmax.first.doubleValue(d + 1)) * r[d];
+ r[d] = minmax.first.doubleValue(d) + (minmax.second.doubleValue(d) - minmax.first.doubleValue(d)) * r[d];
}
- means.add(minmax.first.newNumberVector(r));
+ means.add(factory.newNumberVector(r));
}
return means;
}
@@ -75,10 +78,10 @@ public class RandomlyGeneratedInitialMeans<V extends NumberVector<V, ?>> extends
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractKMeansInitialization.Parameterizer<V> {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractKMeansInitialization.Parameterizer<V> {
@Override
protected RandomlyGeneratedInitialMeans<V> makeInstance() {
- return new RandomlyGeneratedInitialMeans<V>(seed);
+ return new RandomlyGeneratedInitialMeans<V>(rnd);
}
}
}
\ No newline at end of file
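
Besides the RandomFactory migration, the hunk above fixes the coordinate indexing (doubleValue(d + 1) becomes doubleValue(d), matching the 0-based NumberVector<?> API) and obtains the vector factory from the relation rather than from a data object. The rescaling step itself maps uniform draws into the data's bounding box; a standalone sketch (hypothetical names):

    import java.util.Random;

    class RandomCentersSketch {
      // k random centers, uniform in the axis-aligned box [min, max].
      static double[][] make(double[] min, double[] max, int k, Random rnd) {
        int dim = min.length;
        double[][] centers = new double[k][dim];
        for (int i = 0; i < k; i++) {
          for (int d = 0; d < dim; d++) {
            // Rescale a uniform [0,1) draw into [min[d], max[d]), as in the hunk.
            centers[i][d] = min[d] + (max[d] - min[d]) * rnd.nextDouble();
          }
        }
        return centers;
      }
    }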
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/package-info.java
index eed031df..4ba1ce09 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/package-info.java
@@ -8,6 +8,10 @@
 * partition the database completely or is in any other sense a relaxed clustering result.
*
* @apiviz.exclude de.lmu.ifi.dbs.elki.algorithm.clustering.OPTICSXi.SteepAreaResult
+ * @apiviz.exclude de.lmu.ifi.dbs.elki.algorithm.Algorithm
+ * @apiviz.exclude de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm
+ * @apiviz.exclude de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm
+ * @apiviz.exclude de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.*
*
* @see de.lmu.ifi.dbs.elki.algorithm
*/
@@ -33,4 +37,4 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-package de.lmu.ifi.dbs.elki.algorithm.clustering;
\ No newline at end of file
+package de.lmu.ifi.dbs.elki.algorithm.clustering;
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java
index 01a693e4..37b3eb57 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java
@@ -43,13 +43,13 @@ import de.lmu.ifi.dbs.elki.data.Subspace;
import de.lmu.ifi.dbs.elki.data.model.SubspaceModel;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
@@ -57,7 +57,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.IntervalConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
@@ -96,11 +96,11 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
@Title("CLIQUE: Automatic Subspace Clustering of High Dimensional Data for Data Mining Applications")
@Description("Grid-based algorithm to identify dense clusters in subspaces of maximum dimensionality.")
@Reference(authors = "R. Agrawal, J. Gehrke, D. Gunopulos, P. Raghavan", title = "Automatic Subspace Clustering of High Dimensional Data for Data Mining Applications", booktitle = "Proc. SIGMOD Conference, Seattle, WA, 1998", url = "http://dx.doi.org/10.1145/276304.276314")
-public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clustering<SubspaceModel<V>>> implements SubspaceClusteringAlgorithm<SubspaceModel<V>> {
+public class CLIQUE<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<SubspaceModel<V>>> implements SubspaceClusteringAlgorithm<SubspaceModel<V>> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(CLIQUE.class);
+ private static final Logging LOG = Logging.getLogger(CLIQUE.class);
/**
* Parameter to specify the number of intervals (units) in each dimension,
@@ -109,7 +109,7 @@ public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
* Key: {@code -clique.xsi}
* </p>
*/
- public static final OptionID XSI_ID = OptionID.getOrCreateOptionID("clique.xsi", "The number of intervals (units) in each dimension.");
+ public static final OptionID XSI_ID = new OptionID("clique.xsi", "The number of intervals (units) in each dimension.");
/**
* Parameter to specify the density threshold for the selectivity of a unit,
@@ -119,7 +119,7 @@ public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
* Key: {@code -clique.tau}
* </p>
*/
- public static final OptionID TAU_ID = OptionID.getOrCreateOptionID("clique.tau", "The density threshold for the selectivity of a unit, where the selectivity is" + "the fraction of total feature vectors contained in this unit.");
+  public static final OptionID TAU_ID = new OptionID("clique.tau", "The density threshold for the selectivity of a unit, where the selectivity is " + "the fraction of total feature vectors contained in this unit.");
/**
* Flag to indicate that only subspaces with large coverage (i.e. the fraction
@@ -129,7 +129,7 @@ public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
* Key: {@code -clique.prune}
* </p>
*/
- public static final OptionID PRUNE_ID = OptionID.getOrCreateOptionID("clique.prune", "Flag to indicate that only subspaces with large coverage " + "(i.e. the fraction of the database that is covered by the dense units) " + "are selected, the rest will be pruned.");
+ public static final OptionID PRUNE_ID = new OptionID("clique.prune", "Flag to indicate that only subspaces with large coverage " + "(i.e. the fraction of the database that is covered by the dense units) " + "are selected, the rest will be pruned.");
/**
* Holds the value of {@link #XSI_ID}.
@@ -169,53 +169,53 @@ public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
public Clustering<SubspaceModel<V>> run(Relation<V> relation) {
// 1. Identification of subspaces that contain clusters
// TODO: use step logging.
- if(logger.isVerbose()) {
- logger.verbose("*** 1. Identification of subspaces that contain clusters ***");
+ if(LOG.isVerbose()) {
+ LOG.verbose("*** 1. Identification of subspaces that contain clusters ***");
}
SortedMap<Integer, List<CLIQUESubspace<V>>> dimensionToDenseSubspaces = new TreeMap<Integer, List<CLIQUESubspace<V>>>();
List<CLIQUESubspace<V>> denseSubspaces = findOneDimensionalDenseSubspaces(relation);
- dimensionToDenseSubspaces.put(0, denseSubspaces);
- if(logger.isVerbose()) {
- logger.verbose(" 1-dimensional dense subspaces: " + denseSubspaces.size());
+ dimensionToDenseSubspaces.put(Integer.valueOf(0), denseSubspaces);
+ if(LOG.isVerbose()) {
+ LOG.verbose(" 1-dimensional dense subspaces: " + denseSubspaces.size());
}
- if(logger.isDebugging()) {
+ if(LOG.isDebugging()) {
for(CLIQUESubspace<V> s : denseSubspaces) {
- logger.debug(s.toString(" "));
+ LOG.debug(s.toString(" "));
}
}
- int dimensionality = DatabaseUtil.dimensionality(relation);
+ int dimensionality = RelationUtil.dimensionality(relation);
for(int k = 2; k <= dimensionality && !denseSubspaces.isEmpty(); k++) {
denseSubspaces = findDenseSubspaces(relation, denseSubspaces);
- dimensionToDenseSubspaces.put(k - 1, denseSubspaces);
- if(logger.isVerbose()) {
- logger.verbose(" " + k + "-dimensional dense subspaces: " + denseSubspaces.size());
+ dimensionToDenseSubspaces.put(Integer.valueOf(k - 1), denseSubspaces);
+ if(LOG.isVerbose()) {
+ LOG.verbose(" " + k + "-dimensional dense subspaces: " + denseSubspaces.size());
}
- if(logger.isDebugging()) {
+ if(LOG.isDebugging()) {
for(CLIQUESubspace<V> s : denseSubspaces) {
- logger.debug(s.toString(" "));
+ LOG.debug(s.toString(" "));
}
}
}
// 2. Identification of clusters
- if(logger.isVerbose()) {
- logger.verbose("*** 2. Identification of clusters ***");
+ if(LOG.isVerbose()) {
+ LOG.verbose("*** 2. Identification of clusters ***");
}
// build result
int numClusters = 1;
Clustering<SubspaceModel<V>> result = new Clustering<SubspaceModel<V>>("CLIQUE clustering", "clique-clustering");
for(Integer dim : dimensionToDenseSubspaces.keySet()) {
List<CLIQUESubspace<V>> subspaces = dimensionToDenseSubspaces.get(dim);
- List<Pair<Subspace<V>, ModifiableDBIDs>> modelsAndClusters = determineClusters(subspaces);
+ List<Pair<Subspace, ModifiableDBIDs>> modelsAndClusters = determineClusters(subspaces);
- if(logger.isVerbose()) {
- logger.verbose(" " + (dim + 1) + "-dimensional clusters: " + modelsAndClusters.size());
+ if(LOG.isVerbose()) {
+ LOG.verbose(" " + (dim + 1) + "-dimensional clusters: " + modelsAndClusters.size());
}
- for(Pair<Subspace<V>, ModifiableDBIDs> modelAndCluster : modelsAndClusters) {
+ for(Pair<Subspace, ModifiableDBIDs> modelAndCluster : modelsAndClusters) {
Cluster<SubspaceModel<V>> newCluster = new Cluster<SubspaceModel<V>>(modelAndCluster.second);
- newCluster.setModel(new SubspaceModel<V>(modelAndCluster.first, DatabaseUtil.centroid(relation, modelAndCluster.second)));
+ newCluster.setModel(new SubspaceModel<V>(modelAndCluster.first, Centroid.make(relation, modelAndCluster.second).toVector(relation)));
newCluster.setName("cluster_" + numClusters++);
result.addCluster(newCluster);
}
@@ -232,13 +232,13 @@ public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
* @return the clusters in the specified dense subspaces and the corresponding
* cluster models
*/
- private List<Pair<Subspace<V>, ModifiableDBIDs>> determineClusters(List<CLIQUESubspace<V>> denseSubspaces) {
- List<Pair<Subspace<V>, ModifiableDBIDs>> clusters = new ArrayList<Pair<Subspace<V>, ModifiableDBIDs>>();
+ private List<Pair<Subspace, ModifiableDBIDs>> determineClusters(List<CLIQUESubspace<V>> denseSubspaces) {
+ List<Pair<Subspace, ModifiableDBIDs>> clusters = new ArrayList<Pair<Subspace, ModifiableDBIDs>>();
for(CLIQUESubspace<V> subspace : denseSubspaces) {
- List<Pair<Subspace<V>, ModifiableDBIDs>> clustersInSubspace = subspace.determineClusters();
- if(logger.isDebugging()) {
- logger.debugFine("Subspace " + subspace + " clusters " + clustersInSubspace.size());
+ List<Pair<Subspace, ModifiableDBIDs>> clustersInSubspace = subspace.determineClusters();
+ if(LOG.isDebugging()) {
+ LOG.debugFine("Subspace " + subspace + " clusters " + clustersInSubspace.size());
}
clusters.addAll(clustersInSubspace);
}
@@ -289,7 +289,7 @@ public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
* @return the created one dimensional units
*/
private Collection<CLIQUEUnit<V>> initOneDimensionalUnits(Relation<V> database) {
- int dimensionality = DatabaseUtil.dimensionality(database);
+ int dimensionality = RelationUtil.dimensionality(database);
// initialize minima and maxima
double[] minima = new double[dimensionality];
double[] maxima = new double[dimensionality];
@@ -312,12 +312,12 @@ public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
unit_lengths[d] = (maxima[d] - minima[d]) / xsi;
}
- if(logger.isDebuggingFiner()) {
- StringBuffer msg = new StringBuffer();
+ if(LOG.isDebuggingFiner()) {
+ StringBuilder msg = new StringBuilder();
msg.append(" minima: ").append(FormatUtil.format(minima, ", ", 2));
msg.append("\n maxima: ").append(FormatUtil.format(maxima, ", ", 2));
msg.append("\n unit lengths: ").append(FormatUtil.format(unit_lengths, ", ", 2));
- logger.debugFiner(msg.toString());
+ LOG.debugFiner(msg.toString());
}
// determine the boundaries of the units
@@ -332,10 +332,10 @@ public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
}
}
}
- if(logger.isDebuggingFiner()) {
- StringBuffer msg = new StringBuffer();
+ if(LOG.isDebuggingFiner()) {
+ StringBuilder msg = new StringBuilder();
msg.append(" unit bounds ").append(FormatUtil.format(new Matrix(unit_bounds), " "));
- logger.debugFiner(msg.toString());
+ LOG.debugFiner(msg.toString());
}
// build the 1 dimensional units
@@ -346,10 +346,10 @@ public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
}
}
- if(logger.isDebuggingFiner()) {
- StringBuffer msg = new StringBuffer();
+ if(LOG.isDebuggingFiner()) {
+ StringBuilder msg = new StringBuilder();
msg.append(" total number of 1-dim units: ").append(units.size());
- logger.debugFiner(msg.toString());
+ LOG.debugFiner(msg.toString());
}
return units;
@@ -367,12 +367,12 @@ public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
if(minima.length != featureVector.getDimensionality()) {
throw new IllegalArgumentException("FeatureVectors differ in length.");
}
- for(int d = 1; d <= featureVector.getDimensionality(); d++) {
- if((featureVector.doubleValue(d)) > maxima[d - 1]) {
- maxima[d - 1] = (featureVector.doubleValue(d));
+ for(int d = 0; d < featureVector.getDimensionality(); d++) {
+ if((featureVector.doubleValue(d)) > maxima[d]) {
+ maxima[d] = (featureVector.doubleValue(d));
}
- if((featureVector.doubleValue(d)) < minima[d - 1]) {
- minima[d - 1] = (featureVector.doubleValue(d));
+ if((featureVector.doubleValue(d)) < minima[d]) {
+ minima[d] = (featureVector.doubleValue(d));
}
}
}
@@ -387,38 +387,37 @@ public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
*/
private List<CLIQUESubspace<V>> findOneDimensionalDenseSubspaceCandidates(Relation<V> database) {
Collection<CLIQUEUnit<V>> units = initOneDimensionalUnits(database);
- Collection<CLIQUEUnit<V>> denseUnits = new ArrayList<CLIQUEUnit<V>>();
- Map<Integer, CLIQUESubspace<V>> denseSubspaces = new HashMap<Integer, CLIQUESubspace<V>>();
-
// identify dense units
double total = database.size();
- for(DBIDIter it = database.iterDBIDs(); it.valid();) {
+ for(DBIDIter it = database.iterDBIDs(); it.valid(); it.advance()) {
V featureVector = database.get(it);
- final DBID id = it.getDBID();
- it.advance();
for(CLIQUEUnit<V> unit : units) {
- unit.addFeatureVector(id, featureVector);
- // unit is a dense unit
- // FIXME: why it.valid()?
- if(!it.valid() && unit.selectivity(total) >= tau) {
- denseUnits.add(unit);
- // add the dense unit to its subspace
- int dim = unit.getIntervals().iterator().next().getDimension();
- CLIQUESubspace<V> subspace_d = denseSubspaces.get(dim);
- if(subspace_d == null) {
- subspace_d = new CLIQUESubspace<V>(dim);
- denseSubspaces.put(dim, subspace_d);
- }
- subspace_d.addDenseUnit(unit);
+ unit.addFeatureVector(it, featureVector);
+ }
+ }
+
+ Collection<CLIQUEUnit<V>> denseUnits = new ArrayList<CLIQUEUnit<V>>();
+ Map<Integer, CLIQUESubspace<V>> denseSubspaces = new HashMap<Integer, CLIQUESubspace<V>>();
+ for(CLIQUEUnit<V> unit : units) {
+ // unit is a dense unit
+ if(unit.selectivity(total) >= tau) {
+ denseUnits.add(unit);
+ // add the dense unit to its subspace
+ int dim = unit.getIntervals().iterator().next().getDimension();
+ CLIQUESubspace<V> subspace_d = denseSubspaces.get(Integer.valueOf(dim));
+ if(subspace_d == null) {
+ subspace_d = new CLIQUESubspace<V>(dim);
+ denseSubspaces.put(Integer.valueOf(dim), subspace_d);
}
+ subspace_d.addDenseUnit(unit);
}
}
- if(logger.isDebugging()) {
- StringBuffer msg = new StringBuffer();
+ if(LOG.isDebugging()) {
+ StringBuilder msg = new StringBuilder();
msg.append(" number of 1-dim dense units: ").append(denseUnits.size());
msg.append("\n number of 1-dim dense subspace candidates: ").append(denseSubspaces.size());
- logger.debugFine(msg.toString());
+ LOG.debugFine(msg.toString());
}
List<CLIQUESubspace<V>> subspaceCandidates = new ArrayList<CLIQUESubspace<V>>(denseSubspaces.values());
@@ -574,7 +573,7 @@ public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -584,7 +583,7 @@ public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
protected int xsi;
protected double tau;
@@ -594,19 +593,22 @@ public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- IntParameter xsiP = new IntParameter(XSI_ID, new GreaterConstraint(0));
+ IntParameter xsiP = new IntParameter(XSI_ID);
+ xsiP.addConstraint(new GreaterConstraint(0));
if(config.grab(xsiP)) {
- xsi = xsiP.getValue();
+ xsi = xsiP.intValue();
}
- DoubleParameter tauP = new DoubleParameter(TAU_ID, new IntervalConstraint(0, IntervalConstraint.IntervalBoundary.OPEN, 1, IntervalConstraint.IntervalBoundary.OPEN));
+ DoubleParameter tauP = new DoubleParameter(TAU_ID);
+ tauP.addConstraint(new GreaterConstraint(0));
+ tauP.addConstraint(new LessConstraint(1));
if(config.grab(tauP)) {
- tau = tauP.getValue();
+ tau = tauP.doubleValue();
}
Flag pruneF = new Flag(PRUNE_ID);
if(config.grab(pruneF)) {
- prune = pruneF.getValue();
+ prune = pruneF.isTrue();
}
}
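
The most substantial behavioral fix in this CLIQUE diff is in findOneDimensionalDenseSubspaceCandidates: the removed code tested unit.selectivity(total) >= tau inside the per-vector loop, guarded by !it.valid() (hence the FIXME), so the density check effectively ran only while processing the final vector. The rewrite separates the two passes: first count every vector into each unit, then collect the units whose selectivity reaches tau. A minimal sketch of the dense-unit test (hypothetical Unit class, not ELKI's CLIQUEUnit):

    class Unit {
      final double lo, hi; // one-dimensional interval [lo, hi)
      int count = 0;       // number of vectors counted into this unit

      Unit(double lo, double hi) {
        this.lo = lo;
        this.hi = hi;
      }

      boolean contains(double v) {
        return lo <= v && v < hi;
      }

      // Pass 2: dense iff the fraction of all vectors in the unit reaches tau.
      boolean isDense(int total, double tau) {
        return count / (double) total >= tau;
      }
    }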
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java
index df3fe8b5..a3496a0e 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java
@@ -47,6 +47,7 @@ import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.IndexBasedDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.ProxyDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.DiSHDistanceFunction;
@@ -55,10 +56,11 @@ import de.lmu.ifi.dbs.elki.distance.distancevalue.PreferenceVectorBasedCorrelati
import de.lmu.ifi.dbs.elki.index.preprocessed.preference.DiSHPreferenceVectorIndex;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.ProjectedCentroid;
import de.lmu.ifi.dbs.elki.result.optics.ClusterOrderEntry;
import de.lmu.ifi.dbs.elki.result.optics.ClusterOrderResult;
import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
import de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy.HierarchyReferenceLists;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
@@ -99,11 +101,11 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
@Title("DiSH: Detecting Subspace cluster Hierarchies")
@Description("Algorithm to find hierarchical correlation clusters in subspaces.")
@Reference(authors = "E. Achtert, C. Böhm, H.-P. Kriegel, P. Kröger, I. Müller-Gorman, A. Zimek", title = "Detection and Visualization of Subspace Cluster Hierarchies", booktitle = "Proc. 12th International Conference on Database Systems for Advanced Applications (DASFAA), Bangkok, Thailand, 2007", url = "http://dx.doi.org/10.1007/978-3-540-71703-4_15")
-public class DiSH<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clustering<SubspaceModel<V>>> implements SubspaceClusteringAlgorithm<SubspaceModel<V>> {
+public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<SubspaceModel<V>>> implements SubspaceClusteringAlgorithm<SubspaceModel<V>> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(DiSH.class);
+ private static final Logging LOG = Logging.getLogger(DiSH.class);
/**
* Parameter that specifies the maximum radius of the neighborhood to be
@@ -116,7 +118,7 @@ public class DiSH<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
* Key: {@code -dish.epsilon}
* </p>
*/
- public static final OptionID EPSILON_ID = OptionID.getOrCreateOptionID("dish.epsilon", "The maximum radius of the neighborhood " + "to be considered in each dimension for determination of " + "the preference vector.");
+ public static final OptionID EPSILON_ID = new OptionID("dish.epsilon", "The maximum radius of the neighborhood " + "to be considered in each dimension for determination of " + "the preference vector.");
/**
   * Parameter that specifies the minimum number of points as a smoothing
@@ -128,7 +130,7 @@ public class DiSH<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
* Key: {@code -dish.mu}
* </p>
*/
- public static final OptionID MU_ID = OptionID.getOrCreateOptionID("dish.mu", "The minimum number of points as a smoothing factor to avoid the single-link-effekt.");
+  public static final OptionID MU_ID = new OptionID("dish.mu", "The minimum number of points as a smoothing factor to avoid the single-link effect.");
/**
* Holds the value of {@link #EPSILON_ID}.
@@ -167,13 +169,13 @@ public class DiSH<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
*/
public Clustering<SubspaceModel<V>> run(Database database, Relation<V> relation) {
// Instantiate DiSH distance (and thus run the preprocessor)
- if(logger.isVerbose()) {
- logger.verbose("*** Run DiSH preprocessor.");
+ if (LOG.isVerbose()) {
+ LOG.verbose("*** Run DiSH preprocessor.");
}
DiSHDistanceFunction.Instance<V> dishDistanceQuery = dishDistance.instantiate(relation);
// Configure and run OPTICS.
- if(logger.isVerbose()) {
- logger.verbose("*** Run OPTICS algorithm.");
+ if (LOG.isVerbose()) {
+ LOG.verbose("*** Run OPTICS algorithm.");
}
ListParameterization opticsconfig = new ListParameterization(opticsAlgorithmParameters);
opticsconfig.addParameter(OPTICS.DISTANCE_FUNCTION_ID, ProxyDistanceFunction.proxy(dishDistanceQuery));
@@ -183,8 +185,8 @@ public class DiSH<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
optics = opticsconfig.tryInstantiate(cls);
ClusterOrderResult<PreferenceVectorBasedCorrelationDistance> opticsResult = optics.run(database, relation);
- if(logger.isVerbose()) {
- logger.verbose("*** Compute Clusters.");
+ if (LOG.isVerbose()) {
+ LOG.verbose("*** Compute Clusters.");
}
return computeClusters(relation, opticsResult, dishDistanceQuery);
}
@@ -197,64 +199,64 @@ public class DiSH<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
* @param distFunc Distance function
*/
private Clustering<SubspaceModel<V>> computeClusters(Relation<V> database, ClusterOrderResult<PreferenceVectorBasedCorrelationDistance> clusterOrder, DiSHDistanceFunction.Instance<V> distFunc) {
- int dimensionality = DatabaseUtil.dimensionality(database);
+ int dimensionality = RelationUtil.dimensionality(database);
int minpts = dishDistance.getMinpts();
// extract clusters
Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> clustersMap = extractClusters(database, distFunc, clusterOrder);
- if(logger.isVerbose()) {
- StringBuffer msg = new StringBuffer("Step 1: extract clusters");
- for(List<Pair<BitSet, ArrayModifiableDBIDs>> clusterList : clustersMap.values()) {
- for(Pair<BitSet, ArrayModifiableDBIDs> c : clusterList) {
- msg.append("\n").append(FormatUtil.format(dimensionality, c.first)).append(" ids ").append(c.second.size());
+ if (LOG.isVerbose()) {
+ StringBuilder msg = new StringBuilder("Step 1: extract clusters");
+ for (List<Pair<BitSet, ArrayModifiableDBIDs>> clusterList : clustersMap.values()) {
+ for (Pair<BitSet, ArrayModifiableDBIDs> c : clusterList) {
+ msg.append('\n').append(FormatUtil.format(dimensionality, c.first)).append(" ids ").append(c.second.size());
}
}
- logger.verbose(msg.toString());
+ LOG.verbose(msg.toString());
}
// check if there are clusters < minpts
checkClusters(database, distFunc, clustersMap, minpts);
- if(logger.isVerbose()) {
- StringBuffer msg = new StringBuffer("Step 2: check clusters");
- for(List<Pair<BitSet, ArrayModifiableDBIDs>> clusterList : clustersMap.values()) {
- for(Pair<BitSet, ArrayModifiableDBIDs> c : clusterList) {
- msg.append("\n").append(FormatUtil.format(dimensionality, c.first)).append(" ids ").append(c.second.size());
+ if (LOG.isVerbose()) {
+ StringBuilder msg = new StringBuilder("Step 2: check clusters");
+ for (List<Pair<BitSet, ArrayModifiableDBIDs>> clusterList : clustersMap.values()) {
+ for (Pair<BitSet, ArrayModifiableDBIDs> c : clusterList) {
+ msg.append('\n').append(FormatUtil.format(dimensionality, c.first)).append(" ids ").append(c.second.size());
}
}
- logger.verbose(msg.toString());
+ LOG.verbose(msg.toString());
}
// sort the clusters
List<Cluster<SubspaceModel<V>>> clusters = sortClusters(database, clustersMap);
- if(logger.isVerbose()) {
- StringBuffer msg = new StringBuffer("Step 3: sort clusters");
- for(Cluster<SubspaceModel<V>> c : clusters) {
- msg.append("\n").append(FormatUtil.format(dimensionality, c.getModel().getSubspace().getDimensions())).append(" ids ").append(c.size());
+ if (LOG.isVerbose()) {
+ StringBuilder msg = new StringBuilder("Step 3: sort clusters");
+ for (Cluster<SubspaceModel<V>> c : clusters) {
+ msg.append('\n').append(FormatUtil.format(dimensionality, c.getModel().getSubspace().getDimensions())).append(" ids ").append(c.size());
}
- logger.verbose(msg.toString());
+ LOG.verbose(msg.toString());
}
// build the hierarchy
buildHierarchy(database, distFunc, clusters, dimensionality);
- if(logger.isVerbose()) {
- StringBuffer msg = new StringBuffer("Step 4: build hierarchy");
- for(Cluster<SubspaceModel<V>> c : clusters) {
- msg.append("\n").append(FormatUtil.format(dimensionality, c.getModel().getDimensions())).append(" ids ").append(c.size());
- for(Cluster<SubspaceModel<V>> cluster : c.getParents()) {
+ if (LOG.isVerbose()) {
+ StringBuilder msg = new StringBuilder("Step 4: build hierarchy");
+ for (Cluster<SubspaceModel<V>> c : clusters) {
+ msg.append('\n').append(FormatUtil.format(dimensionality, c.getModel().getDimensions())).append(" ids ").append(c.size());
+ for (Cluster<SubspaceModel<V>> cluster : c.getParents()) {
msg.append("\n parent ").append(cluster);
}
- for(Cluster<SubspaceModel<V>> cluster : c.getChildren()) {
+ for (Cluster<SubspaceModel<V>> cluster : c.getChildren()) {
msg.append("\n child ").append(cluster);
}
}
- logger.verbose(msg.toString());
+ LOG.verbose(msg.toString());
}
// build result
Clustering<SubspaceModel<V>> result = new Clustering<SubspaceModel<V>>("DiSH clustering", "dish-clustering");
- for(Cluster<SubspaceModel<V>> c : clusters) {
- if(c.getParents() == null || c.getParents().isEmpty()) {
+ for (Cluster<SubspaceModel<V>> c : clusters) {
+ if (c.getParents() == null || c.getParents().isEmpty()) {
result.addCluster(c);
}
}
@@ -270,12 +272,12 @@ public class DiSH<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
* @return the extracted clusters
*/
private Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> extractClusters(Relation<V> database, DiSHDistanceFunction.Instance<V> distFunc, ClusterOrderResult<PreferenceVectorBasedCorrelationDistance> clusterOrder) {
- FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Extract Clusters", database.size(), logger) : null;
+ FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Extract Clusters", database.size(), LOG) : null;
int processed = 0;
Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> clustersMap = new HashMap<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>>();
Map<DBID, ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance>> entryMap = new HashMap<DBID, ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance>>();
Map<DBID, Pair<BitSet, ArrayModifiableDBIDs>> entryToClusterMap = new HashMap<DBID, Pair<BitSet, ArrayModifiableDBIDs>>();
- for(Iterator<ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance>> it = clusterOrder.iterator(); it.hasNext();) {
+ for (Iterator<ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance>> it = clusterOrder.iterator(); it.hasNext();) {
ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance> entry = it.next();
entryMap.put(entry.getID(), entry);
@@ -284,68 +286,68 @@ public class DiSH<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
// get the list of (parallel) clusters for the preference vector
List<Pair<BitSet, ArrayModifiableDBIDs>> parallelClusters = clustersMap.get(preferenceVector);
- if(parallelClusters == null) {
+ if (parallelClusters == null) {
parallelClusters = new ArrayList<Pair<BitSet, ArrayModifiableDBIDs>>();
clustersMap.put(preferenceVector, parallelClusters);
}
// look for the proper cluster
Pair<BitSet, ArrayModifiableDBIDs> cluster = null;
- for(Pair<BitSet, ArrayModifiableDBIDs> c : parallelClusters) {
- V c_centroid = DatabaseUtil.centroid(database, c.second, c.first);
+ for (Pair<BitSet, ArrayModifiableDBIDs> c : parallelClusters) {
+ V c_centroid = ProjectedCentroid.make(c.first, database, c.second).toVector(database);
PreferenceVectorBasedCorrelationDistance dist = distFunc.correlationDistance(object, c_centroid, preferenceVector, preferenceVector);
- if(dist.getCorrelationValue() == entry.getReachability().getCorrelationValue()) {
+ if (dist.getCorrelationValue() == entry.getReachability().getCorrelationValue()) {
double d = distFunc.weightedDistance(object, c_centroid, dist.getCommonPreferenceVector());
- if(d <= 2 * epsilon) {
+ if (d <= 2 * epsilon) {
cluster = c;
break;
}
}
}
- if(cluster == null) {
+ if (cluster == null) {
cluster = new Pair<BitSet, ArrayModifiableDBIDs>(preferenceVector, DBIDUtil.newArray());
parallelClusters.add(cluster);
}
cluster.second.add(entry.getID());
entryToClusterMap.put(entry.getID(), cluster);
- if(progress != null) {
- progress.setProcessed(++processed, logger);
+ if (progress != null) {
+ progress.setProcessed(++processed, LOG);
}
}
- if(progress != null) {
- progress.ensureCompleted(logger);
+ if (progress != null) {
+ progress.ensureCompleted(LOG);
}
- if(logger.isDebuggingFiner()) {
- StringBuffer msg = new StringBuffer("Step 0");
- for(List<Pair<BitSet, ArrayModifiableDBIDs>> clusterList : clustersMap.values()) {
- for(Pair<BitSet, ArrayModifiableDBIDs> c : clusterList) {
- msg.append("\n").append(FormatUtil.format(DatabaseUtil.dimensionality(database), c.first)).append(" ids ").append(c.second.size());
+ if (LOG.isDebuggingFiner()) {
+ StringBuilder msg = new StringBuilder("Step 0");
+ for (List<Pair<BitSet, ArrayModifiableDBIDs>> clusterList : clustersMap.values()) {
+ for (Pair<BitSet, ArrayModifiableDBIDs> c : clusterList) {
+ msg.append('\n').append(FormatUtil.format(RelationUtil.dimensionality(database), c.first)).append(" ids ").append(c.second.size());
}
}
- logger.debugFiner(msg.toString());
+ LOG.debugFiner(msg.toString());
}
// add the predecessor to the cluster
- for(BitSet pv : clustersMap.keySet()) {
+ for (BitSet pv : clustersMap.keySet()) {
List<Pair<BitSet, ArrayModifiableDBIDs>> parallelClusters = clustersMap.get(pv);
- for(Pair<BitSet, ArrayModifiableDBIDs> cluster : parallelClusters) {
- if(cluster.second.isEmpty()) {
+ for (Pair<BitSet, ArrayModifiableDBIDs> cluster : parallelClusters) {
+ if (cluster.second.isEmpty()) {
continue;
}
DBID firstID = cluster.second.get(0);
ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance> entry = entryMap.get(firstID);
DBID predecessorID = entry.getPredecessorID();
- if(predecessorID == null) {
+ if (predecessorID == null) {
continue;
}
ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance> predecessor = entryMap.get(predecessorID);
// parallel cluster
- if(predecessor.getReachability().getCommonPreferenceVector().equals(entry.getReachability().getCommonPreferenceVector())) {
+ if (predecessor.getReachability().getCommonPreferenceVector().equals(entry.getReachability().getCommonPreferenceVector())) {
continue;
}
- if(predecessor.getReachability().compareTo(entry.getReachability()) < 0) {
+ if (predecessor.getReachability().compareTo(entry.getReachability()) < 0) {
continue;
}
@@ -369,22 +371,21 @@ public class DiSH<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
* @return a sorted list of the clusters
*/
private List<Cluster<SubspaceModel<V>>> sortClusters(Relation<V> database, Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> clustersMap) {
- final int db_dim = DatabaseUtil.dimensionality(database);
+ final int db_dim = RelationUtil.dimensionality(database);
// int num = 1;
List<Cluster<SubspaceModel<V>>> clusters = new ArrayList<Cluster<SubspaceModel<V>>>();
- for(BitSet pv : clustersMap.keySet()) {
+ for (BitSet pv : clustersMap.keySet()) {
List<Pair<BitSet, ArrayModifiableDBIDs>> parallelClusters = clustersMap.get(pv);
- for(int i = 0; i < parallelClusters.size(); i++) {
+ for (int i = 0; i < parallelClusters.size(); i++) {
Pair<BitSet, ArrayModifiableDBIDs> c = parallelClusters.get(i);
Cluster<SubspaceModel<V>> cluster = new Cluster<SubspaceModel<V>>(c.second);
- cluster.setModel(new SubspaceModel<V>(new Subspace<V>(c.first), DatabaseUtil.centroid(database, c.second)));
+ cluster.setModel(new SubspaceModel<V>(new Subspace(c.first), Centroid.make(database, c.second).toVector(database)));
cluster.setHierarchy(new HierarchyReferenceLists<Cluster<SubspaceModel<V>>>(cluster, new ArrayList<Cluster<SubspaceModel<V>>>(), new ArrayList<Cluster<SubspaceModel<V>>>()));
// cluster.setName("Cluster_" + num++);
String subspace = FormatUtil.format(cluster.getModel().getSubspace().getDimensions(), db_dim, "");
- if(parallelClusters.size() > 1) {
+ if (parallelClusters.size() > 1) {
cluster.setName("Cluster_" + subspace + "_" + i);
- }
- else {
+ } else {
cluster.setName("Cluster_" + subspace);
}
clusters.add(cluster);
@@ -417,11 +418,11 @@ public class DiSH<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
List<Pair<BitSet, ArrayModifiableDBIDs>> notAssigned = new ArrayList<Pair<BitSet, ArrayModifiableDBIDs>>();
Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> newClustersMap = new HashMap<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>>();
Pair<BitSet, ArrayModifiableDBIDs> noise = new Pair<BitSet, ArrayModifiableDBIDs>(new BitSet(), DBIDUtil.newArray());
- for(BitSet pv : clustersMap.keySet()) {
+ for (BitSet pv : clustersMap.keySet()) {
// noise
- if(pv.cardinality() == 0) {
+ if (pv.cardinality() == 0) {
List<Pair<BitSet, ArrayModifiableDBIDs>> parallelClusters = clustersMap.get(pv);
- for(Pair<BitSet, ArrayModifiableDBIDs> c : parallelClusters) {
+ for (Pair<BitSet, ArrayModifiableDBIDs> c : parallelClusters) {
noise.second.addDBIDs(c.second);
}
}
@@ -429,11 +430,10 @@ public class DiSH<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
else {
List<Pair<BitSet, ArrayModifiableDBIDs>> parallelClusters = clustersMap.get(pv);
List<Pair<BitSet, ArrayModifiableDBIDs>> newParallelClusters = new ArrayList<Pair<BitSet, ArrayModifiableDBIDs>>(parallelClusters.size());
- for(Pair<BitSet, ArrayModifiableDBIDs> c : parallelClusters) {
- if(!pv.equals(new BitSet()) && c.second.size() < minpts) {
+ for (Pair<BitSet, ArrayModifiableDBIDs> c : parallelClusters) {
+ if (!pv.equals(new BitSet()) && c.second.size() < minpts) {
notAssigned.add(c);
- }
- else {
+ } else {
newParallelClusters.add(c);
}
}
@@ -444,15 +444,14 @@ public class DiSH<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
clustersMap.clear();
clustersMap.putAll(newClustersMap);
- for(Pair<BitSet, ArrayModifiableDBIDs> c : notAssigned) {
- if(c.second.isEmpty()) {
+ for (Pair<BitSet, ArrayModifiableDBIDs> c : notAssigned) {
+ if (c.second.isEmpty()) {
continue;
}
Pair<BitSet, ArrayModifiableDBIDs> parent = findParent(database, distFunc, c, clustersMap);
- if(parent != null) {
+ if (parent != null) {
parent.second.addDBIDs(c.second);
- }
- else {
+ } else {
noise.second.addDBIDs(c.second);
}
}
@@ -472,30 +471,30 @@ public class DiSH<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
* @return the parent of the specified cluster
*/
private Pair<BitSet, ArrayModifiableDBIDs> findParent(Relation<V> database, DiSHDistanceFunction.Instance<V> distFunc, Pair<BitSet, ArrayModifiableDBIDs> child, Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> clustersMap) {
- V child_centroid = DatabaseUtil.centroid(database, child.second, child.first);
+ V child_centroid = ProjectedCentroid.make(child.first, database, child.second).toVector(database);
Pair<BitSet, ArrayModifiableDBIDs> result = null;
int resultCardinality = -1;
BitSet childPV = child.first;
int childCardinality = childPV.cardinality();
- for(BitSet parentPV : clustersMap.keySet()) {
+ for (BitSet parentPV : clustersMap.keySet()) {
int parentCardinality = parentPV.cardinality();
- if(parentCardinality >= childCardinality) {
+ if (parentCardinality >= childCardinality) {
continue;
}
- if(resultCardinality != -1 && parentCardinality <= resultCardinality) {
+ if (resultCardinality != -1 && parentCardinality <= resultCardinality) {
continue;
}
BitSet pv = (BitSet) childPV.clone();
pv.and(parentPV);
- if(pv.equals(parentPV)) {
+ if (pv.equals(parentPV)) {
List<Pair<BitSet, ArrayModifiableDBIDs>> parentList = clustersMap.get(parentPV);
- for(Pair<BitSet, ArrayModifiableDBIDs> parent : parentList) {
- V parent_centroid = DatabaseUtil.centroid(database, parent.second, parentPV);
+ for (Pair<BitSet, ArrayModifiableDBIDs> parent : parentList) {
+ V parent_centroid = ProjectedCentroid.make(parentPV, database, parent.second).toVector(database);
double d = distFunc.weightedDistance(child_centroid, parent_centroid, parentPV);
- if(d <= 2 * epsilon) {
+ if (d <= 2 * epsilon) {
result = parent;
resultCardinality = parentCardinality;
break;
@@ -516,64 +515,62 @@ public class DiSH<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
* @param database the database containing the data objects
*/
private void buildHierarchy(Relation<V> database, DiSHDistanceFunction.Instance<V> distFunc, List<Cluster<SubspaceModel<V>>> clusters, int dimensionality) {
- StringBuffer msg = new StringBuffer();
- final int db_dim = DatabaseUtil.dimensionality(database);
+ StringBuilder msg = new StringBuilder();
+ final int db_dim = RelationUtil.dimensionality(database);
- for(int i = 0; i < clusters.size() - 1; i++) {
+ for (int i = 0; i < clusters.size() - 1; i++) {
Cluster<SubspaceModel<V>> c_i = clusters.get(i);
int subspaceDim_i = dimensionality - c_i.getModel().getSubspace().dimensionality();
- V ci_centroid = DatabaseUtil.centroid(database, c_i.getIDs(), c_i.getModel().getDimensions());
+ V ci_centroid = ProjectedCentroid.make(c_i.getModel().getDimensions(), database, c_i.getIDs()).toVector(database);
- for(int j = i + 1; j < clusters.size(); j++) {
+ for (int j = i + 1; j < clusters.size(); j++) {
Cluster<SubspaceModel<V>> c_j = clusters.get(j);
int subspaceDim_j = dimensionality - c_j.getModel().getSubspace().dimensionality();
- if(subspaceDim_i < subspaceDim_j) {
- if(logger.isDebugging()) {
- msg.append("\n l_i=").append(subspaceDim_i).append(" pv_i=[").append(FormatUtil.format(db_dim, c_i.getModel().getSubspace().getDimensions())).append("]");
- msg.append("\n l_j=").append(subspaceDim_j).append(" pv_j=[").append(FormatUtil.format(db_dim, c_j.getModel().getSubspace().getDimensions())).append("]");
+ if (subspaceDim_i < subspaceDim_j) {
+ if (LOG.isDebugging()) {
+ msg.append("\n l_i=").append(subspaceDim_i).append(" pv_i=[").append(FormatUtil.format(db_dim, c_i.getModel().getSubspace().getDimensions())).append(']');
+ msg.append("\n l_j=").append(subspaceDim_j).append(" pv_j=[").append(FormatUtil.format(db_dim, c_j.getModel().getSubspace().getDimensions())).append(']');
}
// noise level reached
- if(c_j.getModel().getSubspace().dimensionality() == 0) {
+ if (c_j.getModel().getSubspace().dimensionality() == 0) {
// no parents exists -> parent is noise
- if(c_i.getParents().isEmpty()) {
+ if (c_i.getParents().isEmpty()) {
c_j.getChildren().add(c_i);
c_i.getParents().add(c_j);
- if(logger.isDebugging()) {
+ if (LOG.isDebugging()) {
msg.append("\n [").append(FormatUtil.format(db_dim, c_j.getModel().getSubspace().getDimensions()));
msg.append("] is parent of [").append(FormatUtil.format(db_dim, c_i.getModel().getSubspace().getDimensions()));
- msg.append("]");
+ msg.append(']');
}
}
- }
- else {
- V cj_centroid = DatabaseUtil.centroid(database, c_j.getIDs(), c_j.getModel().getDimensions());
+ } else {
+ V cj_centroid = ProjectedCentroid.make(c_j.getModel().getDimensions(), database, c_j.getIDs()).toVector(database);
PreferenceVectorBasedCorrelationDistance distance = distFunc.correlationDistance(ci_centroid, cj_centroid, c_i.getModel().getSubspace().getDimensions(), c_j.getModel().getSubspace().getDimensions());
double d = distFunc.weightedDistance(ci_centroid, cj_centroid, distance.getCommonPreferenceVector());
- if(logger.isDebugging()) {
+ if (LOG.isDebugging()) {
msg.append("\n dist = ").append(distance.getCorrelationValue());
}
- if(distance.getCorrelationValue() == subspaceDim_j) {
- if(logger.isDebugging()) {
+ if (distance.getCorrelationValue() == subspaceDim_j) {
+ if (LOG.isDebugging()) {
msg.append("\n d = ").append(d);
}
- if(d <= 2 * epsilon) {
+ if (d <= 2 * epsilon) {
// no parent exists or c_j is not a parent of the already
// existing parents
- if(c_i.getParents().isEmpty() || !isParent(database, distFunc, c_j, c_i.getParents())) {
+ if (c_i.getParents().isEmpty() || !isParent(database, distFunc, c_j, c_i.getParents())) {
c_j.getChildren().add(c_i);
c_i.getParents().add(c_j);
- if(logger.isDebugging()) {
+ if (LOG.isDebugging()) {
msg.append("\n [").append(FormatUtil.format(db_dim, c_j.getModel().getSubspace().getDimensions()));
msg.append("] is parent of [");
msg.append(FormatUtil.format(db_dim, c_i.getModel().getSubspace().getDimensions()));
- msg.append("]");
+ msg.append(']');
}
}
- }
- else {
+ } else {
throw new RuntimeException("Should never happen: d = " + d);
}
}
@@ -581,8 +578,8 @@ public class DiSH<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
}
}
}
- if(logger.isDebugging()) {
- logger.debug(msg.toString());
+ if (LOG.isDebugging()) {
+ LOG.debug(msg.toString());
}
}
@@ -599,14 +596,14 @@ public class DiSH<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
* the children clusters, false otherwise
*/
private boolean isParent(Relation<V> database, DiSHDistanceFunction.Instance<V> distFunc, Cluster<SubspaceModel<V>> parent, List<Cluster<SubspaceModel<V>>> children) {
- V parent_centroid = DatabaseUtil.centroid(database, parent.getIDs(), parent.getModel().getDimensions());
- int dimensionality = DatabaseUtil.dimensionality(database);
+ V parent_centroid = ProjectedCentroid.make(parent.getModel().getDimensions(), database, parent.getIDs()).toVector(database);
+ int dimensionality = RelationUtil.dimensionality(database);
int subspaceDim_parent = dimensionality - parent.getModel().getSubspace().dimensionality();
- for(Cluster<SubspaceModel<V>> child : children) {
- V child_centroid = DatabaseUtil.centroid(database, child.getIDs(), child.getModel().getDimensions());
+ for (Cluster<SubspaceModel<V>> child : children) {
+ V child_centroid = ProjectedCentroid.make(child.getModel().getDimensions(), database, child.getIDs()).toVector(database);
PreferenceVectorBasedCorrelationDistance distance = distFunc.correlationDistance(parent_centroid, child_centroid, parent.getModel().getSubspace().getDimensions(), child.getModel().getSubspace().getDimensions());
- if(distance.getCorrelationValue() == subspaceDim_parent) {
+ if (distance.getCorrelationValue() == subspaceDim_parent) {
return true;
}
}
@@ -621,7 +618,7 @@ public class DiSH<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -631,7 +628,7 @@ public class DiSH<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
protected double epsilon = 0.0;
protected int mu = 1;
@@ -644,14 +641,16 @@ public class DiSH<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- DoubleParameter epsilonP = new DoubleParameter(EPSILON_ID, new GreaterEqualConstraint(0), 0.001);
- if(config.grab(epsilonP)) {
- epsilon = epsilonP.getValue();
+ DoubleParameter epsilonP = new DoubleParameter(EPSILON_ID, 0.001);
+ epsilonP.addConstraint(new GreaterEqualConstraint(0));
+ if (config.grab(epsilonP)) {
+ epsilon = epsilonP.doubleValue();
}
- IntParameter muP = new IntParameter(MU_ID, new GreaterConstraint(0), 1);
- if(config.grab(muP)) {
- mu = muP.getValue();
+ IntParameter muP = new IntParameter(MU_ID, 1);
+ muP.addConstraint(new GreaterConstraint(0));
+ if (config.grab(muP)) {
+ mu = muP.intValue();
}
configDiSHDistance(config, epsilon, mu);
@@ -703,4 +702,4 @@ public class DiSH<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
return new DiSH<V>(epsilon, dishDistance, opticsO);
}
}
-}
\ No newline at end of file
+}
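
The DiSH hunks above replace the DatabaseUtil centroid helpers with Centroid/ProjectedCentroid builders, but the hierarchy logic is unchanged: a cluster can become the parent of another only if its preference vector is a proper subset of the child's and the two projected centroids lie within 2 * epsilon under the parent's weighting. A condensed sketch of that parent test using java.util.BitSet (hypothetical method; the weighted distance is assumed precomputed):

    import java.util.BitSet;

    class DiSHParentTestSketch {
      // The subset-and-distance condition from findParent above.
      static boolean canBeParent(BitSet childPV, BitSet parentPV,
          double weightedDist, double epsilon) {
        if (parentPV.cardinality() >= childPV.cardinality()) {
          return false; // parent must prefer strictly fewer dimensions
        }
        BitSet and = (BitSet) childPV.clone();
        and.and(parentPV);
        if (!and.equals(parentPV)) {
          return false; // parentPV must be contained in childPV
        }
        return weightedDist <= 2 * epsilon; // centroids close in the parent's subspace
      }
    }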
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/HiSC.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/HiSC.java
index 40ab60a8..58f3acef 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/HiSC.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/HiSC.java
@@ -34,7 +34,8 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.IntervalConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ChainedParameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
@@ -60,11 +61,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
@Title("Finding Hierarchies of Subspace Clusters")
@Description("Algorithm for detecting hierarchies of subspace clusters.")
@Reference(authors = "E. Achtert, C. Böhm, H.-P. Kriegel, P. Kröger, I. Müller-Gorman, A. Zimek", title = "Finding Hierarchies of Subspace Clusters", booktitle = "Proc. 10th Europ. Conf. on Principles and Practice of Knowledge Discovery in Databases (PKDD'06), Berlin, Germany, 2006", url = "http://www.dbs.ifi.lmu.de/Publikationen/Papers/PKDD06-HiSC.pdf")
-public class HiSC<V extends NumberVector<V, ?>> extends OPTICS<V, PreferenceVectorBasedCorrelationDistance> {
+public class HiSC<V extends NumberVector<?>> extends OPTICS<V, PreferenceVectorBasedCorrelationDistance> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(HiSC.class);
+ private static final Logging LOG = Logging.getLogger(HiSC.class);
/**
* Constructor.
@@ -77,7 +78,7 @@ public class HiSC<V extends NumberVector<V, ?>> extends OPTICS<V, PreferenceVect
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -87,16 +88,18 @@ public class HiSC<V extends NumberVector<V, ?>> extends OPTICS<V, PreferenceVect
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
HiSCDistanceFunction<V> distanceFunction;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- DoubleParameter alphaP = new DoubleParameter(HiSCPreferenceVectorIndex.Factory.ALPHA_ID, new IntervalConstraint(0.0, IntervalConstraint.IntervalBoundary.OPEN, 1.0, IntervalConstraint.IntervalBoundary.OPEN), HiSCPreferenceVectorIndex.Factory.DEFAULT_ALPHA);
+ DoubleParameter alphaP = new DoubleParameter(HiSCPreferenceVectorIndex.Factory.ALPHA_ID, HiSCPreferenceVectorIndex.Factory.DEFAULT_ALPHA);
+ alphaP.addConstraint(new GreaterConstraint(0.0));
+ alphaP.addConstraint(new LessConstraint(1.0));
double alpha = 0.0;
if(config.grab(alphaP)) {
- alpha = alphaP.getValue();
+ alpha = alphaP.doubleValue();
}
// Configure HiSC distance function
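
Note: the two-sided IntervalConstraint is expressed in the new API as two single-sided constraints on the same parameter. A sketch of the open interval (0, 1) used for alpha, with the names from the hunk above:

    DoubleParameter alphaP = new DoubleParameter(HiSCPreferenceVectorIndex.Factory.ALPHA_ID, HiSCPreferenceVectorIndex.Factory.DEFAULT_ALPHA);
    alphaP.addConstraint(new GreaterConstraint(0.0)); // alpha > 0, open lower bound
    alphaP.addConstraint(new LessConstraint(1.0));    // alpha < 1, open upper bound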
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PROCLUS.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PROCLUS.java
index 4eedbecd..ef49ff10 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PROCLUS.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PROCLUS.java
@@ -23,15 +23,17 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import gnu.trove.iterator.TIntIterator;
+import gnu.trove.set.TIntSet;
+import gnu.trove.set.hash.TIntHashSet;
+
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Collections;
import java.util.HashMap;
-import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Random;
-import java.util.Set;
import de.lmu.ifi.dbs.elki.algorithm.clustering.AbstractProjectedClustering;
import de.lmu.ifi.dbs.elki.data.Cluster;
@@ -47,16 +49,20 @@ import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DistanceDBIDPair;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
-import de.lmu.ifi.dbs.elki.database.query.GenericDistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultUtil;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.math.Mean;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -64,7 +70,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.LongParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
import de.lmu.ifi.dbs.elki.utilities.pairs.CTriple;
import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
@@ -90,11 +96,11 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
@Title("PROCLUS: PROjected CLUStering")
@Description("Algorithm to find subspace clusters in high dimensional spaces.")
@Reference(authors = "C. C. Aggarwal, C. Procopiuc, J. L. Wolf, P. S. Yu, J. S. Park", title = "Fast Algorithms for Projected Clustering", booktitle = "Proc. ACM SIGMOD Int. Conf. on Management of Data (SIGMOD '99)", url = "http://dx.doi.org/10.1145/304181.304188")
-public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClustering<Clustering<SubspaceModel<V>>, V> implements SubspaceClusteringAlgorithm<SubspaceModel<V>> {
+public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedClustering<Clustering<SubspaceModel<V>>, V> implements SubspaceClusteringAlgorithm<SubspaceModel<V>> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(PROCLUS.class);
+ private static final Logging LOG = Logging.getLogger(PROCLUS.class);
/**
* Parameter to specify the multiplier for the initial number of medoids, must
@@ -106,12 +112,7 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
* Key: {@code -proclus.mi}
* </p>
*/
- public static final OptionID M_I_ID = OptionID.getOrCreateOptionID("proclus.mi", "The multiplier for the initial number of medoids.");
-
- /**
- * Parameter to specify the random generator seed.
- */
- public static final OptionID SEED_ID = OptionID.getOrCreateOptionID("proclus.seed", "The random number generator seed.");
+ public static final OptionID M_I_ID = new OptionID("proclus.mi", "The multiplier for the initial number of medoids.");
/**
* Holds the value of {@link #M_I_ID}.
@@ -119,9 +120,9 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
private int m_i;
/**
- * Holds the value of {@link #SEED_ID}.
+ * Random generator
*/
- private Long seed;
+ private RandomFactory rnd;
/**
* Java constructor.
@@ -130,12 +131,12 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
* @param k_i k_i Parameter
* @param l l Parameter
* @param m_i m_i Parameter
- * @param seed Random generator seed
+ * @param rnd Random generator
*/
- public PROCLUS(int k, int k_i, int l, int m_i, Long seed) {
+ public PROCLUS(int k, int k_i, int l, int m_i, RandomFactory rnd) {
super(k, k_i, l);
this.m_i = m_i;
- this.seed = seed;
+ this.rnd = rnd;
}
/**
@@ -147,19 +148,16 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
public Clustering<SubspaceModel<V>> run(Database database, Relation<V> relation) {
DistanceQuery<V, DoubleDistance> distFunc = this.getDistanceQuery(database);
RangeQuery<V, DoubleDistance> rangeQuery = database.getRangeQuery(distFunc);
- final Random random = new Random();
- if(seed != null) {
- random.setSeed(seed);
- }
+ final Random random = rnd.getRandom();
- if(DatabaseUtil.dimensionality(relation) < l) {
- throw new IllegalStateException("Dimensionality of data < parameter l! " + "(" + DatabaseUtil.dimensionality(relation) + " < " + l + ")");
+ if(RelationUtil.dimensionality(relation) < l) {
+ throw new IllegalStateException("Dimensionality of data < parameter l! (" + RelationUtil.dimensionality(relation) + " < " + l + ")");
}
// TODO: use a StepProgress!
// initialization phase
- if(logger.isVerbose()) {
- logger.verbose("1. Initialization phase...");
+ if(LOG.isVerbose()) {
+ LOG.verbose("1. Initialization phase...");
}
int sampleSize = Math.min(relation.size(), k_i * k);
DBIDs sampleSet = DBIDUtil.randomSample(relation.getDBIDs(), sampleSize, random.nextLong());
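
Note: the nullable Long seed is replaced by a RandomFactory, so the seeded/unseeded distinction no longer needs a null check at each use site. A before/after sketch, assuming getRandom() returns a ready-to-use java.util.Random:

    // before: seed == null meant "unseeded"
    //   final Random random = new Random();
    //   if (seed != null) { random.setSeed(seed); }
    // after: the factory encapsulates the seeding policy
    final Random random = rnd.getRandom();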
@@ -167,39 +165,39 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
int medoidSize = Math.min(relation.size(), m_i * k);
DBIDs medoids = greedy(distFunc, sampleSet, medoidSize, random);
- if(logger.isDebugging()) {
- StringBuffer msg = new StringBuffer();
- msg.append("\n");
- msg.append("sampleSize ").append(sampleSize).append("\n");
- msg.append("sampleSet ").append(sampleSet).append("\n");
- msg.append("medoidSize ").append(medoidSize).append("\n");
- msg.append("m ").append(medoids).append("\n");
- logger.debugFine(msg.toString());
+ if(LOG.isDebugging()) {
+ StringBuilder msg = new StringBuilder();
+ msg.append('\n');
+ msg.append("sampleSize ").append(sampleSize).append('\n');
+ msg.append("sampleSet ").append(sampleSet).append('\n');
+ msg.append("medoidSize ").append(medoidSize).append('\n');
+ msg.append("m ").append(medoids).append('\n');
+ LOG.debugFine(msg.toString());
}
// iterative phase
- if(logger.isVerbose()) {
- logger.verbose("2. Iterative phase...");
+ if(LOG.isVerbose()) {
+ LOG.verbose("2. Iterative phase...");
}
double bestObjective = Double.POSITIVE_INFINITY;
ModifiableDBIDs m_best = null;
ModifiableDBIDs m_bad = null;
ModifiableDBIDs m_current = initialSet(medoids, k, random);
- if(logger.isDebugging()) {
- StringBuffer msg = new StringBuffer();
- msg.append("\n");
- msg.append("m_c ").append(m_current).append("\n");
- logger.debugFine(msg.toString());
+ if(LOG.isDebugging()) {
+ StringBuilder msg = new StringBuilder();
+ msg.append('\n');
+ msg.append("m_c ").append(m_current).append('\n');
+ LOG.debugFine(msg.toString());
}
- IndefiniteProgress cprogress = logger.isVerbose() ? new IndefiniteProgress("Current number of clusters:", logger) : null;
+ IndefiniteProgress cprogress = LOG.isVerbose() ? new IndefiniteProgress("Current number of clusters:", LOG) : null;
// TODO: Use DataStore and Trove for performance
Map<DBID, PROCLUSCluster> clusters = null;
int loops = 0;
while(loops < 10) {
- Map<DBID, Set<Integer>> dimensions = findDimensions(m_current, relation, distFunc, rangeQuery);
+ Map<DBID, TIntSet> dimensions = findDimensions(m_current, relation, distFunc, rangeQuery);
clusters = assignPoints(dimensions, relation);
double objectiveFunction = evaluateClusters(clusters, dimensions, relation);
@@ -214,20 +212,20 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
m_current = computeM_current(medoids, m_best, m_bad, random);
loops++;
if(cprogress != null) {
- cprogress.setProcessed(clusters.size(), logger);
+ cprogress.setProcessed(clusters.size(), LOG);
}
}
if(cprogress != null) {
- cprogress.setCompleted(logger);
+ cprogress.setCompleted(LOG);
}
// refinement phase
- if(logger.isVerbose()) {
- logger.verbose("3. Refinement phase...");
+ if(LOG.isVerbose()) {
+ LOG.verbose("3. Refinement phase...");
}
- List<Pair<V, Set<Integer>>> dimensions = findDimensions(new ArrayList<PROCLUSCluster>(clusters.values()), relation);
+ List<Pair<V, TIntSet>> dimensions = findDimensions(new ArrayList<PROCLUSCluster>(clusters.values()), relation);
List<PROCLUSCluster> finalClusters = finalAssignment(dimensions, relation);
// build result
@@ -235,7 +233,7 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
Clustering<SubspaceModel<V>> result = new Clustering<SubspaceModel<V>>("ProClus clustering", "proclus-clustering");
for(PROCLUSCluster c : finalClusters) {
Cluster<SubspaceModel<V>> cluster = new Cluster<SubspaceModel<V>>(c.objectIDs);
- cluster.setModel(new SubspaceModel<V>(new Subspace<V>(c.getDimensions()), c.centroid));
+ cluster.setModel(new SubspaceModel<V>(new Subspace(c.getDimensions()), c.centroid));
cluster.setName("cluster_" + numClusters++);
result.addCluster(cluster);
@@ -259,40 +257,41 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
// m_1 is a random point of S
DBID m_i = s.remove(random.nextInt(s.size()));
medoids.add(m_i);
- if(logger.isDebugging()) {
- logger.debugFiner("medoids " + medoids);
+ if(LOG.isDebugging()) {
+ LOG.debugFiner("medoids " + medoids);
}
// compute distances between each point in S and m_i
- Map<DBID, DistanceResultPair<DoubleDistance>> distances = new HashMap<DBID, DistanceResultPair<DoubleDistance>>();
+ // FIXME: don't use maps, so we can work with DBIDRef
+ Map<DBID, DistanceDBIDPair<DoubleDistance>> distances = new HashMap<DBID, DistanceDBIDPair<DoubleDistance>>();
for(DBIDIter iter = s.iter(); iter.valid(); iter.advance()) {
- DBID id = iter.getDBID();
+ DBID id = DBIDUtil.deref(iter);
DoubleDistance dist = distFunc.distance(id, m_i);
- distances.put(id, new GenericDistanceResultPair<DoubleDistance>(dist, id));
+ distances.put(id, DBIDUtil.newDistancePair(dist, id));
}
for(int i = 1; i < m; i++) {
// choose medoid m_i to be far from previous medoids
- List<DistanceResultPair<DoubleDistance>> d = new ArrayList<DistanceResultPair<DoubleDistance>>(distances.values());
- Collections.sort(d);
+ List<DistanceDBIDPair<DoubleDistance>> d = new ArrayList<DistanceDBIDPair<DoubleDistance>>(distances.values());
+ DistanceDBIDResultUtil.sortByDistance(d);
- m_i = d.get(d.size() - 1).getDBID();
+ m_i = DBIDUtil.deref(d.get(d.size() - 1));
medoids.add(m_i);
s.remove(m_i);
distances.remove(m_i);
// compute distances of each point to closest medoid
for(DBIDIter iter = s.iter(); iter.valid(); iter.advance()) {
- DBID id = iter.getDBID();
+ DBID id = DBIDUtil.deref(iter);
DoubleDistance dist_new = distFunc.distance(id, m_i);
DoubleDistance dist_old = distances.get(id).getDistance();
DoubleDistance dist = dist_new.compareTo(dist_old) < 0 ? dist_new : dist_old;
- distances.put(id, new GenericDistanceResultPair<DoubleDistance>(dist, id));
+ distances.put(id, DBIDUtil.newDistancePair(dist, id));
}
- if(logger.isDebugging()) {
- logger.debugFiner("medoids " + medoids);
+ if(LOG.isDebugging()) {
+ LOG.debugFiner("medoids " + medoids);
}
}
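
Note: DBID iterators no longer hand out DBID objects directly; DBIDUtil.deref(iter) materializes one only where a persistent reference is needed (here: as a HashMap key), and DistanceDBIDResultUtil.sortByDistance() replaces Collections.sort() for the new pair type. A sketch of the pattern, with the declarations from the hunk above:

    for (DBIDIter iter = s.iter(); iter.valid(); iter.advance()) {
      DBID id = DBIDUtil.deref(iter); // the iterator itself is reused on advance()
      distances.put(id, DBIDUtil.newDistancePair(distFunc.distance(id, m_i), id));
    }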
@@ -332,7 +331,7 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
ModifiableDBIDs m_current = DBIDUtil.newHashSet();
for(DBIDIter iter = m_best.iter(); iter.valid(); iter.advance()) {
- DBID m_i = iter.getDBID();
+ DBID m_i = DBIDUtil.deref(iter);
if(m_bad.contains(m_i)) {
int currentSize = m_current.size();
while(m_current.size() == currentSize) {
@@ -359,17 +358,17 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
* @param distFunc the distance function
* @return a mapping of the medoid's id to its locality
*/
- private Map<DBID, List<DistanceResultPair<DoubleDistance>>> getLocalities(DBIDs medoids, Relation<V> database, DistanceQuery<V, DoubleDistance> distFunc, RangeQuery<V, DoubleDistance> rangeQuery) {
- Map<DBID, List<DistanceResultPair<DoubleDistance>>> result = new HashMap<DBID, List<DistanceResultPair<DoubleDistance>>>();
+ private Map<DBID, DistanceDBIDResult<DoubleDistance>> getLocalities(DBIDs medoids, Relation<V> database, DistanceQuery<V, DoubleDistance> distFunc, RangeQuery<V, DoubleDistance> rangeQuery) {
+ Map<DBID, DistanceDBIDResult<DoubleDistance>> result = new HashMap<DBID, DistanceDBIDResult<DoubleDistance>>();
for(DBIDIter iter = medoids.iter(); iter.valid(); iter.advance()) {
- DBID m = iter.getDBID();
+ DBID m = DBIDUtil.deref(iter);
// determine minimum distance between current medoid m and any other
// medoid m_i
DoubleDistance minDist = null;
for(DBIDIter iter2 = medoids.iter(); iter2.valid(); iter2.advance()) {
- DBID m_i = iter2.getDBID();
- if(m_i == m) {
+ DBID m_i = DBIDUtil.deref(iter2);
+ if(DBIDUtil.equal(m_i, m)) {
continue;
}
DoubleDistance currentDist = distFunc.distance(m, m_i);
@@ -380,7 +379,7 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
// determine points in sphere centered at m with radius minDist
assert minDist != null;
- List<DistanceResultPair<DoubleDistance>> qr = rangeQuery.getRangeForDBID(m, minDist);
+ DistanceDBIDResult<DoubleDistance> qr = rangeQuery.getRangeForDBID(m, minDist);
result.put(m, qr);
}
@@ -397,23 +396,23 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
* @return the set of correlated dimensions for each medoid in the specified
* medoid set
*/
- private Map<DBID, Set<Integer>> findDimensions(DBIDs medoids, Relation<V> database, DistanceQuery<V, DoubleDistance> distFunc, RangeQuery<V, DoubleDistance> rangeQuery) {
+ private Map<DBID, TIntSet> findDimensions(DBIDs medoids, Relation<V> database, DistanceQuery<V, DoubleDistance> distFunc, RangeQuery<V, DoubleDistance> rangeQuery) {
// get localities
- Map<DBID, List<DistanceResultPair<DoubleDistance>>> localities = getLocalities(medoids, database, distFunc, rangeQuery);
+ Map<DBID, DistanceDBIDResult<DoubleDistance>> localities = getLocalities(medoids, database, distFunc, rangeQuery);
// compute x_ij = avg distance from points in l_i to medoid m_i
- int dim = DatabaseUtil.dimensionality(database);
+ int dim = RelationUtil.dimensionality(database);
Map<DBID, double[]> averageDistances = new HashMap<DBID, double[]>();
for(DBIDIter iter = medoids.iter(); iter.valid(); iter.advance()) {
- DBID m_i = iter.getDBID();
+ DBID m_i = DBIDUtil.deref(iter);
V medoid_i = database.get(m_i);
- List<DistanceResultPair<DoubleDistance>> l_i = localities.get(m_i);
+ DistanceDBIDResult<DoubleDistance> l_i = localities.get(m_i);
double[] x_i = new double[dim];
- for(DistanceResultPair<DoubleDistance> qr : l_i) {
- V o = database.get(qr.getDBID());
+ for(DBIDIter qr = l_i.iter(); qr.valid(); qr.advance()) {
+ V o = database.get(qr);
for(int d = 0; d < dim; d++) {
- x_i[d] += Math.abs(medoid_i.doubleValue(d + 1) - o.doubleValue(d + 1));
+ x_i[d] += Math.abs(medoid_i.doubleValue(d) - o.doubleValue(d));
}
}
for(int d = 0; d < dim; d++) {
@@ -422,11 +421,11 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
averageDistances.put(m_i, x_i);
}
- Map<DBID, Set<Integer>> dimensionMap = new HashMap<DBID, Set<Integer>>();
+ Map<DBID, TIntSet> dimensionMap = new HashMap<DBID, TIntSet>();
List<CTriple<Double, DBID, Integer>> z_ijs = new ArrayList<CTriple<Double, DBID, Integer>>();
for(DBIDIter iter = medoids.iter(); iter.valid(); iter.advance()) {
- DBID m_i = iter.getDBID();
- Set<Integer> dims_i = new HashSet<Integer>();
+ DBID m_i = DBIDUtil.deref(iter);
+ TIntSet dims_i = new TIntHashSet();
dimensionMap.put(m_i, dims_i);
double[] x_i = averageDistances.get(m_i);
@@ -447,7 +446,7 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
sigma_i = Math.sqrt(sigma_i);
for(int j = 0; j < dim; j++) {
- z_ijs.add(new CTriple<Double, DBID, Integer>((x_i[j] - y_i) / sigma_i, m_i, j + 1));
+ z_ijs.add(new CTriple<Double, DBID, Integer>((x_i[j] - y_i) / sigma_i, m_i, j));
}
}
Collections.sort(z_ijs);
@@ -455,15 +454,15 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
int max = Math.max(k * l, 2);
for(int m = 0; m < max; m++) {
CTriple<Double, DBID, Integer> z_ij = z_ijs.get(m);
- Set<Integer> dims_i = dimensionMap.get(z_ij.getSecond());
+ TIntSet dims_i = dimensionMap.get(z_ij.getSecond());
dims_i.add(z_ij.getThird());
- if(logger.isDebugging()) {
- StringBuffer msg = new StringBuffer();
- msg.append("\n");
- msg.append("z_ij ").append(z_ij).append("\n");
- msg.append("D_i ").append(dims_i).append("\n");
- logger.debugFiner(msg.toString());
+ if(LOG.isDebugging()) {
+ StringBuilder msg = new StringBuilder();
+ msg.append('\n');
+ msg.append("z_ij ").append(z_ij).append('\n');
+ msg.append("D_i ").append(dims_i).append('\n');
+ LOG.debugFiner(msg.toString());
}
}
return dimensionMap;
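
Note: java.util.Set<Integer> gives way to Trove's primitive TIntSet, avoiding Integer boxing, and iteration switches from the enhanced for loop to an explicit TIntIterator. In the same pass, the dimension numbering changes from 1-based to 0-based to match the new doubleValue(d) indexing. A minimal self-contained sketch:

    TIntSet dims = new TIntHashSet();
    dims.add(0); // dimensions are now 0-indexed
    dims.add(2);
    int sum = 0;
    for (TIntIterator it = dims.iterator(); it.hasNext();) {
      sum += it.next(); // primitive int, no unboxing
    }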
@@ -478,9 +477,9 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
* @return the set of correlated dimensions for each specified cluster
* centroid
*/
- private List<Pair<V, Set<Integer>>> findDimensions(List<PROCLUSCluster> clusters, Relation<V> database) {
+ private List<Pair<V, TIntSet>> findDimensions(List<PROCLUSCluster> clusters, Relation<V> database) {
// compute x_ij = avg distance from points in c_i to c_i.centroid
- int dim = DatabaseUtil.dimensionality(database);
+ int dim = RelationUtil.dimensionality(database);
Map<Integer, double[]> averageDistances = new HashMap<Integer, double[]>();
for(int i = 0; i < clusters.size(); i++) {
@@ -489,7 +488,7 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
for(DBIDIter iter = c_i.objectIDs.iter(); iter.valid(); iter.advance()) {
V o = database.get(iter);
for(int d = 0; d < dim; d++) {
- x_i[d] += Math.abs(c_i.centroid.doubleValue(d + 1) - o.doubleValue(d + 1));
+ x_i[d] += Math.abs(c_i.centroid.doubleValue(d) - o.doubleValue(d));
}
}
for(int d = 0; d < dim; d++) {
@@ -518,38 +517,38 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
sigma_i = Math.sqrt(sigma_i);
for(int j = 0; j < dim; j++) {
- z_ijs.add(new CTriple<Double, Integer, Integer>((x_i[j] - y_i) / sigma_i, i, j + 1));
+ z_ijs.add(new CTriple<Double, Integer, Integer>((x_i[j] - y_i) / sigma_i, i, j));
}
}
Collections.sort(z_ijs);
// mapping cluster index -> dimensions
- Map<Integer, Set<Integer>> dimensionMap = new HashMap<Integer, Set<Integer>>();
+ Map<Integer, TIntSet> dimensionMap = new HashMap<Integer, TIntSet>();
int max = Math.max(k * l, 2);
for(int m = 0; m < max; m++) {
CTriple<Double, Integer, Integer> z_ij = z_ijs.get(m);
- Set<Integer> dims_i = dimensionMap.get(z_ij.getSecond());
+ TIntSet dims_i = dimensionMap.get(z_ij.getSecond());
if(dims_i == null) {
- dims_i = new HashSet<Integer>();
+ dims_i = new TIntHashSet();
dimensionMap.put(z_ij.getSecond(), dims_i);
}
dims_i.add(z_ij.getThird());
- if(logger.isDebugging()) {
- StringBuffer msg = new StringBuffer();
- msg.append("\n");
- msg.append("z_ij ").append(z_ij).append("\n");
- msg.append("D_i ").append(dims_i).append("\n");
- logger.debugFiner(msg.toString());
+ if(LOG.isDebugging()) {
+ StringBuilder msg = new StringBuilder();
+ msg.append('\n');
+ msg.append("z_ij ").append(z_ij).append('\n');
+ msg.append("D_i ").append(dims_i).append('\n');
+ LOG.debugFiner(msg.toString());
}
}
// mapping cluster -> dimensions
- List<Pair<V, Set<Integer>>> result = new ArrayList<Pair<V, Set<Integer>>>();
+ List<Pair<V, TIntSet>> result = new ArrayList<Pair<V, TIntSet>>();
for(int i : dimensionMap.keySet()) {
- Set<Integer> dims_i = dimensionMap.get(i);
+ TIntSet dims_i = dimensionMap.get(i);
PROCLUSCluster c_i = clusters.get(i);
- result.add(new Pair<V, Set<Integer>>(c_i.centroid, dims_i));
+ result.add(new Pair<V, TIntSet>(c_i.centroid, dims_i));
}
return result;
}
@@ -562,26 +561,26 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
* @param database the database containing the objects
* @return the assignments of the object to the clusters
*/
- private Map<DBID, PROCLUSCluster> assignPoints(Map<DBID, Set<Integer>> dimensions, Relation<V> database) {
+ private Map<DBID, PROCLUSCluster> assignPoints(Map<DBID, TIntSet> dimensions, Relation<V> database) {
Map<DBID, ModifiableDBIDs> clusterIDs = new HashMap<DBID, ModifiableDBIDs>();
for(DBID m_i : dimensions.keySet()) {
clusterIDs.put(m_i, DBIDUtil.newHashSet());
}
for(DBIDIter it = database.iterDBIDs(); it.valid(); it.advance()) {
- DBID p_id = it.getDBID();
+ DBID p_id = DBIDUtil.deref(it);
V p = database.get(p_id);
- DistanceResultPair<DoubleDistance> minDist = null;
+ DistanceDBIDPair<DoubleDistance> minDist = null;
for(DBID m_i : dimensions.keySet()) {
V m = database.get(m_i);
- DistanceResultPair<DoubleDistance> currentDist = new GenericDistanceResultPair<DoubleDistance>(manhattanSegmentalDistance(p, m, dimensions.get(m_i)), m_i);
- if(minDist == null || currentDist.compareTo(minDist) < 0) {
+ DistanceDBIDPair<DoubleDistance> currentDist = DBIDUtil.newDistancePair(manhattanSegmentalDistance(p, m, dimensions.get(m_i)), m_i);
+ if(minDist == null || currentDist.compareByDistance(minDist) < 0) {
minDist = currentDist;
}
}
// add p to cluster with mindist
assert minDist != null;
- ModifiableDBIDs ids = clusterIDs.get(minDist.getDBID());
+ ModifiableDBIDs ids = clusterIDs.get(DBIDUtil.deref(minDist));
ids.add(p_id);
}
@@ -589,17 +588,17 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
for(DBID m_i : dimensions.keySet()) {
ModifiableDBIDs objectIDs = clusterIDs.get(m_i);
if(!objectIDs.isEmpty()) {
- Set<Integer> clusterDimensions = dimensions.get(m_i);
- V centroid = DatabaseUtil.centroid(database, objectIDs);
+ TIntSet clusterDimensions = dimensions.get(m_i);
+ V centroid = Centroid.make(database, objectIDs).toVector(database);
clusters.put(m_i, new PROCLUSCluster(objectIDs, clusterDimensions, centroid));
}
}
- if(logger.isDebugging()) {
- StringBuffer msg = new StringBuffer();
- msg.append("\n");
- msg.append("clusters ").append(clusters).append("\n");
- logger.debugFine(msg.toString());
+ if(LOG.isDebugging()) {
+ StringBuilder msg = new StringBuilder();
+ msg.append('\n');
+ msg.append("clusters ").append(clusters).append('\n');
+ LOG.debugFine(msg.toString());
}
return clusters;
}
@@ -612,20 +611,20 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
* @param database the database containing the objects
* @return the assignments of the object to the clusters
*/
- private List<PROCLUSCluster> finalAssignment(List<Pair<V, Set<Integer>>> dimensions, Relation<V> database) {
+ private List<PROCLUSCluster> finalAssignment(List<Pair<V, TIntSet>> dimensions, Relation<V> database) {
Map<Integer, ModifiableDBIDs> clusterIDs = new HashMap<Integer, ModifiableDBIDs>();
for(int i = 0; i < dimensions.size(); i++) {
clusterIDs.put(i, DBIDUtil.newHashSet());
}
for(DBIDIter it = database.iterDBIDs(); it.valid(); it.advance()) {
- DBID p_id = it.getDBID();
+ DBID p_id = DBIDUtil.deref(it);
V p = database.get(p_id);
Pair<DoubleDistance, Integer> minDist = null;
for(int i = 0; i < dimensions.size(); i++) {
- Pair<V, Set<Integer>> pair_i = dimensions.get(i);
+ Pair<V, TIntSet> pair_i = dimensions.get(i);
V c_i = pair_i.first;
- Set<Integer> dimensions_i = pair_i.second;
+ TIntSet dimensions_i = pair_i.second;
DoubleDistance currentDist = manhattanSegmentalDistance(p, c_i, dimensions_i);
if(minDist == null || currentDist.compareTo(minDist.first) < 0) {
minDist = new Pair<DoubleDistance, Integer>(currentDist, i);
@@ -641,17 +640,17 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
for(int i = 0; i < dimensions.size(); i++) {
ModifiableDBIDs objectIDs = clusterIDs.get(i);
if(!objectIDs.isEmpty()) {
- Set<Integer> clusterDimensions = dimensions.get(i).second;
- V centroid = DatabaseUtil.centroid(database, objectIDs);
+ TIntSet clusterDimensions = dimensions.get(i).second;
+ V centroid = Centroid.make(database, objectIDs).toVector(database);
clusters.add(new PROCLUSCluster(objectIDs, clusterDimensions, centroid));
}
}
- if(logger.isDebugging()) {
- StringBuffer msg = new StringBuffer();
- msg.append("\n");
- msg.append("clusters ").append(clusters).append("\n");
- logger.debugFine(msg.toString());
+ if(LOG.isDebugging()) {
+ StringBuilder msg = new StringBuilder();
+ msg.append('\n');
+ msg.append("clusters ").append(clusters).append('\n');
+ LOG.debugFine(msg.toString());
}
return clusters;
}
@@ -666,9 +665,10 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
* @return the Manhattan segmental distance between o1 and o2 relative to the
* specified dimensions
*/
- private DoubleDistance manhattanSegmentalDistance(V o1, V o2, Set<Integer> dimensions) {
+ private DoubleDistance manhattanSegmentalDistance(V o1, V o2, TIntSet dimensions) {
double result = 0;
- for(Integer d : dimensions) {
+ for (TIntIterator iter = dimensions.iterator(); iter.hasNext(); ) {
+ final int d = iter.next();
result += Math.abs(o1.doubleValue(d) - o2.doubleValue(d));
}
result /= dimensions.size();
@@ -683,15 +683,16 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
* @param database the database holding the objects
* @return a measure for the cluster quality
*/
- private double evaluateClusters(Map<DBID, PROCLUSCluster> clusters, Map<DBID, Set<Integer>> dimensions, Relation<V> database) {
+ private double evaluateClusters(Map<DBID, PROCLUSCluster> clusters, Map<DBID, TIntSet> dimensions, Relation<V> database) {
double result = 0;
for(DBID m_i : clusters.keySet()) {
PROCLUSCluster c_i = clusters.get(m_i);
V centroid_i = c_i.centroid;
- Set<Integer> dims_i = dimensions.get(m_i);
+ TIntSet dims_i = dimensions.get(m_i);
double w_i = 0;
- for(Integer j : dims_i) {
+ for (TIntIterator iter = dims_i.iterator(); iter.hasNext(); ) {
+ final int j = iter.next();
w_i += avgDistance(centroid_i, c_i.objectIDs, database, j);
}
@@ -714,12 +715,12 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
* specified dimension
*/
private double avgDistance(V centroid, DBIDs objectIDs, Relation<V> database, int dimension) {
- double avg = 0;
+ Mean avg = new Mean();
for(DBIDIter iter = objectIDs.iter(); iter.valid(); iter.advance()) {
V o = database.get(iter);
- avg += Math.abs(centroid.doubleValue(dimension) - o.doubleValue(dimension));
+ avg.put(Math.abs(centroid.doubleValue(dimension) - o.doubleValue(dimension)));
}
- return avg / objectIDs.size();
+ return avg.getMean();
}
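
Note: the manual sum-and-divide average becomes an incremental Mean accumulator. A sketch of the usage, assuming put() adds a value and getMean() returns the running average:

    Mean avg = new Mean();
    avg.put(1.0);
    avg.put(2.0);
    avg.put(4.0);
    double m = avg.getMean(); // 7.0 / 3.0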
/**
@@ -748,7 +749,7 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -765,7 +766,7 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
/**
* The correlated dimensions of this cluster.
*/
- Set<Integer> dimensions;
+ TIntSet dimensions;
/**
* The centroids of this cluster along each dimension.
@@ -779,7 +780,7 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
* @param dimensions the correlated dimensions of this cluster
* @param centroid the centroid of this cluster
*/
- public PROCLUSCluster(ModifiableDBIDs objectIDs, Set<Integer> dimensions, V centroid) {
+ public PROCLUSCluster(ModifiableDBIDs objectIDs, TIntSet dimensions, V centroid) {
this.objectIDs = objectIDs;
this.dimensions = dimensions;
this.centroid = centroid;
@@ -787,19 +788,19 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
@Override
public String toString() {
- StringBuffer result = new StringBuffer();
+ StringBuilder result = new StringBuilder();
result.append("Dimensions: [");
boolean notFirst = false;
- for(Integer d : dimensions) {
+ for(TIntIterator iter = dimensions.iterator(); iter.hasNext(); ) {
if(notFirst) {
- result.append(",");
+ result.append(',');
}
else {
notFirst = true;
}
- result.append(d);
+ result.append(iter.next());
}
- result.append("]");
+ result.append(']');
result.append("\nCentroid: ").append(centroid);
return result.toString();
@@ -812,8 +813,8 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
*/
public BitSet getDimensions() {
BitSet result = new BitSet();
- for(int d : dimensions) {
- result.set(d - 1);
+ for(TIntIterator iter = dimensions.iterator(); iter.hasNext(); ) {
+ result.set(iter.next());
}
return result;
}
@@ -826,10 +827,15 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractProjectedClustering.Parameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractProjectedClustering.Parameterizer {
+ /**
+ * Parameter to specify the random generator seed.
+ */
+ public static final OptionID SEED_ID = new OptionID("proclus.seed", "The random number generator seed.");
+
protected int m_i = -1;
- protected Long seed = null;
+ protected RandomFactory rnd;
@Override
protected void makeOptions(Parameterization config) {
@@ -845,15 +851,15 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
m_i = m_iP.getValue();
}
- LongParameter seedP = new LongParameter(SEED_ID, true);
- if(config.grab(seedP)) {
- seed = seedP.getValue();
+ RandomParameter rndP = new RandomParameter(SEED_ID);
+ if(config.grab(rndP)) {
+ rnd = rndP.getValue();
}
}
@Override
protected PROCLUS<V> makeInstance() {
- return new PROCLUS<V>(k, k_i, l, m_i, seed);
+ return new PROCLUS<V>(k, k_i, l, m_i, rnd);
}
}
}
\ No newline at end of file
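
Note: proclus.seed moves from the algorithm class into its Parameterizer and becomes a RandomParameter, which yields a RandomFactory instead of a raw Long. A sketch of the new grab, with the names from the hunk above:

    RandomParameter rndP = new RandomParameter(SEED_ID);
    if (config.grab(rndP)) {
      rnd = rndP.getValue(); // a RandomFactory, seeded or not depending on the input
    }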
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PreDeCon.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PreDeCon.java
index 4ca5a564..fc3228eb 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PreDeCon.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PreDeCon.java
@@ -58,11 +58,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz
@Title("PreDeCon: Subspace Preference weighted Density Connected Clustering")
@Description("PreDeCon computes clusters of subspace preference weighted connected points. " + "The algorithm searches for local subgroups of a set of feature vectors having " + "a low variance along one or more (but not all) attributes.")
@Reference(authors = "C. Böhm, K. Kailing, H.-P. Kriegel, P. Kröger", title = "Density Connected Clustering with Local Subspace Preferences", booktitle = "Proc. 4th IEEE Int. Conf. on Data Mining (ICDM'04), Brighton, UK, 2004", url = "http://dx.doi.org/10.1109/ICDM.2004.10087")
-public class PreDeCon<V extends NumberVector<V, ?>> extends AbstractProjectedDBSCAN<Clustering<Model>, V> {
+public class PreDeCon<V extends NumberVector<?>> extends AbstractProjectedDBSCAN<Clustering<Model>, V> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(PreDeCon.class);
+ private static final Logging LOG = Logging.getLogger(PreDeCon.class);
/**
* Constructor.
@@ -88,7 +88,7 @@ public class PreDeCon<V extends NumberVector<V, ?>> extends AbstractProjectedDBS
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -98,7 +98,7 @@ public class PreDeCon<V extends NumberVector<V, ?>> extends AbstractProjectedDBS
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractProjectedDBSCAN.Parameterizer<V, DoubleDistance> {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractProjectedDBSCAN.Parameterizer<V, DoubleDistance> {
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java
index c47c74b6..46c5f0b8 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java
@@ -42,12 +42,13 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.ProxyDatabase;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.AbstractDimensionsSelectingDoubleDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -84,11 +85,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
@Title("SUBCLU: Density connected Subspace Clustering")
@Description("Algorithm to detect arbitrarily shaped and positioned clusters in subspaces. SUBCLU delivers for each subspace the same clusters DBSCAN would have found, when applied to this subspace seperately.")
@Reference(authors = "K. Kailing, H.-P. Kriegel, P. Kröger", title = "Density connected Subspace Clustering for High Dimensional Data. ", booktitle = "Proc. SIAM Int. Conf. on Data Mining (SDM'04), Lake Buena Vista, FL, 2004")
-public class SUBCLU<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clustering<SubspaceModel<V>>> implements SubspaceClusteringAlgorithm<SubspaceModel<V>> {
+public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<SubspaceModel<V>>> implements SubspaceClusteringAlgorithm<SubspaceModel<V>> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(SUBCLU.class);
+ private static final Logging LOG = Logging.getLogger(SUBCLU.class);
/**
* The distance function to determine the distance between database objects.
@@ -99,7 +100,7 @@ public class SUBCLU<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
* Key: {@code -subclu.distancefunction}
* </p>
*/
- public static final OptionID DISTANCE_FUNCTION_ID = OptionID.getOrCreateOptionID("subclu.distancefunction", "Distance function to determine the distance between database objects.");
+ public static final OptionID DISTANCE_FUNCTION_ID = new OptionID("subclu.distancefunction", "Distance function to determine the distance between database objects.");
/**
* Parameter to specify the maximum radius of the neighborhood to be
@@ -109,7 +110,7 @@ public class SUBCLU<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
* Key: {@code -subclu.epsilon}
* </p>
*/
- public static final OptionID EPSILON_ID = OptionID.getOrCreateOptionID("subclu.epsilon", "The maximum radius of the neighborhood to be considered.");
+ public static final OptionID EPSILON_ID = new OptionID("subclu.epsilon", "The maximum radius of the neighborhood to be considered.");
/**
* Parameter to specify the threshold for minimum number of points in the
@@ -118,7 +119,7 @@ public class SUBCLU<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
* Key: {@code -subclu.minpts}
* </p>
*/
- public static final OptionID MINPTS_ID = OptionID.getOrCreateOptionID("subclu.minpts", "Threshold for minimum number of points in the epsilon-neighborhood of a point.");
+ public static final OptionID MINPTS_ID = new OptionID("subclu.minpts", "Threshold for minimum number of points in the epsilon-neighborhood of a point.");
/**
* Holds the instance of the distance function specified by
@@ -162,36 +163,36 @@ public class SUBCLU<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
* @return Clustering result
*/
public Clustering<SubspaceModel<V>> run(Relation<V> relation) {
- final int dimensionality = DatabaseUtil.dimensionality(relation);
+ final int dimensionality = RelationUtil.dimensionality(relation);
- StepProgress stepprog = logger.isVerbose() ? new StepProgress(dimensionality) : null;
+ StepProgress stepprog = LOG.isVerbose() ? new StepProgress(dimensionality) : null;
// Generate all 1-dimensional clusters
if(stepprog != null) {
- stepprog.beginStep(1, "Generate all 1-dimensional clusters.", logger);
+ stepprog.beginStep(1, "Generate all 1-dimensional clusters.", LOG);
}
// mapping of dimensionality to set of subspaces
- HashMap<Integer, List<Subspace<V>>> subspaceMap = new HashMap<Integer, List<Subspace<V>>>();
+ HashMap<Integer, List<Subspace>> subspaceMap = new HashMap<Integer, List<Subspace>>();
// list of 1-dimensional subspaces containing clusters
- List<Subspace<V>> s_1 = new ArrayList<Subspace<V>>();
+ List<Subspace> s_1 = new ArrayList<Subspace>();
subspaceMap.put(0, s_1);
// mapping of subspaces to list of clusters
- TreeMap<Subspace<V>, List<Cluster<Model>>> clusterMap = new TreeMap<Subspace<V>, List<Cluster<Model>>>(new Subspace.DimensionComparator());
+ TreeMap<Subspace, List<Cluster<Model>>> clusterMap = new TreeMap<Subspace, List<Cluster<Model>>>(new Subspace.DimensionComparator());
for(int d = 0; d < dimensionality; d++) {
- Subspace<V> currentSubspace = new Subspace<V>(d);
+ Subspace currentSubspace = new Subspace(d);
List<Cluster<Model>> clusters = runDBSCAN(relation, null, currentSubspace);
- if(logger.isDebuggingFiner()) {
- StringBuffer msg = new StringBuffer();
- msg.append("\n").append(clusters.size()).append(" clusters in subspace ").append(currentSubspace.dimensonsToString()).append(": \n");
+ if(LOG.isDebuggingFiner()) {
+ StringBuilder msg = new StringBuilder();
+ msg.append('\n').append(clusters.size()).append(" clusters in subspace ").append(currentSubspace.dimensonsToString()).append(": \n");
for(Cluster<Model> cluster : clusters) {
msg.append(" " + cluster.getIDs() + "\n");
}
- logger.debugFiner(msg.toString());
+ LOG.debugFiner(msg.toString());
}
if(!clusters.isEmpty()) {
@@ -203,26 +204,26 @@ public class SUBCLU<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
// Generate (d+1)-dimensional clusters from d-dimensional clusters
for(int d = 0; d < dimensionality - 1; d++) {
if(stepprog != null) {
- stepprog.beginStep(d + 2, "Generate " + (d + 2) + "-dimensional clusters from " + (d + 1) + "-dimensional clusters.", logger);
+ stepprog.beginStep(d + 2, "Generate " + (d + 2) + "-dimensional clusters from " + (d + 1) + "-dimensional clusters.", LOG);
}
- List<Subspace<V>> subspaces = subspaceMap.get(d);
+ List<Subspace> subspaces = subspaceMap.get(d);
if(subspaces == null || subspaces.isEmpty()) {
if(stepprog != null) {
for(int dim = d + 1; dim < dimensionality - 1; dim++) {
- stepprog.beginStep(dim + 2, "Generation of" + (dim + 2) + "-dimensional clusters not applicable, because no more " + (d + 2) + "-dimensional subspaces found.", logger);
+ stepprog.beginStep(dim + 2, "Generation of" + (dim + 2) + "-dimensional clusters not applicable, because no more " + (d + 2) + "-dimensional subspaces found.", LOG);
}
}
break;
}
- List<Subspace<V>> candidates = generateSubspaceCandidates(subspaces);
- List<Subspace<V>> s_d = new ArrayList<Subspace<V>>();
+ List<Subspace> candidates = generateSubspaceCandidates(subspaces);
+ List<Subspace> s_d = new ArrayList<Subspace>();
- for(Subspace<V> candidate : candidates) {
- Subspace<V> bestSubspace = bestSubspace(subspaces, candidate, clusterMap);
- if(logger.isDebuggingFine()) {
- logger.debugFine("best subspace of " + candidate.dimensonsToString() + ": " + bestSubspace.dimensonsToString());
+ for(Subspace candidate : candidates) {
+ Subspace bestSubspace = bestSubspace(subspaces, candidate, clusterMap);
+ if(LOG.isDebuggingFine()) {
+ LOG.debugFine("best subspace of " + candidate.dimensonsToString() + ": " + bestSubspace.dimensonsToString());
}
List<Cluster<Model>> bestSubspaceClusters = clusterMap.get(bestSubspace);
@@ -234,13 +235,13 @@ public class SUBCLU<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
}
}
- if(logger.isDebuggingFine()) {
- StringBuffer msg = new StringBuffer();
+ if(LOG.isDebuggingFine()) {
+ StringBuilder msg = new StringBuilder();
msg.append(clusters.size() + " cluster(s) in subspace " + candidate + ": \n");
for(Cluster<Model> c : clusters) {
msg.append(" " + c.getIDs() + "\n");
}
- logger.debugFine(msg.toString());
+ LOG.debugFine(msg.toString());
}
if(!clusters.isEmpty()) {
@@ -257,18 +258,18 @@ public class SUBCLU<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
// build result
int numClusters = 1;
result = new Clustering<SubspaceModel<V>>("SUBCLU clustering", "subclu-clustering");
- for(Subspace<V> subspace : clusterMap.descendingKeySet()) {
+ for(Subspace subspace : clusterMap.descendingKeySet()) {
List<Cluster<Model>> clusters = clusterMap.get(subspace);
for(Cluster<Model> cluster : clusters) {
Cluster<SubspaceModel<V>> newCluster = new Cluster<SubspaceModel<V>>(cluster.getIDs());
- newCluster.setModel(new SubspaceModel<V>(subspace, DatabaseUtil.centroid(relation, cluster.getIDs())));
+ newCluster.setModel(new SubspaceModel<V>(subspace, Centroid.make(relation, cluster.getIDs()).toVector(relation)));
newCluster.setName("cluster_" + numClusters++);
result.addCluster(newCluster);
}
}
if(stepprog != null) {
- stepprog.setCompleted(logger);
+ stepprog.setCompleted(LOG);
}
return result;
}
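
Note: DatabaseUtil.centroid() is replaced by a two-step construction: accumulate a Centroid over the cluster members, then project it back into the relation's vector type. A one-line sketch with the names from the hunk above:

    // assumes Centroid.make(relation, ids) averages the referenced vectors
    V centroid = Centroid.make(relation, cluster.getIDs()).toVector(relation);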
@@ -294,7 +295,7 @@ public class SUBCLU<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
* @param subspace the subspace to run DBSCAN on
* @return the clustering result of the DBSCAN run
*/
- private List<Cluster<Model>> runDBSCAN(Relation<V> relation, DBIDs ids, Subspace<V> subspace) {
+ private List<Cluster<Model>> runDBSCAN(Relation<V> relation, DBIDs ids, Subspace subspace) {
// distance function
distanceFunction.setSelectedDimensions(subspace.getDimensions());
@@ -309,8 +310,8 @@ public class SUBCLU<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
DBSCAN<V, DoubleDistance> dbscan = new DBSCAN<V, DoubleDistance>(distanceFunction, epsilon, minpts);
// run DBSCAN
- if(logger.isVerbose()) {
- logger.verbose("\nRun DBSCAN on subspace " + subspace.dimensonsToString());
+ if(LOG.isVerbose()) {
+ LOG.verbose("\nRun DBSCAN on subspace " + subspace.dimensonsToString());
}
Clustering<Model> dbsres = dbscan.run(proxy);
@@ -332,8 +333,8 @@ public class SUBCLU<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
* @param subspaces the {@code d}-dimensional subspaces
* @return the {@code d+1}-dimensional subspace candidates
*/
- private List<Subspace<V>> generateSubspaceCandidates(List<Subspace<V>> subspaces) {
- List<Subspace<V>> candidates = new ArrayList<Subspace<V>>();
+ private List<Subspace> generateSubspaceCandidates(List<Subspace> subspaces) {
+ List<Subspace> candidates = new ArrayList<Subspace>();
if(subspaces.isEmpty()) {
return candidates;
@@ -342,28 +343,28 @@ public class SUBCLU<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
// Generate (d+1)-dimensional candidate subspaces
int d = subspaces.get(0).dimensionality();
- StringBuffer msgFine = new StringBuffer("\n");
- if(logger.isDebuggingFiner()) {
- msgFine.append("subspaces ").append(subspaces).append("\n");
+ StringBuilder msgFine = new StringBuilder("\n");
+ if(LOG.isDebuggingFiner()) {
+ msgFine.append("subspaces ").append(subspaces).append('\n');
}
for(int i = 0; i < subspaces.size(); i++) {
- Subspace<V> s1 = subspaces.get(i);
+ Subspace s1 = subspaces.get(i);
for(int j = i + 1; j < subspaces.size(); j++) {
- Subspace<V> s2 = subspaces.get(j);
- Subspace<V> candidate = s1.join(s2);
+ Subspace s2 = subspaces.get(j);
+ Subspace candidate = s1.join(s2);
if(candidate != null) {
- if(logger.isDebuggingFiner()) {
- msgFine.append("candidate: ").append(candidate.dimensonsToString()).append("\n");
+ if(LOG.isDebuggingFiner()) {
+ msgFine.append("candidate: ").append(candidate.dimensonsToString()).append('\n');
}
// prune irrelevant candidate subspaces
- List<Subspace<V>> lowerSubspaces = lowerSubspaces(candidate);
- if(logger.isDebuggingFiner()) {
- msgFine.append("lowerSubspaces: ").append(lowerSubspaces).append("\n");
+ List<Subspace> lowerSubspaces = lowerSubspaces(candidate);
+ if(LOG.isDebuggingFiner()) {
+ msgFine.append("lowerSubspaces: ").append(lowerSubspaces).append('\n');
}
boolean irrelevantCandidate = false;
- for(Subspace<V> s : lowerSubspaces) {
+ for(Subspace s : lowerSubspaces) {
if(!subspaces.contains(s)) {
irrelevantCandidate = true;
break;
@@ -376,16 +377,16 @@ public class SUBCLU<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
}
}
- if(logger.isDebuggingFiner()) {
- logger.debugFiner(msgFine.toString());
+ if(LOG.isDebuggingFiner()) {
+ LOG.debugFiner(msgFine.toString());
}
- if(logger.isDebugging()) {
- StringBuffer msg = new StringBuffer();
+ if(LOG.isDebugging()) {
+ StringBuilder msg = new StringBuilder();
msg.append(d + 1).append("-dimensional candidate subspaces: ");
- for(Subspace<V> candidate : candidates) {
- msg.append(candidate.dimensonsToString()).append(" ");
+ for(Subspace candidate : candidates) {
+ msg.append(candidate.dimensonsToString()).append(' ');
}
- logger.debug(msg.toString());
+ LOG.debug(msg.toString());
}
return candidates;
@@ -398,19 +399,19 @@ public class SUBCLU<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
* @param subspace the {@code d}-dimensional subspace
* @return a list of all {@code (d-1)}-dimensional subspaces
*/
- private List<Subspace<V>> lowerSubspaces(Subspace<V> subspace) {
+ private List<Subspace> lowerSubspaces(Subspace subspace) {
int dimensionality = subspace.dimensionality();
if(dimensionality <= 1) {
return null;
}
// order result according to the dimensions
- List<Subspace<V>> result = new ArrayList<Subspace<V>>();
+ List<Subspace> result = new ArrayList<Subspace>();
BitSet dimensions = subspace.getDimensions();
for(int dim = dimensions.nextSetBit(0); dim >= 0; dim = dimensions.nextSetBit(dim + 1)) {
BitSet newDimensions = (BitSet) dimensions.clone();
newDimensions.set(dim, false);
- result.add(new Subspace<V>(newDimensions));
+ result.add(new Subspace(newDimensions));
}
return result;
@@ -428,10 +429,10 @@ public class SUBCLU<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
* -dimensional candidate with minimal number of objects in the
* cluster
*/
- private Subspace<V> bestSubspace(List<Subspace<V>> subspaces, Subspace<V> candidate, TreeMap<Subspace<V>, List<Cluster<Model>>> clusterMap) {
- Subspace<V> bestSubspace = null;
+ private Subspace bestSubspace(List<Subspace> subspaces, Subspace candidate, TreeMap<Subspace, List<Cluster<Model>>> clusterMap) {
+ Subspace bestSubspace = null;
- for(Subspace<V> subspace : subspaces) {
+ for(Subspace subspace : subspaces) {
int min = Integer.MAX_VALUE;
if(subspace.isSubspace(candidate)) {
@@ -456,7 +457,7 @@ public class SUBCLU<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -466,7 +467,7 @@ public class SUBCLU<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
protected int minpts = 0;
protected DoubleDistance epsilon = null;
@@ -486,7 +487,8 @@ public class SUBCLU<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
epsilon = epsilonP.getValue();
}
- IntParameter minptsP = new IntParameter(MINPTS_ID, new GreaterConstraint(0));
+ IntParameter minptsP = new IntParameter(MINPTS_ID);
+ minptsP.addConstraint(new GreaterConstraint(0));
if(config.grab(minptsP)) {
minpts = minptsP.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUESubspace.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUESubspace.java
index eff71a35..6b22b233 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUESubspace.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUESubspace.java
@@ -46,7 +46,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
*
* @param <V> the type of NumberVector this subspace contains
*/
-public class CLIQUESubspace<V extends NumberVector<V, ?>> extends Subspace<V> {
+public class CLIQUESubspace<V extends NumberVector<?>> extends Subspace {
/**
* The dense units belonging to this subspace.
*/
@@ -103,14 +103,14 @@ public class CLIQUESubspace<V extends NumberVector<V, ?>> extends Subspace<V> {
*
* @return the clusters in this subspace and the corresponding cluster models
*/
- public List<Pair<Subspace<V>, ModifiableDBIDs>> determineClusters() {
- List<Pair<Subspace<V>, ModifiableDBIDs>> clusters = new ArrayList<Pair<Subspace<V>, ModifiableDBIDs>>();
+ public List<Pair<Subspace, ModifiableDBIDs>> determineClusters() {
+ List<Pair<Subspace, ModifiableDBIDs>> clusters = new ArrayList<Pair<Subspace, ModifiableDBIDs>>();
for(CLIQUEUnit<V> unit : getDenseUnits()) {
if(!unit.isAssigned()) {
ModifiableDBIDs cluster = DBIDUtil.newHashSet();
CLIQUESubspace<V> model = new CLIQUESubspace<V>(getDimensions());
- clusters.add(new Pair<Subspace<V>, ModifiableDBIDs>(model, cluster));
+ clusters.add(new Pair<Subspace, ModifiableDBIDs>(model, cluster));
dfs(unit, cluster, model);
}
}
@@ -151,8 +151,8 @@ public class CLIQUESubspace<V extends NumberVector<V, ?>> extends Subspace<V> {
* @param dim the dimension
* @return the left neighbor of the given unit in the specified dimension
*/
- public CLIQUEUnit<V> leftNeighbor(CLIQUEUnit<V> unit, Integer dim) {
- Interval i = unit.getInterval(dim);
+ public CLIQUEUnit<V> leftNeighbor(CLIQUEUnit<V> unit, int dim) {
+ Interval i = unit.getInterval(Integer.valueOf(dim));
for(CLIQUEUnit<V> u : getDenseUnits()) {
if(u.containsLeftNeighbor(i)) {
@@ -238,10 +238,10 @@ public class CLIQUESubspace<V extends NumberVector<V, ?>> extends Subspace<V> {
*/
@Override
public String toString(String pre) {
- StringBuffer result = new StringBuffer();
+ StringBuilder result = new StringBuilder();
result.append(super.toString(pre));
- result.append("\n").append(pre).append("Coverage: ").append(coverage);
- result.append("\n").append(pre).append("Units: " + "\n");
+ result.append('\n').append(pre).append("Coverage: ").append(coverage);
+ result.append('\n').append(pre).append("Units: \n");
for(CLIQUEUnit<V> denseUnit : getDenseUnits()) {
result.append(pre).append(" ").append(denseUnit.toString()).append(" ").append(denseUnit.getIds().size()).append(" objects\n");
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUEUnit.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUEUnit.java
index db687567..70f251c9 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUEUnit.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUEUnit.java
@@ -23,15 +23,15 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.clique;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.HashMap;
+import gnu.trove.map.hash.TIntObjectHashMap;
+
import java.util.Iterator;
-import java.util.Map;
import java.util.SortedSet;
import java.util.TreeSet;
import de.lmu.ifi.dbs.elki.data.Interval;
import de.lmu.ifi.dbs.elki.data.NumberVector;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
@@ -46,7 +46,7 @@ import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
*
* @param <V> the type of NumberVector this unit contains
*/
-public class CLIQUEUnit<V extends NumberVector<V, ?>> {
+public class CLIQUEUnit<V extends NumberVector<?>> {
/**
* The one-dimensional intervals of which this unit is build.
*/
@@ -56,7 +56,7 @@ public class CLIQUEUnit<V extends NumberVector<V, ?>> {
* Provides a mapping of particular dimensions to the intervals of which this
* unit is build.
*/
- private Map<Integer, Interval> dimensionToInterval;
+ private TIntObjectHashMap<Interval> dimensionToInterval;
/**
* The ids of the feature vectors this unit contains.
@@ -77,7 +77,7 @@ public class CLIQUEUnit<V extends NumberVector<V, ?>> {
public CLIQUEUnit(SortedSet<Interval> intervals, ModifiableDBIDs ids) {
this.intervals = intervals;
- dimensionToInterval = new HashMap<Integer, Interval>();
+ dimensionToInterval = new TIntObjectHashMap<Interval>();
for(Interval interval : intervals) {
dimensionToInterval.put(interval.getDimension(), interval);
}
@@ -96,7 +96,7 @@ public class CLIQUEUnit<V extends NumberVector<V, ?>> {
intervals = new TreeSet<Interval>();
intervals.add(interval);
- dimensionToInterval = new HashMap<Integer, Interval>();
+ dimensionToInterval = new TIntObjectHashMap<Interval>();
dimensionToInterval.put(interval.getDimension(), interval);
ids = DBIDUtil.newHashSet();
@@ -114,7 +114,7 @@ public class CLIQUEUnit<V extends NumberVector<V, ?>> {
*/
public boolean contains(V vector) {
for(Interval interval : intervals) {
- double value = vector.doubleValue(interval.getDimension() + 1);
+ final double value = vector.doubleValue(interval.getDimension());
if(interval.getMin() > value || value >= interval.getMax()) {
return false;
}
@@ -131,7 +131,7 @@ public class CLIQUEUnit<V extends NumberVector<V, ?>> {
* @return true, if this unit contains the specified feature vector, false
* otherwise
*/
- public boolean addFeatureVector(DBID id, V vector) {
+ public boolean addFeatureVector(DBIDRef id, V vector) {
if(contains(vector)) {
ids.add(id);
return true;
@@ -284,9 +284,9 @@ public class CLIQUEUnit<V extends NumberVector<V, ?>> {
*/
@Override
public String toString() {
- StringBuffer result = new StringBuffer();
+ StringBuilder result = new StringBuilder();
for(Interval interval : intervals) {
- result.append(interval).append(" ");
+ result.append(interval).append(' ');
}
return result.toString();
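
Note: the dimension-to-interval map becomes a Trove TIntObjectHashMap keyed by primitive int rather than boxed Integer. A sketch, assuming get() returns null for absent keys just like HashMap, and an Interval value from the surrounding code:

    TIntObjectHashMap<Interval> dimensionToInterval = new TIntObjectHashMap<Interval>();
    dimensionToInterval.put(interval.getDimension(), interval);
    Interval i = dimensionToInterval.get(dim); // null when dim is not mapped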
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelClustering.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelClustering.java
index ee42a59f..af8fb1ea 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelClustering.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelClustering.java
@@ -79,19 +79,19 @@ public class ByLabelClustering extends AbstractAlgorithm<Clustering<Model>> impl
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(ByLabelClustering.class);
+ private static final Logging LOG = Logging.getLogger(ByLabelClustering.class);
/**
* Flag to indicate that multiple cluster assignment is possible. If an
* assignment to multiple clusters is desired, the labels indicating the
* clusters need to be separated by blanks.
*/
- public static final OptionID MULTIPLE_ID = OptionID.getOrCreateOptionID("bylabelclustering.multiple", "Flag to indicate that only subspaces with large coverage " + "(i.e. the fraction of the database that is covered by the dense units) " + "are selected, the rest will be pruned.");
+ public static final OptionID MULTIPLE_ID = new OptionID("bylabelclustering.multiple", "Flag to indicate that multiple cluster assignment is possible. " + "If an assignment to multiple clusters is desired, " + "the labels indicating the clusters need to be separated by blanks.");
/**
* Pattern to recognize noise clusters by.
*/
- public static final OptionID NOISE_ID = OptionID.getOrCreateOptionID("bylabelclustering.noise", "Pattern to recognize noise classes by their label.");
+ public static final OptionID NOISE_ID = new OptionID("bylabelclustering.noise", "Pattern to recognize noise classes by their label.");
/**
* Holds the value of {@link #MULTIPLE_ID}.
@@ -226,7 +226,7 @@ public class ByLabelClustering extends AbstractAlgorithm<Clustering<Model>> impl
}
}
else {
- labelMap.put(label, id.getDBID());
+ labelMap.put(label, DBIDUtil.deref(id));
}
}
@@ -237,7 +237,7 @@ public class ByLabelClustering extends AbstractAlgorithm<Clustering<Model>> impl
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -260,7 +260,8 @@ public class ByLabelClustering extends AbstractAlgorithm<Clustering<Model>> impl
multiple = multipleF.getValue();
}
- PatternParameter noisepatP = new PatternParameter(NOISE_ID, true);
+ PatternParameter noisepatP = new PatternParameter(NOISE_ID);
+ noisepatP.setOptional(true);
if(config.grab(noisepatP)) {
noisepat = noisepatP.getValue();
}
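
The parameterization change here recurs throughout the patch: OptionIDs are constructed directly, and optionality and value constraints are set on the parameter object instead of being packed into constructor arguments. A hedged sketch of the combined idiom (assumes the ELKI 0.5 option-handling API; SKETCH_ID and the surrounding class are hypothetical):

    import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
    import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
    import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
    import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;

    public class ParameterIdiomSketch {
      public static final OptionID SKETCH_ID = new OptionID("sketch.k", "Hypothetical example parameter.");

      int k = 30;

      void makeOptions(Parameterization config) {
        IntParameter kP = new IntParameter(SKETCH_ID, 30); // default value
        kP.addConstraint(new GreaterEqualConstraint(1));   // was a constructor argument
        kP.setOptional(true);                              // was a constructor flag
        if (config.grab(kP)) {
          k = kP.getValue();
        }
      }
    }
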
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelHierarchicalClustering.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelHierarchicalClustering.java
index 26bf525a..dfb7d37f 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelHierarchicalClustering.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelHierarchicalClustering.java
@@ -73,7 +73,7 @@ public class ByLabelHierarchicalClustering extends AbstractAlgorithm<Clustering<
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(ByLabelHierarchicalClustering.class);
+ private static final Logging LOG = Logging.getLogger(ByLabelHierarchicalClustering.class);
/**
* Constructor without parameters
@@ -178,7 +178,7 @@ public class ByLabelHierarchicalClustering extends AbstractAlgorithm<Clustering<
}
}
else {
- labelMap.put(label, id.getDBID());
+ labelMap.put(label, DBIDUtil.deref(id));
}
}
@@ -189,6 +189,6 @@ public class ByLabelHierarchicalClustering extends AbstractAlgorithm<Clustering<
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
}
\ No newline at end of file
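
The id.getDBID() to DBIDUtil.deref(id) change follows the reworked DBID API used all over this patch: iterators are transient DBIDRef views that are compared through DBIDUtil and only materialized into permanent DBIDs when stored. A minimal sketch under that API:

    import de.lmu.ifi.dbs.elki.database.ids.DBID;
    import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
    import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
    import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
    import de.lmu.ifi.dbs.elki.database.ids.DBIDs;

    public class DBIDRefSketch {
      // Return the first id equal to the query, or null if absent.
      static DBID firstEqual(DBIDs ids, DBIDRef query) {
        for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
          if (DBIDUtil.equal(iter, query)) {
            return DBIDUtil.deref(iter); // materialize only when kept
          }
        }
        return null;
      }
    }
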
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByModelClustering.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByModelClustering.java
index 90ca3625..2114ac16 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByModelClustering.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByModelClustering.java
@@ -65,12 +65,12 @@ public class ByModelClustering extends AbstractAlgorithm<Clustering<Model>> impl
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(ByModelClustering.class);
+ private static final Logging LOG = Logging.getLogger(ByModelClustering.class);
/**
* Pattern to recognize noise clusters with
*/
- public static final OptionID NOISE_ID = OptionID.getOrCreateOptionID("bymodel.noise", "Pattern to recognize noise models by their label.");
+ public static final OptionID NOISE_ID = new OptionID("bymodel.noise", "Pattern to recognize noise models by their label.");
/**
* Holds the value of {@link #NOISE_ID}.
@@ -133,7 +133,7 @@ public class ByModelClustering extends AbstractAlgorithm<Clustering<Model>> impl
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -149,7 +149,8 @@ public class ByModelClustering extends AbstractAlgorithm<Clustering<Model>> impl
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- PatternParameter noisepatP = new PatternParameter(NOISE_ID, true);
+ PatternParameter noisepatP = new PatternParameter(NOISE_ID);
+ noisepatP.setOptional(true);
if(config.grab(noisepatP)) {
noisepat = noisepatP.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllInOne.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllInOne.java
index 2e7d006d..eaa5d2b2 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllInOne.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllInOne.java
@@ -51,7 +51,7 @@ public class TrivialAllInOne extends AbstractAlgorithm<Clustering<Model>> implem
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(TrivialAllInOne.class);
+ private static final Logging LOG = Logging.getLogger(TrivialAllInOne.class);
/**
* Constructor, adhering to
@@ -76,6 +76,6 @@ public class TrivialAllInOne extends AbstractAlgorithm<Clustering<Model>> implem
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
}
\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllNoise.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllNoise.java
index c497632c..dd0f94a5 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllNoise.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllNoise.java
@@ -50,7 +50,7 @@ public class TrivialAllNoise extends AbstractAlgorithm<Clustering<Model>> implem
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(TrivialAllNoise.class);
+ private static final Logging LOG = Logging.getLogger(TrivialAllNoise.class);
/**
* Constructor, adhering to
@@ -75,6 +75,6 @@ public class TrivialAllNoise extends AbstractAlgorithm<Clustering<Model>> implem
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
}
\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java
index 88a62e38..d52a81fd 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java
@@ -25,13 +25,11 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
import java.util.Collections;
import java.util.HashMap;
-import java.util.Iterator;
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
-import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.QueryUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
@@ -42,13 +40,13 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.distance.similarityfunction.PrimitiveSimilarityFunction;
import de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.KernelMatrix;
@@ -66,11 +64,11 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
-import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair;
/**
* Angle-Based Outlier Detection
@@ -92,39 +90,39 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair;
@Title("ABOD: Angle-Based Outlier Detection")
@Description("Outlier detection using variance analysis on angles, especially for high dimensional data sets.")
@Reference(authors = "H.-P. Kriegel, M. Schubert, and A. Zimek", title = "Angle-Based Outlier Detection in High-dimensional Data", booktitle = "Proc. 14th ACM SIGKDD Int. Conf. on Knowledge Discovery and Data Mining (KDD '08), Las Vegas, NV, 2008", url = "http://dx.doi.org/10.1145/1401890.1401946")
-public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlgorithm<V, DoubleDistance, OutlierResult> implements OutlierAlgorithm {
+public class ABOD<V extends NumberVector<?>> extends AbstractDistanceBasedAlgorithm<V, DoubleDistance, OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(ABOD.class);
+ private static final Logging LOG = Logging.getLogger(ABOD.class);
/**
* Parameter for k, the number of neighbors used in kNN queries.
*/
- public static final OptionID K_ID = OptionID.getOrCreateOptionID("abod.k", "Parameter k for kNN queries.");
+ public static final OptionID K_ID = new OptionID("abod.k", "Parameter k for kNN queries.");
/**
* Parameter for sample size to be used in fast mode.
*/
- public static final OptionID FAST_SAMPLE_ID = OptionID.getOrCreateOptionID("abod.samplesize", "Sample size to enable fast mode.");
+ public static final OptionID FAST_SAMPLE_ID = new OptionID("abod.samplesize", "Sample size to enable fast mode.");
/**
* Parameter for the kernel function.
*/
- public static final OptionID KERNEL_FUNCTION_ID = OptionID.getOrCreateOptionID("abod.kernelfunction", "Kernel function to use.");
+ public static final OptionID KERNEL_FUNCTION_ID = new OptionID("abod.kernelfunction", "Kernel function to use.");
/**
* The preprocessor used to materialize the kNN neighborhoods.
*/
- public static final OptionID PREPROCESSOR_ID = OptionID.getOrCreateOptionID("abod.knnquery", "Processor to compute the kNN neighborhoods.");
+ public static final OptionID PREPROCESSOR_ID = new OptionID("abod.knnquery", "Processor to compute the kNN neighborhoods.");
/**
- * use alternate code below
+ * use alternate code below.
*/
- private static final boolean useRNDSample = false;
+ private static final boolean USE_RND_SAMPLE = false;
/**
- * k parameter
+ * k parameter.
*/
private int k;
@@ -134,10 +132,13 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
int sampleSize = 0;
/**
- * Store the configured Kernel version
+ * Stores the configured kernel function.
*/
private PrimitiveSimilarityFunction<? super V, DoubleDistance> primitiveKernelFunction;
+ /**
+ * Static DBID map.
+ */
private ArrayModifiableDBIDs staticids = null;
/**
@@ -173,41 +174,32 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
* Main part of the algorithm. Exact version.
*
* @param relation Relation to query
- * @param k k for kNN queries
* @return result
*/
- public OutlierResult getRanking(Relation<V> relation, int k) {
+ public OutlierResult getRanking(Relation<V> relation) {
// Fix a static set of IDs
staticids = DBIDUtil.newArray(relation.getDBIDs());
staticids.sort();
KernelMatrix kernelMatrix = new KernelMatrix(primitiveKernelFunction, relation, staticids);
- Heap<DoubleObjPair<DBID>> pq = new Heap<DoubleObjPair<DBID>>(relation.size(), Collections.reverseOrder());
+ Heap<DoubleDBIDPair> pq = new Heap<DoubleDBIDPair>(relation.size(), Collections.reverseOrder());
// preprocess kNN neighborhoods
- assert (k == this.k);
KNNQuery<V, DoubleDistance> knnQuery = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k);
MeanVariance s = new MeanVariance();
- for(DBIDIter objKey = relation.iterDBIDs(); objKey.valid(); objKey.advance()) {
+ for (DBIDIter objKey = relation.iterDBIDs(); objKey.valid(); objKey.advance()) {
s.reset();
- // System.out.println("Processing: " +objKey);
KNNResult<DoubleDistance> neighbors = knnQuery.getKNNForDBID(objKey, k);
- Iterator<DistanceResultPair<DoubleDistance>> iter = neighbors.iterator();
- while(iter.hasNext()) {
- DistanceResultPair<DoubleDistance> key1 = iter.next();
- // Iterator iter2 = data.keyIterator();
- Iterator<DistanceResultPair<DoubleDistance>> iter2 = neighbors.iterator();
- // PriorityQueue best = new PriorityQueue(false, k);
- while(iter2.hasNext()) {
- DistanceResultPair<DoubleDistance> key2 = iter2.next();
- if(key2.sameDBID(key1) || key1.sameDBID(objKey) || key2.sameDBID(objKey)) {
+ for (DBIDIter key1 = neighbors.iter(); key1.valid(); key1.advance()) {
+ for (DBIDIter key2 = neighbors.iter(); key2.valid(); key2.advance()) {
+ if (DBIDUtil.equal(key2, key1) || DBIDUtil.equal(key1, objKey) || DBIDUtil.equal(key2, objKey)) {
continue;
}
double nenner = calcDenominator(kernelMatrix, objKey, key1, key2);
- if(nenner != 0) {
+ if (nenner != 0) {
double sqrtnenner = Math.sqrt(nenner);
double tmp = calcNumerator(kernelMatrix, objKey, key1, key2) / nenner;
s.put(tmp, 1 / sqrtnenner);
@@ -217,14 +209,14 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
}
// Sample variance probably would be correct, however the numerical
// instabilities can actually break ABOD here.
- pq.add(new DoubleObjPair<DBID>(s.getNaiveVariance(), objKey.getDBID()));
+ pq.add(DBIDUtil.newPair(s.getNaiveVariance(), objKey));
}
DoubleMinMax minmaxabod = new DoubleMinMax();
WritableDoubleDataStore abodvalues = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
- for(DoubleObjPair<DBID> pair : pq) {
- abodvalues.putDouble(pair.getSecond(), pair.first);
- minmaxabod.put(pair.first);
+ for (DoubleDBIDPair pair : pq) {
+ abodvalues.putDouble(pair, pair.doubleValue());
+ minmaxabod.put(pair.doubleValue());
}
// Build result representation.
Relation<Double> scoreResult = new MaterializedRelation<Double>("Angle-based Outlier Degree", "abod-outlier", TypeUtil.DOUBLE, abodvalues, relation.getDBIDs());
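
The DoubleObjPair<DBID> to DoubleDBIDPair rewrite keeps both the score and the id unboxed; since a DoubleDBIDPair is itself a DBIDRef, it can be passed directly to putDouble() above. A reduced sketch of the ranking loop (the actual variance computation is elided; Heap import path as used in this codebase):

    import java.util.Collections;

    import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
    import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
    import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
    import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair;
    import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap;

    public class RankingSketch {
      static Heap<DoubleDBIDPair> rank(DBIDs ids) {
        // reverseOrder() puts the largest score on top of the heap.
        Heap<DoubleDBIDPair> pq = new Heap<DoubleDBIDPair>(ids.size(), Collections.reverseOrder());
        for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
          double score = 0.0; // placeholder for the real score computation
          pq.add(DBIDUtil.newPair(score, it));
        }
        return pq;
      }
    }
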
@@ -236,11 +228,9 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
* Main part of the algorithm. Fast version.
*
* @param relation Relation to use
- * @param k k for kNN queries
- * @param sampleSize Sample size
* @return result
*/
- public OutlierResult getFastRanking(Relation<V> relation, int k, int sampleSize) {
+ public OutlierResult getFastRanking(Relation<V> relation) {
final DBIDs ids = relation.getDBIDs();
// Fix a static set of IDs
// TODO: add a DBIDUtil.ensureSorted?
@@ -249,92 +239,72 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
KernelMatrix kernelMatrix = new KernelMatrix(primitiveKernelFunction, relation, staticids);
- Heap<DoubleObjPair<DBID>> pq = new Heap<DoubleObjPair<DBID>>(relation.size(), Collections.reverseOrder());
+ Heap<DoubleDBIDPair> pq = new Heap<DoubleDBIDPair>(relation.size(), Collections.reverseOrder());
// get Candidate Ranking
- for(DBIDIter aKey = relation.iterDBIDs(); aKey.valid(); aKey.advance()) {
+ for (DBIDIter aKey = relation.iterDBIDs(); aKey.valid(); aKey.advance()) {
WritableDoubleDataStore dists = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
// determine kNearestNeighbors and pairwise distances
- Heap<DoubleObjPair<DBID>> nn;
- if(!useRNDSample) {
+ Heap<DoubleDBIDPair> nn;
+ if (!USE_RND_SAMPLE) {
nn = calcDistsandNN(relation, kernelMatrix, sampleSize, aKey, dists);
- }
- else {
+ } else {
// alternative:
nn = calcDistsandRNDSample(relation, kernelMatrix, sampleSize, aKey, dists);
}
// get normalization
double[] counter = calcFastNormalization(aKey, dists, staticids);
- // System.out.println(counter[0] + " " + counter2[0] + " " + counter[1] +
- // " " + counter2[1]);
// convert the heap (pq) into a list
ModifiableDBIDs neighbors = DBIDUtil.newArray(nn.size());
- while(!nn.isEmpty()) {
- neighbors.add(nn.remove().getSecond());
+ while (!nn.isEmpty()) {
+ neighbors.add(nn.poll());
}
// getFilter
double var = getAbofFilter(kernelMatrix, aKey, dists, counter[1], counter[0], neighbors);
- pq.add(new DoubleObjPair<DBID>(var, aKey.getDBID()));
- // System.out.println("prog "+(prog++));
+ pq.add(DBIDUtil.newPair(var, aKey));
}
// refine Candidates
- Heap<DoubleObjPair<DBID>> resqueue = new Heap<DoubleObjPair<DBID>>(k);
- // System.out.println(pq.size() + " objects ordered into candidate list.");
- // int v = 0;
+ Heap<DoubleDBIDPair> resqueue = new Heap<DoubleDBIDPair>(k);
MeanVariance s = new MeanVariance();
- while(!pq.isEmpty()) {
- if(resqueue.size() == k && pq.peek().first > resqueue.peek().first) {
+ while (!pq.isEmpty()) {
+ if (resqueue.size() == k && pq.peek().doubleValue() > resqueue.peek().doubleValue()) {
break;
}
// double approx = pq.peek().getFirst();
- DBID aKey = pq.remove().getSecond();
- // if(!result.isEmpty()) {
- // System.out.println("Best Candidate " + aKey+" : " + pq.firstPriority()
- // + " worst result: " + result.firstPriority());
- // } else {
- // System.out.println("Best Candidate " + aKey+" : " + pq.firstPriority()
- // + " worst result: " + Double.MAX_VALUE);
- // }
- // v++;
+ DBIDRef aKey = pq.poll();
s.reset();
- for(DBIDIter bKey = relation.iterDBIDs(); bKey.valid(); bKey.advance()) {
- if(bKey.sameDBID(aKey)) {
+ for (DBIDIter bKey = relation.iterDBIDs(); bKey.valid(); bKey.advance()) {
+ if (DBIDUtil.equal(bKey, aKey)) {
continue;
}
- for(DBIDIter cKey = relation.iterDBIDs(); cKey.valid(); cKey.advance()) {
- if(cKey.sameDBID(aKey)) {
+ for (DBIDIter cKey = relation.iterDBIDs(); cKey.valid(); cKey.advance()) {
+ if (DBIDUtil.equal(cKey, aKey)) {
continue;
}
// double nenner = dists[y]*dists[z];
double nenner = calcDenominator(kernelMatrix, aKey, bKey, cKey);
- if(nenner != 0) {
+ if (nenner != 0) {
double tmp = calcNumerator(kernelMatrix, aKey, bKey, cKey) / nenner;
double sqrtNenner = Math.sqrt(nenner);
s.put(tmp, 1 / sqrtNenner);
}
}
}
- // System.out.println( aKey + "Sum " + sum + " SQRSum " +sqrSum +
- // " Counter " + counter);
double var = s.getSampleVariance();
- // System.out.println(aKey+ " : " + approx +" " + var);
- if(resqueue.size() < k) {
- resqueue.add(new DoubleObjPair<DBID>(var, aKey));
- }
- else {
- if(resqueue.peek().first > var) {
- resqueue.remove();
- resqueue.add(new DoubleObjPair<DBID>(var, aKey));
+ if (resqueue.size() < k) {
+ resqueue.add(DBIDUtil.newPair(var, aKey));
+ } else {
+ if (resqueue.peek().doubleValue() > var) {
+ resqueue.replaceTopElement(DBIDUtil.newPair(var, aKey));
}
}
}
- // System.out.println(v + " Punkte von " + data.size() + " verfeinert !!");
DoubleMinMax minmaxabod = new DoubleMinMax();
WritableDoubleDataStore abodvalues = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
- for(DoubleObjPair<DBID> pair : pq) {
- abodvalues.putDouble(pair.getSecond(), pair.first);
- minmaxabod.put(pair.first);
+ for (DoubleDBIDPair pair : pq) {
+ abodvalues.putDouble(pair, pair.doubleValue());
+ minmaxabod.put(pair.doubleValue());
}
// Build result representation.
Relation<Double> scoreResult = new MaterializedRelation<Double>("Angle-based Outlier Detection", "abod-outlier", TypeUtil.DOUBLE, abodvalues, ids);
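
The replaceTopElement() calls introduced above collapse a remove() plus add() into a single heap sift. A self-contained sketch of the same bounded-heap pattern, keeping the k smallest values with the largest of them on top:

    import java.util.Collections;

    import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap;

    public class BoundedHeapSketch {
      static Heap<Double> kSmallest(double[] values, int k) {
        Heap<Double> heap = new Heap<Double>(k, Collections.reverseOrder()); // max-heap
        for (double v : values) {
          if (heap.size() < k) {
            heap.add(v);
          } else if (heap.peek() > v) {
            heap.replaceTopElement(v); // one sift instead of remove() + add()
          }
        }
        return heap;
      }
    }
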
@@ -348,7 +318,7 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
double sum = 0;
double sumF = 0;
for (DBIDIter yKey = ids.iter(); yKey.valid(); yKey.advance()) {
- if(dists.doubleValue(yKey) != 0) {
+ if (dists.doubleValue(yKey) != 0) {
double tmp = 1 / Math.sqrt(dists.doubleValue(yKey));
sum += tmp;
sumF += (1 / dists.doubleValue(yKey)) * tmp;
@@ -357,7 +327,7 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
double sofar = 0;
double sofarF = 0;
for (DBIDIter zKey = ids.iter(); zKey.valid(); zKey.advance()) {
- if(dists.doubleValue(zKey) != 0) {
+ if (dists.doubleValue(zKey) != 0) {
double tmp = 1 / Math.sqrt(dists.doubleValue(zKey));
sofar += tmp;
double rest = sum - sofar;
@@ -375,17 +345,17 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
double sum = 0.0;
double sqrSum = 0.0;
double partCounter = 0;
- for(DBIDIter bKey = neighbors.iter(); bKey.valid(); bKey.advance()) {
- if(bKey.sameDBID(aKey)) {
+ for (DBIDIter bKey = neighbors.iter(); bKey.valid(); bKey.advance()) {
+ if (DBIDUtil.equal(bKey, aKey)) {
continue;
}
- for(DBIDIter cKey = neighbors.iter(); cKey.valid(); cKey.advance()) {
- if(cKey.sameDBID(aKey)) {
+ for (DBIDIter cKey = neighbors.iter(); cKey.valid(); cKey.advance()) {
+ if (DBIDUtil.equal(cKey, aKey)) {
continue;
}
- if(bKey.compareDBID(cKey) > 0) {
+ if (DBIDUtil.compare(bKey, cKey) > 0) {
double nenner = dists.doubleValue(bKey) * dists.doubleValue(cKey);
- if(nenner != 0) {
+ if (nenner != 0) {
double tmp = calcNumerator(kernelMatrix, aKey, bKey, cKey) / nenner;
double sqrtNenner = Math.sqrt(nenner);
sum += tmp * (1 / sqrtNenner);
@@ -417,7 +387,7 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
private int mapDBID(DBIDRef aKey) {
// TODO: this is not the most efficient...
int off = staticids.binarySearch(aKey);
- if(off < 0) {
+ if (off < 0) {
throw new AbortException("Did not find id " + aKey.toString() + " in staticids. " + staticids.contains(aKey));
}
return off + 1;
@@ -434,33 +404,31 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
return (kernelMatrix.getDistance(ai, ai) + kernelMatrix.getDistance(bi, ci) - kernelMatrix.getDistance(ai, ci) - kernelMatrix.getDistance(ai, bi));
}
- private Heap<DoubleObjPair<DBID>> calcDistsandNN(Relation<V> data, KernelMatrix kernelMatrix, int sampleSize, DBIDRef aKey, WritableDoubleDataStore dists) {
- Heap<DoubleObjPair<DBID>> nn = new Heap<DoubleObjPair<DBID>>(sampleSize);
- for(DBIDIter bKey = data.iterDBIDs(); bKey.valid(); bKey.advance()) {
+ private Heap<DoubleDBIDPair> calcDistsandNN(Relation<V> data, KernelMatrix kernelMatrix, int sampleSize, DBIDRef aKey, WritableDoubleDataStore dists) {
+ Heap<DoubleDBIDPair> nn = new Heap<DoubleDBIDPair>(sampleSize);
+ for (DBIDIter bKey = data.iterDBIDs(); bKey.valid(); bKey.advance()) {
double val = calcCos(kernelMatrix, aKey, bKey);
dists.putDouble(bKey, val);
- if(nn.size() < sampleSize) {
- nn.add(new DoubleObjPair<DBID>(val, bKey.getDBID()));
- }
- else {
- if(val < nn.peek().first) {
- nn.remove();
- nn.add(new DoubleObjPair<DBID>(val, bKey.getDBID()));
+ if (nn.size() < sampleSize) {
+ nn.add(DBIDUtil.newPair(val, bKey));
+ } else {
+ if (val < nn.peek().doubleValue()) {
+ nn.replaceTopElement(DBIDUtil.newPair(val, bKey));
}
}
}
return nn;
}
- private Heap<DoubleObjPair<DBID>> calcDistsandRNDSample(Relation<V> data, KernelMatrix kernelMatrix, int sampleSize, DBIDRef aKey, WritableDoubleDataStore dists) {
- Heap<DoubleObjPair<DBID>> nn = new Heap<DoubleObjPair<DBID>>(sampleSize);
+ private Heap<DoubleDBIDPair> calcDistsandRNDSample(Relation<V> data, KernelMatrix kernelMatrix, int sampleSize, DBIDRef aKey, WritableDoubleDataStore dists) {
+ Heap<DoubleDBIDPair> nn = new Heap<DoubleDBIDPair>(sampleSize);
int step = (int) ((double) data.size() / (double) sampleSize);
int counter = 0;
- for(DBIDIter bKey = data.iterDBIDs(); bKey.valid(); bKey.advance()) {
+ for (DBIDIter bKey = data.iterDBIDs(); bKey.valid(); bKey.advance()) {
double val = calcCos(kernelMatrix, aKey, bKey);
dists.putDouble(bKey, val);
- if(counter % step == 0) {
- nn.add(new DoubleObjPair<DBID>(val, bKey.getDBID()));
+ if (counter % step == 0) {
+ nn.add(DBIDUtil.newPair(val, bKey));
}
counter++;
}
@@ -471,112 +439,108 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
* Get explanations for points in the database.
*
* @param data to get explanations for
+ * @return String explanation
*/
// TODO: this should be done by the result classes.
- public void getExplanations(Relation<V> data) {
+ public String getExplanations(Relation<V> data) {
KernelMatrix kernelMatrix = new KernelMatrix(primitiveKernelFunction, data, staticids);
// PQ for Outlier Ranking
- Heap<DoubleObjPair<DBID>> pq = new Heap<DoubleObjPair<DBID>>(data.size(), Collections.reverseOrder());
+ Heap<DoubleDBIDPair> pq = new Heap<DoubleDBIDPair>(data.size(), Collections.reverseOrder());
HashMap<DBID, DBIDs> explaintab = new HashMap<DBID, DBIDs>();
// test all objects
MeanVariance s = new MeanVariance(), s2 = new MeanVariance();
- for(DBIDIter objKey = data.iterDBIDs(); objKey.valid(); objKey.advance()) {
+ for (DBIDIter objKey = data.iterDBIDs(); objKey.valid(); objKey.advance()) {
s.reset();
// Queue for the best explanation
- Heap<DoubleObjPair<DBID>> explain = new Heap<DoubleObjPair<DBID>>();
+ Heap<DoubleDBIDPair> explain = new Heap<DoubleDBIDPair>();
// determine Object
// for each pair of other objects
for (DBIDIter key1 = data.iterDBIDs(); key1.valid(); key1.advance()) {
- // Collect Explanation Vectors
+ // Collect Explanation Vectors
s2.reset();
- if(objKey.sameDBID(key1)) {
+ if (DBIDUtil.equal(objKey, key1)) {
continue;
}
for (DBIDIter key2 = data.iterDBIDs(); key2.valid(); key2.advance()) {
- if(key2.sameDBID(key1) || objKey.sameDBID(key2)) {
+ if (DBIDUtil.equal(key2, key1) || DBIDUtil.equal(objKey, key2)) {
continue;
}
double nenner = calcDenominator(kernelMatrix, objKey, key1, key2);
- if(nenner != 0) {
+ if (nenner != 0) {
double tmp = calcNumerator(kernelMatrix, objKey, key1, key2) / nenner;
double sqr = Math.sqrt(nenner);
s2.put(tmp, 1 / sqr);
}
}
- explain.add(new DoubleObjPair<DBID>(s2.getSampleVariance(), key1.getDBID()));
+ explain.add(DBIDUtil.newPair(s2.getSampleVariance(), key1));
s.put(s2);
}
// build variance of the observed vectors
- pq.add(new DoubleObjPair<DBID>(s.getSampleVariance(), objKey.getDBID()));
+ pq.add(DBIDUtil.newPair(s.getSampleVariance(), objKey));
//
ModifiableDBIDs expList = DBIDUtil.newArray();
- expList.add(explain.remove().getSecond());
- while(!explain.isEmpty()) {
- DBID nextKey = explain.remove().getSecond();
- if(nextKey.sameDBID(objKey)) {
+ expList.add(explain.poll());
+ while (!explain.isEmpty()) {
+ DBIDRef nextKey = explain.poll();
+ if (DBIDUtil.equal(nextKey, objKey)) {
continue;
}
double max = Double.MIN_VALUE;
- for(DBIDIter exp = expList.iter(); exp.valid(); exp.advance()) {
- if(exp.sameDBID(objKey) || nextKey.sameDBID(exp)) {
+ for (DBIDIter exp = expList.iter(); exp.valid(); exp.advance()) {
+ if (DBIDUtil.equal(exp, objKey) || DBIDUtil.equal(nextKey, exp)) {
continue;
}
double nenner = Math.sqrt(calcCos(kernelMatrix, objKey, nextKey)) * Math.sqrt(calcCos(kernelMatrix, objKey, exp));
double angle = calcNumerator(kernelMatrix, objKey, nextKey, exp) / nenner;
max = Math.max(angle, max);
}
- if(max < 0.5) {
+ if (max < 0.5) {
expList.add(nextKey);
}
}
- explaintab.put(objKey.getDBID(), expList);
+ explaintab.put(DBIDUtil.deref(objKey), expList);
}
- System.out.println("--------------------------------------------");
- System.out.println("Result: ABOD");
+ StringBuilder buf = new StringBuilder();
+ buf.append("Result: ABOD\n");
int count = 0;
- while(!pq.isEmpty()) {
- if(count > 10) {
+ while (!pq.isEmpty()) {
+ if (count > 10) {
break;
}
- double factor = pq.peek().first;
- DBID key = pq.remove().getSecond();
- System.out.print(data.get(key) + " ");
- System.out.println(count + " Factor=" + factor + " " + key);
+ double factor = pq.peek().doubleValue();
+ DBIDRef key = pq.poll();
+ buf.append(data.get(key)).append(' ');
+ buf.append(count).append(" Factor=").append(factor).append(' ').append(key).append('\n');
DBIDs expList = explaintab.get(key);
- generateExplanation(data, key, expList);
+ generateExplanation(buf, data, key, expList);
count++;
}
- System.out.println("--------------------------------------------");
+ return buf.toString();
}
- private void generateExplanation(Relation<V> data, DBID key, DBIDs expList) {
+ private void generateExplanation(StringBuilder buf, Relation<V> data, DBIDRef key, DBIDs expList) {
Vector vect1 = data.get(key).getColumnVector();
- for(DBIDIter iter = expList.iter(); iter.valid(); iter.advance()) {
- System.out.println("Outlier: " + vect1);
+ for (DBIDIter iter = expList.iter(); iter.valid(); iter.advance()) {
+ buf.append("Outlier: ").append(vect1).append('\n');
Vector exp = data.get(iter).getColumnVector();
- System.out.println("Most common neighbor: " + exp);
+ buf.append("Most common neighbor: ").append(exp).append('\n');
// determine difference Vector
Vector vals = exp.minus(vect1);
- System.out.println(vals);
- // System.out.println(new FeatureVector(
- // "Diff-"+vect1.getPrimaryKey(),vals ));
+ buf.append(vals).append('\n');
}
- System.out.println();
}
/**
- * Run ABOD on the data set
+ * Run ABOD on the data set.
*
- * @param database
- * @param relation
+ * @param relation Relation to process
* @return Outlier detection result
*/
- public OutlierResult run(Database database, Relation<V> relation) {
- if(sampleSize > 0) {
- return getFastRanking(relation, k, sampleSize);
- }
- else {
- return getRanking(relation, k);
+ public OutlierResult run(Relation<V> relation) {
+ if (sampleSize > 0) {
+ return getFastRanking(relation);
+ } else {
+ return getRanking(relation);
}
}
@@ -587,7 +551,7 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -597,26 +561,38 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<V, DoubleDistance> {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<V, DoubleDistance> {
+ /**
+ * k Parameter.
+ */
protected int k = 0;
+ /**
+ * Sample size.
+ */
protected int sampleSize = 0;
+ /**
+ * Kernel similarity function.
+ */
protected PrimitiveSimilarityFunction<V, DoubleDistance> primitiveKernelFunction = null;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter kP = new IntParameter(K_ID, new GreaterEqualConstraint(1), 30);
- if(config.grab(kP)) {
+ final IntParameter kP = new IntParameter(K_ID, 30);
+ kP.addConstraint(new GreaterEqualConstraint(1));
+ if (config.grab(kP)) {
k = kP.getValue();
}
- final IntParameter sampleSizeP = new IntParameter(FAST_SAMPLE_ID, new GreaterEqualConstraint(1), true);
- if(config.grab(sampleSizeP)) {
+ final IntParameter sampleSizeP = new IntParameter(FAST_SAMPLE_ID);
+ sampleSizeP.addConstraint(new GreaterEqualConstraint(1));
+ sampleSizeP.setOptional(true);
+ if (config.grab(sampleSizeP)) {
sampleSize = sampleSizeP.getValue();
}
final ObjectParameter<PrimitiveSimilarityFunction<V, DoubleDistance>> param = new ObjectParameter<PrimitiveSimilarityFunction<V, DoubleDistance>>(KERNEL_FUNCTION_ID, PrimitiveSimilarityFunction.class, PolynomialKernelFunction.class);
- if(config.grab(param)) {
+ if (config.grab(param)) {
primitiveKernelFunction = param.instantiateClass(config);
}
}
@@ -626,4 +602,4 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
return new ABOD<V>(k, sampleSize, primitiveKernelFunction, distanceFunction);
}
}
-}
\ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ALOCI.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ALOCI.java
index 39c3db60..41da687f 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ALOCI.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ALOCI.java
@@ -36,10 +36,12 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
@@ -51,6 +53,7 @@ import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -58,8 +61,8 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.LongParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
/**
@@ -78,17 +81,19 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
* @author Jonathan von Brünken
* @author Erich Schubert
*
+ * @apiviz.composedOf ALOCIQuadTree
+ *
* @param <O> Object type
* @param <D> Distance type
*/
@Title("LOCI: Fast Outlier Detection Using the Local Correlation Integral")
@Description("Algorithm to compute outliers based on the Local Correlation Integral")
@Reference(authors = "S. Papadimitriou, H. Kitagawa, P. B. Gibbons, C. Faloutsos", title = "LOCI: Fast Outlier Detection Using the Local Correlation Integral", booktitle = "Proc. 19th IEEE Int. Conf. on Data Engineering (ICDE '03), Bangalore, India, 2003", url = "http://dx.doi.org/10.1109/ICDE.2003.1260802")
-public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(ALOCI.class);
+ private static final Logging LOG = Logging.getLogger(ALOCI.class);
/**
* Minimum size for a leaf.
@@ -108,7 +113,7 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>>
/**
* Random generator
*/
- private Random random;
+ private RandomFactory rnd;
/**
* Distance function
@@ -122,20 +127,21 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>>
* @param nmin Minimum neighborhood size
* @param alpha Alpha value
* @param g Number of grids to use
- * @param seed Random generator seed.
+ * @param rnd Random generator.
*/
- public ALOCI(NumberVectorDistanceFunction<D> distanceFunction, int nmin, int alpha, int g, Long seed) {
+ public ALOCI(NumberVectorDistanceFunction<D> distanceFunction, int nmin, int alpha, int g, RandomFactory rnd) {
super();
this.distFunc = distanceFunction;
this.nmin = nmin;
this.alpha = alpha;
this.g = g;
- this.random = (seed != null) ? new Random(seed) : new Random(0);
+ this.rnd = rnd;
}
public OutlierResult run(Database database, Relation<O> relation) {
- final int dim = DatabaseUtil.dimensionality(relation);
- FiniteProgress progressPreproc = logger.isVerbose() ? new FiniteProgress("Build aLOCI quadtress", g, logger) : null;
+ final int dim = RelationUtil.dimensionality(relation);
+ final Random random = rnd.getRandom();
+ FiniteProgress progressPreproc = LOG.isVerbose() ? new FiniteProgress("Build aLOCI quadtrees", g, LOG) : null;
// Compute extend of dataset.
double[] min, max;
@@ -145,13 +151,13 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>>
min = new double[dim];
max = new double[dim];
for(int i = 0; i < dim; i++) {
- min[i] = hbbs.first.doubleValue(i + 1);
- max[i] = hbbs.second.doubleValue(i + 1);
+ min[i] = hbbs.first.doubleValue(i);
+ max[i] = hbbs.second.doubleValue(i);
maxd = Math.max(maxd, max[i] - min[i]);
}
// Enlarge bounding box to have equal lengths.
for(int i = 0; i < dim; i++) {
- double diff = (maxd - (max[i] - min[i])) / 2;
+ double diff = (maxd - (max[i] - min[i])) * .5;
min[i] -= diff;
max[i] += diff;
}
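
The /2 to *.5 rewrite is cosmetic, but the step itself matters: each dimension is padded symmetrically until the bounding box is a hypercube of side maxd, presumably so that the quadtree splits every dimension at comparable scales. A standalone sketch of the padding:

    public class CubePadSketch {
      // Pad [min[i], max[i]] on both sides so every dimension has length maxd.
      static void enlargeToCube(double[] min, double[] max) {
        double maxd = 0.;
        for (int i = 0; i < min.length; i++) {
          maxd = Math.max(maxd, max[i] - min[i]);
        }
        for (int i = 0; i < min.length; i++) {
          double diff = (maxd - (max[i] - min[i])) * .5;
          min[i] -= diff;
          max[i] += diff;
        }
      }
    }
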
@@ -163,7 +169,7 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>>
ALOCIQuadTree qt = new ALOCIQuadTree(min, max, nshift, nmin, relation);
qts.add(qt);
if(progressPreproc != null) {
- progressPreproc.incrementProcessed(logger);
+ progressPreproc.incrementProcessed(LOG);
}
/*
* create the remaining g-1 shifted QuadTrees. This is not clearly described in
@@ -178,19 +184,19 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>>
qt = new ALOCIQuadTree(min, max, svec, nmin, relation);
qts.add(qt);
if(progressPreproc != null) {
- progressPreproc.incrementProcessed(logger);
+ progressPreproc.incrementProcessed(LOG);
}
}
if(progressPreproc != null) {
- progressPreproc.ensureCompleted(logger);
+ progressPreproc.ensureCompleted(LOG);
}
// aLOCI main loop: evaluate
- FiniteProgress progressLOCI = logger.isVerbose() ? new FiniteProgress("Compute aLOCI scores", relation.size(), logger) : null;
+ FiniteProgress progressLOCI = LOG.isVerbose() ? new FiniteProgress("Compute aLOCI scores", relation.size(), LOG) : null;
WritableDoubleDataStore mdef_norm = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
DoubleMinMax minmax = new DoubleMinMax();
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
final O obj = relation.get(iditer);
double maxmdefnorm = 0;
@@ -239,11 +245,11 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>>
mdef_norm.putDouble(iditer, maxmdefnorm);
minmax.put(maxmdefnorm);
if(progressLOCI != null) {
- progressLOCI.incrementProcessed(logger);
+ progressLOCI.incrementProcessed(LOG);
}
}
if(progressLOCI != null) {
- progressLOCI.ensureCompleted(logger);
+ progressLOCI.ensureCompleted(LOG);
}
Relation<Double> scoreResult = new MaterializedRelation<Double>("aLOCI normalized MDEF", "aloci-mdef-outlier", TypeUtil.DOUBLE, mdef_norm, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY);
@@ -291,7 +297,7 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>>
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
@Override
@@ -329,7 +335,7 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>>
/**
* Relation indexed.
*/
- private Relation<? extends NumberVector<?, ?>> relation;
+ private Relation<? extends NumberVector<?>> relation;
/**
* Constructor.
@@ -340,7 +346,7 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>>
* @param nmin Maximum size for a page to split
* @param relation Relation to index
*/
- public ALOCIQuadTree(double[] min, double[] max, double[] shift, int nmin, Relation<? extends NumberVector<?, ?>> relation) {
+ public ALOCIQuadTree(double[] min, double[] max, double[] shift, int nmin, Relation<? extends NumberVector<?>> relation) {
super();
assert (min.length <= 32) : "Quadtrees are only supported for up to 32 dimensions";
this.shift = shift;
@@ -386,11 +392,14 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>>
// logger.warning(FormatUtil.format(lmin)+" "+FormatUtil.format(lmax)+" "+start+"->"+end+" "+(end-start));
// Hack: Check degenerate cases that won't split
if(dim == 0) {
- NumberVector<?, ?> first = relation.get(ids.get(start));
+ DBIDArrayIter iter = ids.iter();
+ iter.seek(start);
+ NumberVector<?> first = relation.get(iter);
+ iter.advance();
boolean degenerate = true;
- loop: for(int pos = start + 1; pos < end; pos++) {
- NumberVector<?, ?> other = relation.get(ids.get(pos));
- for(int d = 1; d <= lmin.length; d++) {
+ loop: for(; iter.getOffset() < end; iter.advance()) {
+ NumberVector<?> other = relation.get(iter);
+ for(int d = 0; d < lmin.length; d++) {
if(Math.abs(first.doubleValue(d) - other.doubleValue(d)) > 1E-15) {
degenerate = false;
break loop;
@@ -431,20 +440,23 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>>
}
else {
// Partially sort data, by dimension dim < mid
- int spos = start, epos = end;
- while(spos < epos) {
- if(getShiftedDim(relation.get(ids.get(spos)), dim, level) <= .5) {
- spos++;
+ DBIDArrayIter siter = ids.iter(), eiter = ids.iter();
+ siter.seek(start);
+ eiter.seek(end - 1);
+ while(siter.getOffset() < eiter.getOffset()) {
+ if(getShiftedDim(relation.get(siter), dim, level) <= .5) {
+ siter.advance();
continue;
}
- if(getShiftedDim(relation.get(ids.get(epos - 1)), dim, level) > 0.5) {
- epos--;
+ if(getShiftedDim(relation.get(eiter), dim, level) > 0.5) {
+ eiter.retract();
continue;
}
- ids.swap(spos, epos - 1);
- spos++;
- epos--;
+ ids.swap(siter.getOffset(), eiter.getOffset() - 1);
+ siter.advance();
+ eiter.retract();
}
+ final int spos = siter.getOffset();
if(start < spos) {
final double tmp = lmax[dim];
lmax[dim] = lmax[dim] * .5 + lmin[dim] * .5;
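
The rewritten loop expresses a two-pointer partition with DBIDArrayIter offsets instead of raw integers. A plain-array sketch of the same scheme, moving values <= .5 to the front (the final fix-up classifies the element the two pointers meet on):

    public class PartitionSketch {
      static int partition(double[] a, int start, int end) {
        int s = start, e = end - 1;
        while (s < e) {
          if (a[s] <= .5) { s++; continue; } // already on the correct side
          if (a[e] > .5) { e--; continue; }  // already on the correct side
          double tmp = a[s]; a[s] = a[e]; a[e] = tmp;
          s++; e--;
        }
        if (s < end && a[s] <= .5) { s++; } // classify the meeting element
        return s; // first index of the "> .5" block
      }
    }
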
@@ -468,8 +480,8 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>>
* @param level Level (controls scaling/wrapping!)
* @return Shifted position
*/
- private double getShiftedDim(NumberVector<?, ?> obj, int dim, int level) {
- double pos = obj.doubleValue(dim + 1) + shift[dim];
+ private double getShiftedDim(NumberVector<?> obj, int dim, int level) {
+ double pos = obj.doubleValue(dim) + shift[dim];
pos = (pos - min[dim]) / width[dim] * (1 + level);
return pos - Math.floor(pos);
}
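
getShiftedDim() projects one coordinate onto the shifted grid of the given level and keeps the fractional part, so positions wrap into [0, 1); the doubleValue(dim + 1) to doubleValue(dim) change is part of the patch-wide switch to 0-based dimension indexing. A pure-Java sketch of the shift-and-wrap:

    public class ShiftWrapSketch {
      // Map value onto a grid shifted by `shift` and scaled by level, into [0, 1).
      static double shiftedPosition(double value, double shift, double min, double width, int level) {
        double pos = (value + shift - min) / width * (1 + level);
        return pos - Math.floor(pos); // fractional part: wraps around
      }

      public static void main(String[] args) {
        System.out.println(shiftedPosition(3.25, 0., 0., 1., 0)); // 0.25
      }
    }
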
@@ -482,7 +494,7 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>>
* @param tlevel Target level
* @return Node
*/
- public Node findClosestNode(NumberVector<?, ?> vec, int tlevel) {
+ public Node findClosestNode(NumberVector<?> vec, int tlevel) {
Node cur = root;
for(int level = 0; level <= tlevel; level++) {
if(cur.children == null) {
@@ -637,26 +649,26 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>>
*
* @apiviz.exclude
*/
- public static class Parameterizer<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractParameterizer {
/**
* Parameter to specify the minimum neighborhood size
*/
- public static final OptionID NMIN_ID = OptionID.getOrCreateOptionID("loci.nmin", "Minimum neighborhood size to be considered.");
+ public static final OptionID NMIN_ID = new OptionID("loci.nmin", "Minimum neighborhood size to be considered.");
/**
* Parameter to specify the averaging neighborhood scaling.
*/
- public static final OptionID ALPHA_ID = OptionID.getOrCreateOptionID("loci.alpha", "Scaling factor for averaging neighborhood");
+ public static final OptionID ALPHA_ID = new OptionID("loci.alpha", "Scaling factor for averaging neighborhood");
/**
* Parameter to specify the number of Grids to use.
*/
- public static final OptionID GRIDS_ID = OptionID.getOrCreateOptionID("loci.g", "The number of Grids to use.");
+ public static final OptionID GRIDS_ID = new OptionID("loci.g", "The number of Grids to use.");
/**
* Parameter to specify the seed to initialize Random.
*/
- public static final OptionID SEED_ID = OptionID.getOrCreateOptionID("loci.seed", "The seed to use for initializing Random.");
+ public static final OptionID SEED_ID = new OptionID("loci.seed", "The seed to use for initializing Random.");
/**
* Neighborhood minimum size
@@ -674,9 +686,9 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>>
protected int g = 1;
/**
- * Random generator seed
+ * Random generator
*/
- protected Long seed = null;
+ protected RandomFactory rnd;
/**
* The distance function
@@ -702,9 +714,9 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>>
this.g = g.getValue();
}
- final LongParameter seedP = new LongParameter(SEED_ID, true);
- if(config.grab(seedP)) {
- this.seed = seedP.getValue();
+ final RandomParameter rndP = new RandomParameter(SEED_ID);
+ if(config.grab(rndP)) {
+ this.rnd = rndP.getValue();
}
final IntParameter alphaP = new IntParameter(ALPHA_ID, 4);
@@ -718,7 +730,7 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>>
@Override
protected ALOCI<O, D> makeInstance() {
- return new ALOCI<O, D>(distanceFunction, nmin, alpha, g, seed);
+ return new ALOCI<O, D>(distanceFunction, nmin, alpha, g, rnd);
}
}
-}
\ No newline at end of file
+}
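
The Long seed to RandomFactory migration also recurs in several files of this patch: the algorithm stores a factory configured via RandomParameter and draws a fresh java.util.Random per run, rather than keeping a nullable seed and a shared generator. A minimal sketch of the consuming side:

    import java.util.Random;

    import de.lmu.ifi.dbs.elki.utilities.RandomFactory;

    public class RandomFactorySketch {
      private final RandomFactory rnd;

      public RandomFactorySketch(RandomFactory rnd) {
        this.rnd = rnd;
      }

      double draw() {
        Random random = rnd.getRandom(); // fresh generator for this run
        return random.nextDouble();
      }
    }
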
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java
index 9c1a216a..2a4885dc 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java
@@ -25,28 +25,26 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
import java.util.ArrayList;
import java.util.Collections;
-import java.util.Vector;
import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair;
import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair;
import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair;
/**
@@ -64,19 +62,11 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair;
*
* @author Ahmed Hettab
* @author Erich Schubert
+ *
+ * @param <V> Vector type
*/
@Reference(authors = "C.C. Aggarwal, P. S. Yu", title = "Outlier detection for high dimensional data", booktitle = "Proc. ACM SIGMOD Int. Conf. on Management of Data (SIGMOD 2001), Santa Barbara, CA, 2001", url = "http://dx.doi.org/10.1145/375663.375668")
-public abstract class AbstractAggarwalYuOutlier<V extends NumberVector<?, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
- /**
- * OptionID for the grid size
- */
- public static final OptionID PHI_ID = OptionID.getOrCreateOptionID("ay.phi", "The number of equi-depth grid ranges to use in each dimension.");
-
- /**
- * OptionID for the target dimensionality
- */
- public static final OptionID K_ID = OptionID.getOrCreateOptionID("ay.k", "Subspace dimensionality to search for.");
-
+public abstract class AbstractAggarwalYuOutlier<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* Symbolic value for subspaces not in use.
*
@@ -86,7 +76,7 @@ public abstract class AbstractAggarwalYuOutlier<V extends NumberVector<?, ?>> ex
public static final int DONT_CARE = 0;
/**
- * The number of partitions for each dimension
+ * The number of partitions for each dimension.
*/
protected int phi;
@@ -112,33 +102,32 @@ public abstract class AbstractAggarwalYuOutlier<V extends NumberVector<?, ?>> ex
* Each attribute of data is divided into phi equi-depth ranges.<br />
* Each range contains a fraction f=1/phi of the records.
*
- * @param database
+ * @param relation Relation to process
* @return range map
*/
- protected ArrayList<ArrayList<DBIDs>> buildRanges(Relation<V> database) {
- final int dim = DatabaseUtil.dimensionality(database);
- final int size = database.size();
- final DBIDs allids = database.getDBIDs();
+ protected ArrayList<ArrayList<DBIDs>> buildRanges(Relation<V> relation) {
+ final int dim = RelationUtil.dimensionality(relation);
+ final int size = relation.size();
+ final DBIDs allids = relation.getDBIDs();
final ArrayList<ArrayList<DBIDs>> ranges = new ArrayList<ArrayList<DBIDs>>();
// Temporary projection storage of the database
- final ArrayList<ArrayList<DoubleObjPair<DBID>>> dbAxis = new ArrayList<ArrayList<DoubleObjPair<DBID>>>(dim);
+ final ArrayList<ArrayList<DoubleDBIDPair>> dbAxis = new ArrayList<ArrayList<DoubleDBIDPair>>(dim);
for(int i = 0; i < dim; i++) {
- ArrayList<DoubleObjPair<DBID>> axis = new ArrayList<DoubleObjPair<DBID>>(size);
+ ArrayList<DoubleDBIDPair> axis = new ArrayList<DoubleDBIDPair>(size);
dbAxis.add(i, axis);
}
// Project
for(DBIDIter iter = allids.iter(); iter.valid(); iter.advance()) {
- DBID id = iter.getDBID();
- final V obj = database.get(id);
- for(int d = 1; d <= dim; d++) {
- dbAxis.get(d - 1).add(new DoubleObjPair<DBID>(obj.doubleValue(d), id));
+ final V obj = relation.get(iter);
+ for(int d = 0; d < dim; d++) {
+ dbAxis.get(d).add(DBIDUtil.newPair(obj.doubleValue(d), iter));
}
}
// Split into cells
final double part = size * 1.0 / phi;
- for(int d = 1; d <= dim; d++) {
- ArrayList<DoubleObjPair<DBID>> axis = dbAxis.get(d - 1);
+ for(int d = 0; d < dim; d++) {
+ ArrayList<DoubleDBIDPair> axis = dbAxis.get(d);
Collections.sort(axis);
ArrayList<DBIDs> dimranges = new ArrayList<DBIDs>(phi + 1);
dimranges.add(allids);
@@ -150,7 +139,7 @@ public abstract class AbstractAggarwalYuOutlier<V extends NumberVector<?, ?>> ex
}
ArrayModifiableDBIDs currange = DBIDUtil.newArray(phi + 1);
for(int i = start; i < end; i++) {
- currange.add(axis.get(i).second);
+ currange.add(axis.get(i));
}
start = end;
dimranges.add(currange);
@@ -161,14 +150,15 @@ public abstract class AbstractAggarwalYuOutlier<V extends NumberVector<?, ?>> ex
}
/**
- * Method to calculate the sparsity coefficient of
+ * Method to calculate the sparsity coefficient.
*
* @param setsize Size of subset
* @param dbsize Size of database
* @param k Dimensionality
+ * @param phi Phi parameter
* @return sparsity coefficient
*/
- protected double sparsity(final int setsize, final int dbsize, final int k) {
+ protected static double sparsity(final int setsize, final int dbsize, final int k, final double phi) {
// calculate sparsity c
final double f = 1. / phi;
final double fK = Math.pow(f, k);
@@ -177,16 +167,17 @@ public abstract class AbstractAggarwalYuOutlier<V extends NumberVector<?, ?>> ex
}
/**
- * Method to get the ids in the given subspace
+ * Method to get the ids in the given subspace.
*
- * @param subspace
+ * @param subspace Subspace to process
+ * @param ranges List of DBID ranges
* @return ids
*/
- protected DBIDs computeSubspace(Vector<IntIntPair> subspace, ArrayList<ArrayList<DBIDs>> ranges) {
- HashSetModifiableDBIDs ids = DBIDUtil.newHashSet(ranges.get(subspace.get(0).first - 1).get(subspace.get(0).second));
+ protected DBIDs computeSubspace(ArrayList<IntIntPair> subspace, ArrayList<ArrayList<DBIDs>> ranges) {
+ HashSetModifiableDBIDs ids = DBIDUtil.newHashSet(ranges.get(subspace.get(0).first).get(subspace.get(0).second));
// intersect all selected dimensions
for(int i = 1; i < subspace.size(); i++) {
- DBIDs current = ranges.get(subspace.get(i).first - 1).get(subspace.get(i).second);
+ DBIDs current = ranges.get(subspace.get(i).first).get(subspace.get(i).second);
ids.retainAll(current);
if(ids.size() == 0) {
break;
@@ -226,19 +217,37 @@ public abstract class AbstractAggarwalYuOutlier<V extends NumberVector<?, ?>> ex
*
* @apiviz.exclude
*/
- public static abstract class Parameterizer extends AbstractParameterizer {
- protected Integer phi;
+ public abstract static class Parameterizer extends AbstractParameterizer {
+ /**
+ * OptionID for the grid size.
+ */
+ public static final OptionID PHI_ID = new OptionID("ay.phi", "The number of equi-depth grid ranges to use in each dimension.");
+
+ /**
+ * OptionID for the target dimensionality.
+ */
+ public static final OptionID K_ID = new OptionID("ay.k", "Subspace dimensionality to search for.");
+
+ /**
+ * Phi parameter.
+ */
+ protected int phi;
- protected Integer k;
+ /**
+ * k Parameter.
+ */
+ protected int k;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter kP = new IntParameter(K_ID, new GreaterEqualConstraint(2));
+ final IntParameter kP = new IntParameter(K_ID);
+ kP.addConstraint(new GreaterEqualConstraint(2));
if(config.grab(kP)) {
k = kP.getValue();
}
- final IntParameter phiP = new IntParameter(PHI_ID, new GreaterEqualConstraint(2));
+ final IntParameter phiP = new IntParameter(PHI_ID);
+ phiP.addConstraint(new GreaterEqualConstraint(2));
if(config.grab(phiP)) {
phi = phiP.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractDBOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractDBOutlier.java
index a5ccce3a..0e6f502a 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractDBOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractDBOutlier.java
@@ -27,7 +27,7 @@ import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
-import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.DoubleDataStore;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
@@ -56,7 +56,7 @@ public abstract class AbstractDBOutlier<O, D extends Distance<D>> extends Abstra
/**
* Parameter to specify the size of the D-neighborhood
*/
- public static final OptionID D_ID = OptionID.getOrCreateOptionID("dbod.d", "size of the D-neighborhood");
+ public static final OptionID D_ID = new OptionID("dbod.d", "size of the D-neighborhood");
/**
* Holds the value of {@link #D_ID}.
@@ -83,7 +83,7 @@ public abstract class AbstractDBOutlier<O, D extends Distance<D>> extends Abstra
*/
public OutlierResult run(Database database, Relation<O> relation) {
// Run the actual score process
- DataStore<Double> dbodscore = computeOutlierScores(database, relation, d);
+ DoubleDataStore dbodscore = computeOutlierScores(database, relation, d);
// Build result representation.
Relation<Double> scoreResult = new MaterializedRelation<Double>("Density-Based Outlier Detection", "db-outlier", TypeUtil.DOUBLE, dbodscore, relation.getDBIDs());
@@ -99,7 +99,7 @@ public abstract class AbstractDBOutlier<O, D extends Distance<D>> extends Abstra
* @param d distance
* @return computed scores
*/
- protected abstract DataStore<Double> computeOutlierScores(Database database, Relation<O> relation, D d);
+ protected abstract DoubleDataStore computeOutlierScores(Database database, Relation<O> relation, D d);
@Override
public TypeInformation[] getInputTypeRestriction() {
@@ -113,7 +113,7 @@ public abstract class AbstractDBOutlier<O, D extends Distance<D>> extends Abstra
*
* @apiviz.exclude
*/
- public static abstract class Parameterizer<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ public abstract static class Parameterizer<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
/**
* Query radius
*/
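
Narrowing computeOutlierScores from DataStore&lt;Double&gt; to DoubleDataStore lets subclasses read and write primitive doubles keyed directly by DBID iterators. A hedged sketch of a trivial subclass body under the new signature (constant scores, for illustration only; class names as used elsewhere in this commit):

    @Override
    protected DoubleDataStore computeOutlierScores(Database database, Relation<O> relation, D d) {
      WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
      for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
        scores.putDouble(iter, 0.0); // primitive write, no Double boxing
      }
      return scores; // a WritableDoubleDataStore is a DoubleDataStore
    }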
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java
index 1d02e865..c263cdfa 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java
@@ -37,18 +37,18 @@ import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
@@ -58,7 +58,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.LongParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
import de.lmu.ifi.dbs.elki.utilities.pairs.FCPair;
import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
@@ -85,40 +85,26 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
@Title("EAFOD: the evolutionary outlier detection algorithm")
@Description("Outlier detection for high dimensional data")
@Reference(authors = "C.C. Aggarwal, P. S. Yu", title = "Outlier detection for high dimensional data", booktitle = "Proc. ACM SIGMOD Int. Conf. on Management of Data (SIGMOD 2001), Santa Barbara, CA, 2001", url = "http://dx.doi.org/10.1145/375663.375668")
-public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends AbstractAggarwalYuOutlier<V> {
+public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractAggarwalYuOutlier<V> {
/**
* The logger for this class.
*/
- protected static final Logging logger = Logging.getLogger(AggarwalYuEvolutionary.class);
-
- /**
- * Parameter to specify the number of solutions must be an integer greater
- * than 1.
- * <p>
- * Key: {@code -eafod.m}
- * </p>
- */
- public static final OptionID M_ID = OptionID.getOrCreateOptionID("ay.m", "Population size for evolutionary algorithm.");
-
- /**
- * Parameter to specify the random generator seed.
- */
- public static final OptionID SEED_ID = OptionID.getOrCreateOptionID("ay.seed", "The random number generator seed.");
+ private static final Logging LOG = Logging.getLogger(AggarwalYuEvolutionary.class);
/**
* Maximum iteration count for evolutionary search.
*/
- protected final int MAX_ITERATIONS = 1000;
+ protected static final int MAX_ITERATIONS = 1000;
/**
- * Holds the value of {@link #M_ID}.
+ * Holds the value of {@link Parameterizer#M_ID}.
*/
private int m;
/**
- * Holds the value of {@link #SEED_ID}.
+ * Random generator.
*/
- private Long seed;
+ private RandomFactory rnd;
/**
* Constructor.
@@ -126,12 +112,12 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra
* @param k K
* @param phi Phi
* @param m M
- * @param seed Seed
+ * @param rnd Random generator
*/
- public AggarwalYuEvolutionary(int k, int phi, int m, Long seed) {
+ public AggarwalYuEvolutionary(int k, int phi, int m, RandomFactory rnd) {
super(k, phi);
this.m = m;
- this.seed = seed;
+ this.rnd = rnd;
}
/**
@@ -145,27 +131,25 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra
final int dbsize = relation.size();
ArrayList<ArrayList<DBIDs>> ranges = buildRanges(relation);
- Collection<Individuum> individuums = (new EvolutionarySearch(relation, ranges, m, seed)).run();
+ Iterable<Individuum> individuums = (new EvolutionarySearch(relation, ranges, m, rnd.getRandom())).run();
WritableDoubleDataStore outlierScore = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
for(Individuum ind : individuums) {
DBIDs ids = computeSubspaceForGene(ind.getGene(), ranges);
- double sparsityC = sparsity(ids.size(), dbsize, k);
+ double sparsityC = sparsity(ids.size(), dbsize, k, phi);
for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
- DBID id = iter.getDBID();
- double prev = outlierScore.doubleValue(id);
+ double prev = outlierScore.doubleValue(iter);
if(Double.isNaN(prev) || sparsityC < prev) {
- outlierScore.putDouble(id, sparsityC);
+ outlierScore.putDouble(iter, sparsityC);
}
}
}
DoubleMinMax minmax = new DoubleMinMax();
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
- double val = outlierScore.doubleValue(id);
+ double val = outlierScore.doubleValue(iditer);
if(Double.isNaN(val)) {
- outlierScore.putDouble(id, 0.0);
+ outlierScore.putDouble(iditer, 0.0);
val = 0.0;
}
minmax.put(val);
@@ -177,7 +161,7 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -189,17 +173,17 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra
*/
private class EvolutionarySearch {
/**
- * Database size
+ * Database size.
*/
final int dbsize;
/**
- * Database dimensionality
+ * Database dimensionality.
*/
final int dim;
/**
- * Database ranges
+ * Database ranges.
*/
final ArrayList<ArrayList<DBIDs>> ranges;
@@ -209,36 +193,34 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra
final int m;
/**
- * random generator
+ * random generator.
*/
final private Random random;
/**
* Constructor.
*
- * @param database Database to use
+ * @param relation Database to use
+ * @param ranges DBID ranges to process
* @param m Population size
- * @param seed Random generator seed
+ * @param random Random generator
*/
- public EvolutionarySearch(Relation<V> database, ArrayList<ArrayList<DBIDs>> ranges, int m, Long seed) {
+ public EvolutionarySearch(Relation<V> relation, ArrayList<ArrayList<DBIDs>> ranges, int m, Random random) {
super();
this.ranges = ranges;
this.m = m;
- this.dbsize = database.size();
- this.dim = DatabaseUtil.dimensionality(database);
- if(seed != null) {
- this.random = new Random(seed);
- }
- else {
- this.random = new Random();
- }
+ this.dbsize = relation.size();
+ this.dim = RelationUtil.dimensionality(relation);
+ this.random = random;
}
- public Collection<Individuum> run() {
+ public Iterable<Individuum> run() {
ArrayList<Individuum> pop = initialPopulation(m);
// best Population
TopBoundedHeap<Individuum> bestSol = new TopBoundedHeap<Individuum>(m, Collections.reverseOrder());
- bestSol.addAll(pop);
+ for (Individuum ind : pop) {
+ bestSol.add(ind);
+ }
int iterations = 0;
while(!checkConvergence(pop)) {
@@ -249,26 +231,29 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra
// Mutation with probability 0.25, 0.25
pop = mutation(pop, 0.5, 0.5);
// Avoid duplicates
- for(Individuum ind : pop) {
- if(!bestSol.contains(ind)) {
- bestSol.add(ind);
+ ind: for(Individuum ind : pop) {
+ for (Individuum b : bestSol) {
+ if (b.equals(ind)) {
+ continue ind;
+ }
}
+ bestSol.add(ind);
}
- if(logger.isDebuggingFinest()) {
- StringBuffer buf = new StringBuffer();
+ if(LOG.isDebuggingFinest()) {
+ StringBuilder buf = new StringBuilder();
buf.append("Top solutions:\n");
for(Individuum ind : bestSol) {
- buf.append(ind.toString()).append("\n");
+ buf.append(ind.toString()).append('\n');
}
buf.append("Population:\n");
for(Individuum ind : pop) {
- buf.append(ind.toString()).append("\n");
+ buf.append(ind.toString()).append('\n');
}
- logger.debugFinest(buf.toString());
+ LOG.debugFinest(buf.toString());
}
iterations++;
if(iterations > MAX_ITERATIONS) {
- logger.warning("Maximum iterations reached.");
+ LOG.warning("Maximum iterations reached.");
break;
}
}
@@ -276,7 +261,10 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra
}
/**
- * check the termination criterion
+ * check the termination criterion.
+ *
+ * @param pop Population
+ * @return Convergence
*/
private boolean checkConvergence(Collection<Individuum> pop) {
if(pop.size() == 0) {
@@ -291,7 +279,7 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra
for(int d = 0; d < dim; d++) {
int val = gene[d] + DONT_CARE;
if(val < 0 || val >= phi + 1) {
- logger.warning("Invalid gene value encountered: " + val + " in " + ind.toString());
+ LOG.warning("Invalid gene value encountered: " + val + " in " + ind.toString());
continue;
}
occur[d][val] += 1;
@@ -299,8 +287,8 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra
}
int conv = (int) (pop.size() * 0.95);
- if(logger.isDebuggingFine()) {
- logger.debugFine("Convergence at " + conv + " of " + pop.size() + " individuums.");
+ if(LOG.isDebuggingFine()) {
+ LOG.debugFine("Convergence at " + conv + " of " + pop.size() + " individuums.");
}
for(int d = 0; d < dim; d++) {
boolean converged = false;
@@ -353,18 +341,21 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra
}
/**
+ * Select surviving individuums weighted by rank.
+ *
* the selection criterion for the genetic algorithm: <br>
* roulette wheel mechanism: <br>
 * where the probability of sampling an individual of the population is
 * proportional to p - r(i), where p is the size of the population and r(i)
 * the rank of the i-th individual
*
- * @param population
+ * @param population Population
+ * @return Survivors
*/
private ArrayList<Individuum> rouletteRankSelection(ArrayList<Individuum> population) {
final int popsize = population.size();
// Relative weight := popsize - position => sum(1..popsize)
- int totalweight = popsize * (popsize + 1) / 2;
+ int totalweight = (popsize * (popsize + 1)) >> 1;
// Survivors
ArrayList<Individuum> survivors = new ArrayList<Individuum>(popsize);
@@ -392,7 +383,7 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra
}
/**
- * method implements the mutation algorithm
+ * Apply the mutation algorithm.
*/
private ArrayList<Individuum> mutation(ArrayList<Individuum> population, double perc1, double perc2) {
// the Mutations
@@ -470,7 +461,7 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra
*/
private Individuum makeIndividuum(int[] gene) {
final DBIDs ids = computeSubspaceForGene(gene, ranges);
- final double fitness = (ids.size() > 0) ? sparsity(ids.size(), dbsize, k) : Double.MAX_VALUE;
+ final double fitness = (ids.size() > 0) ? sparsity(ids.size(), dbsize, k, phi) : Double.MAX_VALUE;
return new Individuum(fitness, gene);
}
@@ -543,8 +534,8 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra
l1[next] = parent1.getGene()[next];
l2[next] = parent2.getGene()[next];
- final double sparsityL1 = sparsity(computeSubspaceForGene(l1, ranges).size(), dbsize, k);
- final double sparsityL2 = sparsity(computeSubspaceForGene(l2, ranges).size(), dbsize, k);
+ final double sparsityL1 = sparsity(computeSubspaceForGene(l1, ranges).size(), dbsize, k, phi);
+ final double sparsityL2 = sparsity(computeSubspaceForGene(l2, ranges).size(), dbsize, k, phi);
if(sparsityL1 <= sparsityL2) {
b = l1.clone();
@@ -619,6 +610,8 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra
* Individuum for the evolutionary search.
*
* @author Erich Schubert
+ *
+ * @apiviz.exclude de.lmu.ifi.dbs.elki.utilities.pairs.FCPair
*/
private static class Individuum extends FCPair<Double, int[]> {
/**
@@ -691,27 +684,42 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<?, ?>> extends AbstractAggarwalYuOutlier.Parameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractAggarwalYuOutlier.Parameterizer {
+ /**
+ * Parameter to specify the number of solutions (population size); must be
+ * an integer greater than 1.
+ * <p>
+ * Key: {@code -eafod.m}
+ * </p>
+ */
+ public static final OptionID M_ID = new OptionID("ay.m", "Population size for evolutionary algorithm.");
+
+ /**
+ * Parameter to specify the random generator seed.
+ */
+ public static final OptionID SEED_ID = new OptionID("ay.seed", "The random number generator seed.");
+
protected int m = 0;
- protected Long seed = null;
+ protected RandomFactory rnd;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter mP = new IntParameter(M_ID, new GreaterEqualConstraint(2));
+ final IntParameter mP = new IntParameter(M_ID);
+ mP.addConstraint(new GreaterEqualConstraint(2));
if(config.grab(mP)) {
m = mP.getValue();
}
- final LongParameter seedP = new LongParameter(SEED_ID, true);
- if(config.grab(seedP)) {
- seed = seedP.getValue();
+ final RandomParameter rndP = new RandomParameter(SEED_ID);
+ if(config.grab(rndP)) {
+ rnd = rndP.getValue();
}
}
@Override
protected AggarwalYuEvolutionary<V> makeInstance() {
- return new AggarwalYuEvolutionary<V>(k, phi, m, seed);
+ return new AggarwalYuEvolutionary<V>(k, phi, m, rnd);
}
}
-}
\ No newline at end of file
+}
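
All sparsity(...) calls above gain a phi argument, since the coefficient depends on the per-range selectivity f = 1/phi. For reference, a sketch of the sparsity coefficient from the referenced Aggarwal/Yu SIGMOD 2001 paper (a reconstruction, not a copy of the ELKI helper):

    // S(C) = (n(C) - N*f^k) / sqrt(N*f^k*(1 - f^k)), with f = 1/phi
    static double sparsity(int count, int dbsize, int k, int phi) {
      final double f = 1.0 / phi;
      final double fK = Math.pow(f, k);       // expected selectivity of a k-dim grid cell
      final double expected = dbsize * fK;    // expected object count under independence
      final double stddev = Math.sqrt(dbsize * fK * (1. - fK));
      return (count - expected) / stddev;     // strongly negative = sparser than expected
    }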
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuNaive.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuNaive.java
index 0bb73aba..9cd7d79f 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuNaive.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuNaive.java
@@ -24,7 +24,6 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
*/
import java.util.ArrayList;
-import java.util.Vector;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
@@ -35,12 +34,12 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -65,16 +64,18 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair;
*
* @author Ahmed Hettab
* @author Erich Schubert
+ *
+ * @param <V> Vector type
*/
// TODO: progress logging!
@Title("BruteForce: Outlier detection for high dimensional data")
@Description("Examines all possible sets of k dimensional projections")
@Reference(authors = "C.C. Aggarwal, P. S. Yu", title = "Outlier detection for high dimensional data", booktitle = "Proc. ACM SIGMOD Int. Conf. on Management of Data (SIGMOD 2001), Santa Barbara, CA, 2001", url = "http://dx.doi.org/10.1145/375663.375668")
-public class AggarwalYuNaive<V extends NumberVector<?, ?>> extends AbstractAggarwalYuOutlier<V> {
+public class AggarwalYuNaive<V extends NumberVector<?>> extends AbstractAggarwalYuOutlier<V> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(AggarwalYuNaive.class);
+ private static final Logging LOG = Logging.getLogger(AggarwalYuNaive.class);
/**
* Constructor.
@@ -93,23 +94,23 @@ public class AggarwalYuNaive<V extends NumberVector<?, ?>> extends AbstractAggar
* @return Outlier detection result
*/
public OutlierResult run(Relation<V> relation) {
- final int dimensionality = DatabaseUtil.dimensionality(relation);
+ final int dimensionality = RelationUtil.dimensionality(relation);
final int size = relation.size();
ArrayList<ArrayList<DBIDs>> ranges = buildRanges(relation);
- ArrayList<Vector<IntIntPair>> Rk;
+ ArrayList<ArrayList<IntIntPair>> Rk;
// Build a list of all subspaces
{
// R1 initial one-dimensional subspaces.
- Rk = new ArrayList<Vector<IntIntPair>>();
+ Rk = new ArrayList<ArrayList<IntIntPair>>();
// Set of all dim*phi ranges
ArrayList<IntIntPair> q = new ArrayList<IntIntPair>();
- for(int i = 1; i <= dimensionality; i++) {
+ for(int i = 0; i < dimensionality; i++) {
for(int j = 1; j <= phi; j++) {
IntIntPair s = new IntIntPair(i, j);
q.add(s);
// Add to first Rk
- Vector<IntIntPair> v = new Vector<IntIntPair>();
+ ArrayList<IntIntPair> v = new ArrayList<IntIntPair>();
v.add(s);
Rk.add(v);
}
@@ -117,10 +118,10 @@ public class AggarwalYuNaive<V extends NumberVector<?, ?>> extends AbstractAggar
// build Ri
for(int i = 2; i <= k; i++) {
- ArrayList<Vector<IntIntPair>> Rnew = new ArrayList<Vector<IntIntPair>>();
+ ArrayList<ArrayList<IntIntPair>> Rnew = new ArrayList<ArrayList<IntIntPair>>();
for(int j = 0; j < Rk.size(); j++) {
- Vector<IntIntPair> c = Rk.get(j);
+ ArrayList<IntIntPair> c = Rk.get(j);
for(IntIntPair pair : q) {
boolean invalid = false;
for(int t = 0; t < c.size(); t++) {
@@ -130,7 +131,7 @@ public class AggarwalYuNaive<V extends NumberVector<?, ?>> extends AbstractAggar
}
}
if(!invalid) {
- Vector<IntIntPair> neu = new Vector<IntIntPair>(c);
+ ArrayList<IntIntPair> neu = new ArrayList<IntIntPair>(c);
neu.add(pair);
Rnew.add(neu);
}
@@ -142,9 +143,9 @@ public class AggarwalYuNaive<V extends NumberVector<?, ?>> extends AbstractAggar
WritableDoubleDataStore sparsity = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
// calculate the sparsity coefficient
- for(Vector<IntIntPair> sub : Rk) {
+ for(ArrayList<IntIntPair> sub : Rk) {
DBIDs ids = computeSubspace(sub, ranges);
- final double sparsityC = sparsity(ids.size(), size, k);
+ final double sparsityC = sparsity(ids.size(), size, k, phi);
if(sparsityC < 0) {
for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
@@ -171,7 +172,7 @@ public class AggarwalYuNaive<V extends NumberVector<?, ?>> extends AbstractAggar
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -181,7 +182,7 @@ public class AggarwalYuNaive<V extends NumberVector<?, ?>> extends AbstractAggar
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<?, ?>> extends AbstractAggarwalYuOutlier.Parameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractAggarwalYuOutlier.Parameterizer {
@Override
protected AggarwalYuNaive<V> makeInstance() {
return new AggarwalYuNaive<V>(k, phi);
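
The Vector-to-ArrayList migration above merely drops the legacy synchronized container; the candidate generation logic is unchanged. In isolation, a sketch of that step with the same IntIntPair (dimension, range) encoding (hypothetical helper, not a method of this class):

    // Extend a (k-1)-dimensional candidate c by one (dimension, range) pair;
    // reject the pair if its dimension is already used in c.
    static ArrayList<IntIntPair> extend(ArrayList<IntIntPair> c, IntIntPair pair) {
      for(IntIntPair t : c) {
        if(t.first == pair.first) {
          return null; // duplicate dimension -> invalid candidate
        }
      }
      ArrayList<IntIntPair> neu = new ArrayList<IntIntPair>(c);
      neu.add(pair);
      return neu;
    }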
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/COP.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/COP.java
new file mode 100644
index 00000000..ac544b7f
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/COP.java
@@ -0,0 +1,385 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Arrays;
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.QueryUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAResult;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCARunner;
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.ChiSquaredDistribution;
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.GammaDistribution;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.EnumParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * Correlation outlier probability: Outlier Detection in Arbitrarily Oriented
+ * Subspaces
+ *
+ * <p>
+ * Hans-Peter Kriegel, Peer Kröger, Erich Schubert, Arthur Zimek<br />
+ * Outlier Detection in Arbitrarily Oriented Subspaces<br />
+ * in: Proc. IEEE International Conference on Data Mining (ICDM 2012)
+ * </p>
+ *
+ * @author Erich Schubert
+ *
+ * @param <V> the type of NumberVector handled by this Algorithm
+ * @param <D> Distance type
+ */
+@Title("COP: Correlation Outlier Probability")
+@Reference(authors = "Hans-Peter Kriegel, Peer Kröger, Erich Schubert, Arthur Zimek", title = "Outlier Detection in Arbitrarily Oriented Subspaces", booktitle = "Proc. IEEE International Conference on Data Mining (ICDM 2012)")
+public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<V, D, OutlierResult> implements OutlierAlgorithm {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(COP.class);
+
+ /**
+ * Result name for the COP outlier scores.
+ */
+ public static final String COP_SCORES = "cop-outlier";
+
+ /**
+ * Result name for the dimensionality.
+ */
+ public static final String COP_DIM = "cop-dim";
+
+ /**
+ * Result name for the error vectors.
+ */
+ public static final String COP_ERRORVEC = "cop-errorvec";
+
+ /**
+ * Number of neighbors to be considered.
+ */
+ int k;
+
+ /**
+ * Holds the PCA runner.
+ */
+ private PCARunner<V> pca;
+
+ /**
+ * Expected amount of outliers.
+ */
+ double expect = 0.0001;
+
+ /**
+ * Score type.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public enum DistanceDist {
+ /**
+ * Use chi^2 for score normalization.
+ */
+ CHISQUARED,
+ /**
+ * Use gamma distributions for score normalization.
+ */
+ GAMMA
+ }
+
+ /**
+ * Type of distribution to assume for distances.
+ */
+ DistanceDist dist = DistanceDist.CHISQUARED;
+
+ /**
+ * Constructor.
+ *
+ * @param distanceFunction distance function
+ * @param k number of neighbors
+ * @param pca PCA computation method
+ * @param expect Expected fraction of outliers (for score normalization)
+ * @param dist Distance distribution model (ChiSquared, Gamma)
+ */
+ public COP(DistanceFunction<? super V, D> distanceFunction, int k, PCARunner<V> pca, double expect, DistanceDist dist) {
+ super(distanceFunction);
+ this.k = k;
+ this.pca = pca;
+ this.expect = expect;
+ this.dist = dist;
+ }
+
+ /**
+ * Process a single relation.
+ *
+ * @param relation Relation to process
+ * @return Outlier detection result
+ */
+ public OutlierResult run(Relation<V> relation) {
+ final DBIDs ids = relation.getDBIDs();
+ KNNQuery<V, D> knnQuery = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k + 1);
+
+ final int dim = RelationUtil.dimensionality(relation);
+ if (k <= dim + 1) {
+ LOG.warning("PCA is underspecified with a too low k! k should be at much larger than " + dim);
+ }
+
+ WritableDoubleDataStore cop_score = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
+ WritableDataStore<Vector> cop_err_v = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, Vector.class);
+ WritableIntegerDataStore cop_dim = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, -1);
+ // compute neighbors of each db object
+ FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Correlation Outlier Probabilities", relation.size(), LOG) : null;
+
+ for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
+ KNNResult<D> neighbors = knnQuery.getKNNForDBID(id, k + 1);
+ ModifiableDBIDs nids = DBIDUtil.newHashSet(neighbors);
+ nids.remove(id); // Do not use query object
+
+ Vector centroid = Centroid.make(relation, nids).toVector(relation).getColumnVector();
+ Vector relative = relation.get(id).getColumnVector().minusEquals(centroid);
+
+ PCAResult pcares = pca.processIds(nids, relation);
+ Matrix evecs = pcares.getEigenvectors();
+ Vector projected = evecs.transposeTimes(relative);
+ double[] evs = pcares.getEigenvalues();
+
+ double min = Double.POSITIVE_INFINITY;
+ int vdim = dim;
+ switch(dist) {
+ case CHISQUARED: {
+ double sqdevs = 0;
+ for (int d = 0; d < dim; d++) {
+ // Scale with Stddev
+ double dev = projected.get(d);
+ // Accumulate
+ sqdevs += dev * dev / evs[d];
+ // Evaluate
+ double score = 1 - ChiSquaredDistribution.cdf(sqdevs, d + 1);
+ if (score < min) {
+ min = score;
+ vdim = d + 1;
+ }
+ }
+ break;
+ }
+ case GAMMA: {
+ double[][] dists = new double[dim][nids.size()];
+ int j = 0;
+ Vector srel = new Vector(dim);
+ for (DBIDIter s = nids.iter(); s.valid() && j < nids.size(); s.advance()) {
+ V vec = relation.get(s);
+ for (int d = 0; d < dim; d++) {
+ srel.set(d, vec.doubleValue(d) - centroid.get(d));
+ }
+ Vector serr = evecs.transposeTimes(srel);
+ double sqdist = 0.0;
+ for (int d = 0; d < dim; d++) {
+ sqdist += serr.get(d) * serr.get(d) / evs[d];
+ dists[d][j] = sqdist;
+ }
+ j++;
+ }
+ double sqdevs = 0;
+ for (int d = 0; d < dim; d++) {
+ // Scale with Stddev
+ final double dev = projected.get(d);
+ // Accumulate
+ sqdevs += dev * dev / evs[d];
+ // Sort, so we can trim the top 15% below.
+ Arrays.sort(dists[d]);
+ // Evaluate
+ double score = 1 - GammaDistribution.estimate(dists[d], (int) (.85 * dists[d].length)).cdf(sqdevs);
+ if (score < min) {
+ min = score;
+ vdim = d + 1;
+ }
+ }
+ break;
+ }
+ }
+ // Normalize the value
+ final double prob = expect * (1 - min) / (expect + min);
+ // Construct the error vector:
+ for (int d = vdim; d < dim; d++) {
+ projected.set(d, 0.0);
+ }
+ Vector ev = evecs.times(projected).timesEquals(-1 * prob);
+
+ cop_score.putDouble(id, prob);
+ cop_err_v.put(id, ev);
+ cop_dim.putInt(id, dim + 1 - vdim);
+
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
+ }
+ }
+ if (prog != null) {
+ prog.ensureCompleted(LOG);
+ }
+
+ // combine results.
+ Relation<Double> scoreResult = new MaterializedRelation<Double>("Correlation Outlier Probabilities", COP_SCORES, TypeUtil.DOUBLE, cop_score, ids);
+ OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore();
+ OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
+ result.addChildResult(new MaterializedRelation<Integer>("Local Dimensionality", COP_DIM, TypeUtil.INTEGER, cop_dim, ids));
+ result.addChildResult(new MaterializedRelation<Vector>("Error vectors", COP_ERRORVEC, TypeUtil.VECTOR, cop_err_v, ids));
+ return result;
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<V, D> {
+ /**
+ * Parameter to specify the number of nearest neighbors of an object to be
+ * considered for computing its COP_SCORE; must be an integer greater than
+ * 5.
+ * <p>
+ * Key: {@code -cop.k}
+ * </p>
+ */
+ public static final OptionID K_ID = new OptionID("cop.k", "The number of nearest neighbors of an object to be considered for computing its COP_SCORE.");
+
+ /**
+ * Distribution assumption for distances.
+ * <p>
+ * Key: {@code -cop.dist}
+ * </p>
+ */
+ public static final OptionID DIST_ID = new OptionID("cop.dist", "The assumed distribution of squared distances. ChiSquared is faster, Gamma expected to be more accurate but could also overfit.");
+
+ /**
+ * Class to compute the PCA with.
+ * <p>
+ * Key: {@code -cop.pcarunner}
+ * </p>
+ */
+ public static final OptionID PCARUNNER_ID = new OptionID("cop.pcarunner", "The class to compute (filtered) PCA.");
+
+ /**
+ * Expected share of outliers.
+ * <p>
+ * Key: {@code -cop.expect}
+ *
+ * Default: 0.001
+ * </p>
+ */
+ public static final OptionID EXPECT_ID = new OptionID("cop.expect", "Expected share of outliers. Only affects score normalization.");
+
+ /**
+ * Number of neighbors to be considered.
+ */
+ int k;
+
+ /**
+ * Holds the object performing the dependency derivation.
+ */
+ PCARunner<V> pca;
+
+ /**
+ * Distance distribution assumption.
+ */
+ DistanceDist dist;
+
+ /**
+ * Expected amount of outliers.
+ */
+ double expect;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ IntParameter kP = new IntParameter(K_ID);
+ kP.addConstraint(new GreaterConstraint(5));
+ if (config.grab(kP)) {
+ k = kP.intValue();
+ }
+ EnumParameter<DistanceDist> distP = new EnumParameter<DistanceDist>(DIST_ID, DistanceDist.class, DistanceDist.GAMMA);
+ if (config.grab(distP)) {
+ dist = distP.getValue();
+ }
+ DoubleParameter expectP = new DoubleParameter(EXPECT_ID, 0.001);
+ expectP.addConstraint(new GreaterConstraint(0));
+ expectP.addConstraint(new LessConstraint(1.0));
+ if (config.grab(expectP)) {
+ expect = expectP.doubleValue();
+ }
+ ObjectParameter<PCARunner<V>> pcaP = new ObjectParameter<PCARunner<V>>(PCARUNNER_ID, PCARunner.class, PCARunner.class);
+ if (config.grab(pcaP)) {
+ pca = pcaP.instantiateClass(config);
+ }
+ }
+
+ @Override
+ protected COP<V, D> makeInstance() {
+ return new COP<V, D>(distanceFunction, k, pca, expect, dist);
+ }
+ }
+}
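
The closing normalization in run(), prob = expect * (1 - min) / (expect + min), squashes the minimum tail probability min into an outlier probability governed by the expect parameter. Two illustrative evaluations (numbers are examples only):

    double expect = 0.001;
    // inlier: min near 1 (deviation well explained by the local Gaussian model)
    double pIn = expect * (1 - 0.9) / (expect + 0.9);     // ~ 1.1e-4
    // outlier: min near 0 (deviation far out in the tail)
    double pOut = expect * (1 - 1e-6) / (expect + 1e-6);  // ~ 0.999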
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierDetection.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierDetection.java
index dbaf8a5a..ba1fd841 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierDetection.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierDetection.java
@@ -24,17 +24,17 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
*/
import de.lmu.ifi.dbs.elki.database.Database;
-import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.DoubleDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
@@ -72,13 +72,13 @@ public class DBOutlierDetection<O, D extends Distance<D>> extends AbstractDBOutl
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(DBOutlierDetection.class);
+ private static final Logging LOG = Logging.getLogger(DBOutlierDetection.class);
/**
* Parameter to specify the minimum fraction of objects that must be outside
 * the D-neighborhood of an outlier
*/
- public static final OptionID P_ID = OptionID.getOrCreateOptionID("dbod.p", "minimum fraction of objects that must be outside the D-neighborhood of an outlier");
+ public static final OptionID P_ID = new OptionID("dbod.p", "minimum fraction of objects that must be outside the D-neighborhood of an outlier");
/**
* Holds the value of {@link #P_ID}.
@@ -98,7 +98,7 @@ public class DBOutlierDetection<O, D extends Distance<D>> extends AbstractDBOutl
}
@Override
- protected DataStore<Double> computeOutlierScores(Database database, Relation<O> relation, D neighborhoodSize) {
+ protected DoubleDataStore computeOutlierScores(Database database, Relation<O> relation, D neighborhoodSize) {
DistanceQuery<O, D> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
KNNQuery<O, D> knnQuery = database.getKNNQuery(distFunc, DatabaseQuery.HINT_OPTIMIZED_ONLY);
@@ -106,11 +106,11 @@ public class DBOutlierDetection<O, D extends Distance<D>> extends AbstractDBOutl
int m = (int) ((distFunc.getRelation().size()) * (1 - p));
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(distFunc.getRelation().getDBIDs(), DataStoreFactory.HINT_STATIC);
- if(logger.isVerbose()) {
- logger.verbose("computing outlier flag");
+ if(LOG.isVerbose()) {
+ LOG.verbose("computing outlier flag");
}
- FiniteProgress progressOFlags = logger.isVerbose() ? new FiniteProgress("DBOutlier for objects", distFunc.getRelation().size(), logger) : null;
+ FiniteProgress progressOFlags = LOG.isVerbose() ? new FiniteProgress("DBOutlier for objects", distFunc.getRelation().size(), LOG) : null;
int counter = 0;
// if index exists, kNN query. if the distance to the mth nearest neighbor
// is more than d -> object is outlier
@@ -118,8 +118,8 @@ public class DBOutlierDetection<O, D extends Distance<D>> extends AbstractDBOutl
for(DBIDIter iditer = distFunc.getRelation().iterDBIDs(); iditer.valid(); iditer.advance()) {
counter++;
final KNNResult<D> knns = knnQuery.getKNNForDBID(iditer, m);
- if(logger.isDebugging()) {
- logger.debugFine("distance to mth nearest neighbour" + knns.toString());
+ if(LOG.isDebugging()) {
+ LOG.debugFine("distance to mth nearest neighbour" + knns.toString());
}
if(knns.get(Math.min(m, knns.size()) - 1).getDistance().compareTo(neighborhoodSize) <= 0) {
// flag as outlier
@@ -131,7 +131,7 @@ public class DBOutlierDetection<O, D extends Distance<D>> extends AbstractDBOutl
}
}
if(progressOFlags != null) {
- progressOFlags.setProcessed(counter, logger);
+ progressOFlags.setProcessed(counter, LOG);
}
}
else {
@@ -149,18 +149,18 @@ public class DBOutlierDetection<O, D extends Distance<D>> extends AbstractDBOutl
}
if(progressOFlags != null) {
- progressOFlags.setProcessed(counter, logger);
+ progressOFlags.setProcessed(counter, LOG);
}
}
if(progressOFlags != null) {
- progressOFlags.ensureCompleted(logger);
+ progressOFlags.ensureCompleted(LOG);
}
return scores;
}
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
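
The kNN branch above uses the classical DB(d,p) equivalence stated in its comment: at least a fraction p of the database lies outside the d-neighborhood of o exactly when the distance from o to its m-th nearest neighbor, m = (1-p)*N, exceeds d. As a condensed sketch (helper name hypothetical):

    int m = (int) (size * (1 - p));            // as computed in the hunk above
    // o is a DB(d,p) outlier iff fewer than m objects lie within distance d,
    // i.e. the m-th nearest neighbor is already farther away than d:
    boolean outlier = kthNearestNeighborDistance(o, m) > d;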
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierScore.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierScore.java
index 419b9a0e..a2d39130 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierScore.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierScore.java
@@ -24,9 +24,9 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
*/
import de.lmu.ifi.dbs.elki.database.Database;
-import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.DoubleDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
@@ -60,7 +60,7 @@ public class DBOutlierScore<O, D extends Distance<D>> extends AbstractDBOutlier<
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(DBOutlierScore.class);
+ private static final Logging LOG = Logging.getLogger(DBOutlierScore.class);
/**
* Constructor with parameters.
@@ -73,7 +73,7 @@ public class DBOutlierScore<O, D extends Distance<D>> extends AbstractDBOutlier<
}
@Override
- protected DataStore<Double> computeOutlierScores(Database database, Relation<O> relation, D d) {
+ protected DoubleDataStore computeOutlierScores(Database database, Relation<O> relation, D d) {
DistanceQuery<O, D> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
RangeQuery<O, D> rangeQuery = database.getRangeQuery(distFunc);
final double size = distFunc.getRelation().size();
@@ -90,7 +90,7 @@ public class DBOutlierScore<O, D extends Distance<D>> extends AbstractDBOutlier<
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
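
Where DBOutlierDetection flags objects, DBOutlierScore grades them by d-neighborhood occupancy. The scoring body is elided from this hunk; a hedged reconstruction of the commonly described score, using the rangeQuery and size variables visible above (getRangeForDBID is assumed to return the neighbors within distance d):

    // score(o) = 1 - |{q : dist(o,q) <= d}| / N; an empty neighborhood scores 1.0
    double n = rangeQuery.getRangeForDBID(iter, d).size();
    scores.putDouble(iter, 1.0 - n / size);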
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java
index db4b7782..2d2a4466 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java
@@ -62,11 +62,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz
// TODO: re-use an existing EM when present?
@Title("EM Outlier: Outlier Detection based on the generic EM clustering")
@Description("The outlier score assigned is based on the highest cluster probability obtained from EM clustering.")
-public class EMOutlier<V extends NumberVector<V, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+public class EMOutlier<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(EMOutlier.class);
+ private static final Logging LOG = Logging.getLogger(EMOutlier.class);
/**
* Inner algorithm.
@@ -120,7 +120,7 @@ public class EMOutlier<V extends NumberVector<V, ?>> extends AbstractAlgorithm<O
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -130,7 +130,7 @@ public class EMOutlier<V extends NumberVector<V, ?>> extends AbstractAlgorithm<O
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
protected EM<V> em = null;
@Override
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java
index 51833c8b..6aed60fe 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java
@@ -29,10 +29,10 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.math.MathUtil;
@@ -43,7 +43,6 @@ import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
@@ -61,16 +60,16 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
*/
@Title("Gaussian Model Outlier Detection")
@Description("Fit a multivariate gaussian model onto the data, and use the PDF to compute an outlier score.")
-public class GaussianModel<V extends NumberVector<V, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+public class GaussianModel<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(GaussianModel.class);
+ private static final Logging LOG = Logging.getLogger(GaussianModel.class);
/**
* OptionID for inversion flag.
*/
- public static final OptionID INVERT_ID = OptionID.getOrCreateOptionID("gaussod.invert", "Invert the value range to [0:1], with 1 being outliers instead of 0.");
+ public static final OptionID INVERT_ID = new OptionID("gaussod.invert", "Invert the value range to [0:1], with 1 being outliers instead of 0.");
/**
* Small value to increment diagonally of a matrix in order to avoid
@@ -113,7 +112,7 @@ public class GaussianModel<V extends NumberVector<V, ?>> extends AbstractAlgorit
Matrix covarianceTransposed = covarianceMatrix.cheatToAvoidSingularity(SINGULARITY_CHEAT).inverse();
// Normalization factors for Gaussian PDF
- final double fakt = (1.0 / (Math.sqrt(Math.pow(MathUtil.TWOPI, DatabaseUtil.dimensionality(relation)) * covarianceMatrix.det())));
+ final double fakt = (1.0 / (Math.sqrt(Math.pow(MathUtil.TWOPI, RelationUtil.dimensionality(relation)) * covarianceMatrix.det())));
// for each object compute Mahalanobis distance
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
@@ -130,8 +129,7 @@ public class GaussianModel<V extends NumberVector<V, ?>> extends AbstractAlgorit
if(invert) {
double max = mm.getMax() != 0 ? mm.getMax() : 1.;
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
- oscores.putDouble(id, (max - oscores.doubleValue(id)) / max);
+ oscores.putDouble(iditer, (max - oscores.doubleValue(iditer)) / max);
}
meta = new BasicOutlierScoreMeta(0.0, 1.0);
}
@@ -149,7 +147,7 @@ public class GaussianModel<V extends NumberVector<V, ?>> extends AbstractAlgorit
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -159,7 +157,7 @@ public class GaussianModel<V extends NumberVector<V, ?>> extends AbstractAlgorit
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
protected boolean invert = false;
@Override
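
The invert branch rewritten above maps raw scores into [0,1] with 1 marking outliers. Restated in isolation (max is guarded to 1 when all raw scores are zero):

    double max = mm.getMax() != 0 ? mm.getMax() : 1.;
    double inverted = (max - raw) / max; // raw = 0 -> 1.0 (outlier), raw = max -> 0.0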
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java
index 1cd31442..db53a3ef 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java
@@ -32,13 +32,13 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.generic.MaskedDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.math.MathUtil;
@@ -48,7 +48,6 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -79,21 +78,21 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
@Title("Gaussian-Uniform Mixture Model Outlier Detection")
@Description("Fits a mixture model consisting of a Gaussian and a uniform distribution to the data.")
@Reference(prefix = "Generalization using the likelihood gain as outlier score of", authors = "Eskin, Eleazar", title = "Anomaly detection over noisy data using learned probability distributions", booktitle = "Proc. of the Seventeenth International Conference on Machine Learning (ICML-2000)")
-public class GaussianUniformMixture<V extends NumberVector<V, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+public class GaussianUniformMixture<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(GaussianUniformMixture.class);
+ private static final Logging LOG = Logging.getLogger(GaussianUniformMixture.class);
/**
* Parameter to specify the fraction of expected outliers.
*/
- public static final OptionID L_ID = OptionID.getOrCreateOptionID("mmo.l", "expected fraction of outliers");
+ public static final OptionID L_ID = new OptionID("mmo.l", "expected fraction of outliers");
/**
* Parameter to specify the cutoff.
*/
- public static final OptionID C_ID = OptionID.getOrCreateOptionID("mmo.c", "cutoff");
+ public static final OptionID C_ID = new OptionID("mmo.c", "cutoff");
/**
* Small value to increment diagonally of a matrix in order to avoid
@@ -154,20 +153,19 @@ public class GaussianUniformMixture<V extends NumberVector<V, ?>> extends Abstra
// logger.debugFine(logLike + " loglike beginning" +
// loglikelihoodNormal(normalObjs, database));
DoubleMinMax minmax = new DoubleMinMax();
- for(int i = 0; i < objids.size(); i++) {
+
+ DBIDIter iter = objids.iter();
+ for(int i = 0; i < objids.size(); i++, iter.advance()) {
// logger.debugFine("i " + i);
// Change mask to make the current object anomalous
bits.set(i);
// Compute new likelihoods
double currentLogLike = normalObjs.size() * logml + loglikelihoodNormal(normalObjs, relation) + anomalousObjs.size() * logl + loglikelihoodAnomalous(anomalousObjs);
- // Get the actual object id
- DBID curid = objids.get(i);
-
// if the loglike increases more than a threshold, object stays in
// anomalous set and is flagged as outlier
final double loglikeGain = currentLogLike - logLike;
- oscores.putDouble(curid, loglikeGain);
+ oscores.putDouble(iter, loglikeGain);
minmax.put(loglikeGain);
if(loglikeGain > c) {
@@ -221,7 +219,7 @@ public class GaussianUniformMixture<V extends NumberVector<V, ?>> extends Abstra
Matrix covInv = covarianceMatrix.cheatToAvoidSingularity(SINGULARITY_CHEAT).inverse();
double covarianceDet = covarianceMatrix.det();
- double fakt = 1.0 / Math.sqrt(Math.pow(MathUtil.TWOPI, DatabaseUtil.dimensionality(database)) * covarianceDet);
+ double fakt = 1.0 / Math.sqrt(Math.pow(MathUtil.TWOPI, RelationUtil.dimensionality(database)) * covarianceDet);
// for each object compute probability and sum
double prob = 0;
for (DBIDIter iter = objids.iter(); iter.valid(); iter.advance()) {
@@ -239,7 +237,7 @@ public class GaussianUniformMixture<V extends NumberVector<V, ?>> extends Abstra
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -249,7 +247,7 @@ public class GaussianUniformMixture<V extends NumberVector<V, ?>> extends Abstra
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
protected double l = 1E-7;
protected double c = 0;
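
The loop rewrite above, like several others in this commit, replaces objids.get(i) (which materialized a DBID object per element) with a parallel iterator. The idiom in isolation, assuming an ArrayDBIDs and a writable double store (scoreFor is a hypothetical placeholder):

    DBIDIter iter = objids.iter();
    for(int i = 0; i < objids.size(); i++, iter.advance()) {
      // the iterator itself serves as the DBIDRef key; no DBID object is created
      oscores.putDouble(iter, scoreFor(i));
    }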
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/HilOut.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/HilOut.java
index 4ed56e1a..15f6cbf3 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/HilOut.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/HilOut.java
@@ -36,13 +36,15 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDFactory;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDistanceDBIDPair;
import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.query.DoubleDistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.LPNormDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
@@ -91,11 +93,11 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
@Title("Fast Outlier Detection in High Dimensional Spaces")
@Description("Algorithm to compute outliers using Hilbert space filling curves")
@Reference(authors = "F. Angiulli, C. Pizzuti", title = "Fast Outlier Detection in High Dimensional Spaces", booktitle = "Proc. European Conference on Principles of Knowledge Discovery and Data Mining (PKDD'02)", url = "http://dx.doi.org/10.1145/375663.375668")
-public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedAlgorithm<O, DoubleDistance, OutlierResult> implements OutlierAlgorithm {
+public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgorithm<O, DoubleDistance, OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(HilOut.class);
+ private static final Logging LOG = Logging.getLogger(HilOut.class);
/**
* Number of nearest neighbors
@@ -170,7 +172,7 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
public OutlierResult run(Database database, Relation<O> relation) {
distq = database.getDistanceQuery(relation, getDistanceFunction());
- d = DatabaseUtil.dimensionality(relation);
+ d = RelationUtil.dimensionality(relation);
WritableDoubleDataStore hilout_weight = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
// Compute extent of the dataset.
@@ -181,18 +183,18 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
min = new double[d];
double[] max = new double[d];
for(int i = 0; i < d; i++) {
- min[i] = hbbs.first.doubleValue(i + 1);
- max[i] = hbbs.second.doubleValue(i + 1);
+ min[i] = hbbs.first.doubleValue(i);
+ max[i] = hbbs.second.doubleValue(i);
diameter = Math.max(diameter, max[i] - min[i]);
}
// Enlarge bounding box to have equal lengths.
for(int i = 0; i < d; i++) {
- double diff = (diameter - (max[i] - min[i])) / 2;
+ double diff = (diameter - (max[i] - min[i])) * .5;
min[i] -= diff;
max[i] += diff;
}
- if(logger.isVerbose()) {
- logger.verbose("Rescaling dataset by " + (1 / diameter) + " to fit the unit cube.");
+ if(LOG.isVerbose()) {
+ LOG.verbose("Rescaling dataset by " + (1 / diameter) + " to fit the unit cube.");
}
}
@@ -200,8 +202,8 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
capital_n_star = capital_n = relation.size();
HilbertFeatures h = new HilbertFeatures(relation, min, diameter);
- FiniteProgress progressHilOut = logger.isVerbose() ? new FiniteProgress("HilOut iterations", d + 1, logger) : null;
- FiniteProgress progressTrueOut = logger.isVerbose() ? new FiniteProgress("True outliers found", n, logger) : null;
+ FiniteProgress progressHilOut = LOG.isVerbose() ? new FiniteProgress("HilOut iterations", d + 1, LOG) : null;
+ FiniteProgress progressTrueOut = LOG.isVerbose() ? new FiniteProgress("True outliers found", n, LOG) : null;
// Main part: 1. Phase max. d+1 loops
for(int j = 0; j <= d && n_star < n; j++) {
// initialize (clear) out and wlb - not 100% clear in the paper
@@ -214,7 +216,7 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
// determine the true outliers (n_star)
trueOutliers(h);
if(progressTrueOut != null) {
- progressTrueOut.setProcessed(n_star, logger);
+ progressTrueOut.setProcessed(n_star, LOG);
}
// Build the top Set as out + wlb
h.top.clear();
@@ -230,7 +232,7 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
}
}
if(progressHilOut != null) {
- progressHilOut.incrementProcessed(logger);
+ progressHilOut.incrementProcessed(LOG);
}
}
// 2. Phase: Additional Scan if less than n true outliers determined
@@ -241,12 +243,12 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
scan(h, capital_n);
}
if(progressHilOut != null) {
- progressHilOut.setProcessed(d, logger);
- progressHilOut.ensureCompleted(logger);
+ progressHilOut.setProcessed(d, LOG);
+ progressHilOut.ensureCompleted(LOG);
}
if(progressTrueOut != null) {
- progressTrueOut.setProcessed(n, logger);
- progressTrueOut.ensureCompleted(logger);
+ progressTrueOut.setProcessed(n, LOG);
+ progressTrueOut.ensureCompleted(LOG);
}
DoubleMinMax minmax = new DoubleMinMax();
// Return weights in out
@@ -281,8 +283,8 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
*/
private void scan(HilbertFeatures hf, int k0) {
final int mink0 = Math.min(2 * k0, capital_n - 1);
- if(logger.isDebuggingFine()) {
- logger.debugFine("Scanning with k0=" + k0 + " (" + mink0 + ")" + " N*=" + capital_n_star);
+ if(LOG.isDebuggingFine()) {
+ LOG.debugFine("Scanning with k0=" + k0 + " (" + mink0 + ")" + " N*=" + capital_n_star);
}
for(int i = 0; i < hf.pf.length; i++) {
if(hf.pf[i].ubound < omega_star) {
@@ -366,7 +368,7 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
if(mlevel < level) {
level = mlevel;
final double delta = hf.minDistLevel(hf.pf[i].id, level);
- if(delta >= hf.pf[i].nn.peek().getDoubleDistance()) {
+ if(delta >= hf.pf[i].nn.peek().doubleDistance()) {
break; // stop = true
}
}
@@ -376,10 +378,10 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
double br = hf.boxRadius(i, a - 1, b + 1);
double newlb = 0.0;
double newub = 0.0;
- for(DoubleDistanceResultPair entry : hf.pf[i].nn) {
- newub += entry.getDoubleDistance();
- if(entry.getDoubleDistance() <= br) {
- newlb += entry.getDoubleDistance();
+ for(DoubleDistanceDBIDPair entry : hf.pf[i].nn) {
+ newub += entry.doubleDistance();
+ if(entry.doubleDistance() <= br) {
+ newlb += entry.doubleDistance();
}
}
if(newlb > hf.pf[i].lbound) {
@@ -408,7 +410,7 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
@Override
@@ -482,7 +484,7 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
int pos = 0;
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- pf[pos++] = new HilFeature(iditer.getDBID(), new Heap<DoubleDistanceResultPair>(k, Collections.reverseOrder()));
+ pf[pos++] = new HilFeature(DBIDUtil.deref(iditer), new Heap<DoubleDistanceDBIDPair>(k, Collections.reverseOrder()));
}
this.out = new Heap<HilFeature>(n, new Comparator<HilFeature>() {
@Override
@@ -513,7 +515,7 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
if(h >= 32) { // 32 to 63 bit
final long scale = Long.MAX_VALUE; // = 63 bits
for(int i = 0; i < pf.length; i++) {
- NumberVector<?, ?> obj = relation.get(pf[i].id);
+ NumberVector<?> obj = relation.get(pf[i].id);
long[] coord = new long[d];
for(int dim = 0; dim < d; dim++) {
coord[dim] = (long) (getDimForObject(obj, dim) * .5 * scale);
@@ -524,7 +526,7 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
else if(h >= 16) { // 16-31 bit
final int scale = ~1 >>> 1;
for(int i = 0; i < pf.length; i++) {
- NumberVector<?, ?> obj = relation.get(pf[i].id);
+ NumberVector<?> obj = relation.get(pf[i].id);
int[] coord = new int[d];
for(int dim = 0; dim < d; dim++) {
coord[dim] = (int) (getDimForObject(obj, dim) * .5 * scale);
@@ -535,7 +537,7 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
else if(h >= 8) { // 8-15 bit
final int scale = ~1 >>> 16;
for(int i = 0; i < pf.length; i++) {
- NumberVector<?, ?> obj = relation.get(pf[i].id);
+ NumberVector<?> obj = relation.get(pf[i].id);
short[] coord = new short[d];
for(int dim = 0; dim < d; dim++) {
coord[dim] = (short) (getDimForObject(obj, dim) * .5 * scale);
@@ -546,7 +548,7 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
else { // 1-7 bit
final int scale = ~1 >>> 8;
for(int i = 0; i < pf.length; i++) {
- NumberVector<?, ?> obj = relation.get(pf[i].id);
+ NumberVector<?> obj = relation.get(pf[i].id);
byte[] coord = new byte[d];
for(int dim = 0; dim < d; dim++) {
coord[dim] = (byte) (getDimForObject(obj, dim) * .5 * scale);
@@ -575,15 +577,13 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
*/
private void updateOUT(int i) {
if(out.size() < n) {
- out.offer(pf[i]);
+ out.add(pf[i]);
}
else {
HilFeature head = out.peek();
if(pf[i].ubound > head.ubound) {
// replace smallest
- out.poll();
- // assert (out.peek().ubound >= head.ubound);
- out.offer(pf[i]);
+ out.replaceTopElement(pf[i]);
}
}
}
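
replaceTopElement is an ELKI heap convenience that fuses the former poll()/offer() pair into a single sift-down. With a plain java.util.PriorityQueue, which has no such method, the same bounded top-n update reads as follows (a sketch; plain scores stand in for the ubound values):

import java.util.PriorityQueue;

public class BoundedTopN {
  /** Keep the n largest scores; the heap root is the smallest kept value. */
  static void update(PriorityQueue<Double> topN, int n, double score) {
    if (topN.size() < n) {
      topN.add(score);
    } else if (score > topN.peek()) {
      topN.poll();     // drop the current minimum ...
      topN.add(score); // ... together this is ELKI's replaceTopElement
    }
  }

  public static void main(String[] args) {
    PriorityQueue<Double> topN = new PriorityQueue<Double>(); // min-heap
    for (double s : new double[] { 3, 1, 4, 1, 5, 9, 2, 6 }) {
      update(topN, 3, s);
    }
    System.out.println(topN); // the three largest values; head is the smallest kept
  }
}
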
@@ -595,15 +595,13 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
*/
private void updateWLB(int i) {
if(wlb.size() < n) {
- wlb.offer(pf[i]);
+ wlb.add(pf[i]);
}
else {
HilFeature head = wlb.peek();
if(pf[i].lbound > head.lbound) {
// replace smallest
- wlb.poll();
- // assert (wlb.peek().lbound >= head.lbound);
- wlb.offer(pf[i]);
+ wlb.replaceTopElement(pf[i]);
}
}
}
@@ -639,7 +637,7 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
* @param level Level of the corresponding r-region
*/
private double minDistLevel(DBID id, int level) {
- final NumberVector<?, ?> obj = relation.get(id);
+ final NumberVector<?> obj = relation.get(id);
// level 1 is supposed to have r=1 as in the original publication
// 2 ^ - (level - 1)
final double r = 1.0 / (1 << (level - 1));
@@ -659,7 +657,7 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
* @param level Level of the corresponding r-region
*/
private double maxDistLevel(DBID id, int level) {
- final NumberVector<?, ?> obj = relation.get(id);
+ final NumberVector<?> obj = relation.get(id);
// level 1 is supposed to have r=1 as in the original publication
final double r = 1.0 / (1 << (level - 1));
double dist;
@@ -780,8 +778,8 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
* @param dim Dimension
* @return Projected and shifted position
*/
- private double getDimForObject(NumberVector<?, ?> obj, int dim) {
- return (obj.doubleValue(dim + 1) - min[dim]) / diameter + shift;
+ private double getDimForObject(NumberVector<?> obj, int dim) {
+ return (obj.doubleValue(dim) - min[dim]) / diameter + shift;
}
}
@@ -824,7 +822,7 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
/**
* Heap with the nearest known neighbors
*/
- public Heap<DoubleDistanceResultPair> nn;
+ public Heap<DoubleDistanceDBIDPair> nn;
/**
* Set representation of the nearest neighbors for faster lookups
@@ -842,7 +840,7 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
* @param id Object ID
* @param nn Heap for neighbors
*/
- public HilFeature(DBID id, Heap<DoubleDistanceResultPair> nn) {
+ public HilFeature(DBID id, Heap<DoubleDistanceDBIDPair> nn) {
super();
this.id = id;
this.nn = nn;
@@ -864,27 +862,26 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
protected void insert(DBID id, double dt, int k) {
// assert (!nn_keys.contains(id));
if(nn.size() < k) {
- DoubleDistanceResultPair entry = new DoubleDistanceResultPair(dt, id);
- nn.offer(entry);
+ DoubleDistanceDBIDPair entry = DBIDFactory.FACTORY.newDistancePair(dt, id);
+ nn.add(entry);
nn_keys.add(id);
sum_nn += dt;
}
else {
- DoubleDistanceResultPair head = nn.peek();
- if(dt < head.getDoubleDistance()) {
+ DoubleDistanceDBIDPair head = nn.peek();
+ if(dt < head.doubleDistance()) {
head = nn.poll(); // Remove worst
- sum_nn -= head.getDoubleDistance();
- nn_keys.remove(head.getDBID());
+ sum_nn -= head.doubleDistance();
+ nn_keys.remove(head);
- // assert (nn.peek().getDoubleDistance() <= head.getDoubleDistance());
+ // assert (nn.peek().doubleDistance() <= head.doubleDistance());
- DoubleDistanceResultPair entry = new DoubleDistanceResultPair(dt, id);
- nn.offer(entry);
+ DoubleDistanceDBIDPair entry = DBIDFactory.FACTORY.newDistancePair(dt, id);
+ nn.add(entry);
nn_keys.add(id);
sum_nn += dt;
}
}
-
}
}
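
insert() above maintains, per point, a max-heap of the k smallest distances seen so far plus their running sum sum_nn, which becomes the HilOut weight. The bookkeeping in isolation, as a plain-Java sketch with hypothetical names:

import java.util.Collections;
import java.util.PriorityQueue;

public class KnnSum {
  // Max-heap: the root is the worst (largest) of the kept distances.
  final PriorityQueue<Double> nn = new PriorityQueue<Double>(11, Collections.reverseOrder());
  double sumNN = 0;

  /** Offer one candidate distance; keep only the k smallest and their sum. */
  void insert(double dist, int k) {
    if (nn.size() < k) {
      nn.add(dist);
      sumNN += dist;
    } else if (dist < nn.peek()) {
      sumNN -= nn.poll(); // evict the current worst neighbor
      nn.add(dist);
      sumNN += dist;
    }
  }

  public static void main(String[] args) {
    KnnSum h = new KnnSum();
    for (double d : new double[] { 0.9, 0.2, 0.5, 0.1 }) {
      h.insert(d, 2);
    }
    System.out.println(h.sumNN); // ~0.3, the sum of the two nearest (0.1 + 0.2)
  }
}
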
@@ -897,33 +894,33 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
*
* @param <O> Vector type
*/
- public static class Parameterizer<O extends NumberVector<O, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<O extends NumberVector<?>> extends AbstractParameterizer {
/**
* Parameter to specify how many nearest neighbors should be used in the
* computation.
*/
- public static final OptionID K_ID = OptionID.getOrCreateOptionID("HilOut.k", "Compute up to k next neighbors");
+ public static final OptionID K_ID = new OptionID("HilOut.k", "Compute up to k nearest neighbors");
/**
* Parameter to specify how many outliers should be computed
*/
- public static final OptionID N_ID = OptionID.getOrCreateOptionID("HilOut.n", "Compute n outliers");
+ public static final OptionID N_ID = new OptionID("HilOut.n", "Compute n outliers");
/**
* Parameter to specify the maximum Hilbert-Level
*/
- public static final OptionID H_ID = OptionID.getOrCreateOptionID("HilOut.h", "Max. Hilbert-Level");
+ public static final OptionID H_ID = new OptionID("HilOut.h", "Max. Hilbert-Level");
/**
* Parameter to specify the value t of the Lt metric (the p of the Lp norm distance)
*/
- public static final OptionID T_ID = OptionID.getOrCreateOptionID("HilOut.t", "t of Lt Metric");
+ public static final OptionID T_ID = new OptionID("HilOut.t", "t of Lt Metric");
/**
* Parameter to specify if only the Top n, or also approximations for the
* other elements, should be returned
*/
- public static final OptionID TN_ID = OptionID.getOrCreateOptionID("HilOut.tn", "output of Top n or all elements");
+ public static final OptionID TN_ID = new OptionID("HilOut.tn", "output of Top n or all elements");
/**
* Neighborhood size
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/INFLO.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/INFLO.java
index 1fe5fe71..655a0910 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/INFLO.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/INFLO.java
@@ -1,26 +1,27 @@
package de.lmu.ifi.dbs.elki.algorithm.outlier;
-/*
-This file is part of ELKI:
-Environment for Developing KDD-Applications Supported by Index-Structures
-
-Copyright (C) 2012
-Ludwig-Maximilians-Universität München
-Lehr- und Forschungseinheit für Datenbanksysteme
-ELKI Development Team
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
@@ -36,10 +37,10 @@ import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
@@ -81,7 +82,7 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(INFLO.class);
+ private static final Logging LOG = Logging.getLogger(INFLO.class);
/**
* Parameter to specify the pruning threshold m, deciding when an object counts as a core object; must be a double
@@ -89,7 +90,7 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
* <p>
* see paper "Two-way search method" 3.2
*/
- public static final OptionID M_ID = OptionID.getOrCreateOptionID("inflo.m", "The threshold");
+ public static final OptionID M_ID = new OptionID("inflo.m", "The threshold");
/**
* Holds the value of {@link #M_ID}.
@@ -101,7 +102,7 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
* considered for computing its INFLO_SCORE; must be an integer greater than
* 1.
*/
- public static final OptionID K_ID = OptionID.getOrCreateOptionID("inflo.k", "The number of nearest neighbors of an object to be considered for computing its INFLO_SCORE.");
+ public static final OptionID K_ID = new OptionID("inflo.k", "The number of nearest neighbors of an object to be considered for computing its INFLO_SCORE.");
/**
* Holds the value of {@link #K_ID}.
@@ -140,7 +141,7 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
// density
WritableDoubleDataStore density = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
// init knns and rnns
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
knns.put(iditer, DBIDUtil.newArray());
rnns.put(iditer, DBIDUtil.newArray());
}
@@ -148,38 +149,34 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
// TODO: use kNN preprocessor?
KNNQuery<O, D> knnQuery = database.getKNNQuery(distFunc, k, DatabaseQuery.HINT_HEAVY_USE);
- for(DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) {
+ for (DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) {
// if not visited count=0
int count = rnns.get(id).size();
- ModifiableDBIDs s;
- if(!processedIDs.contains(id)) {
+ if (!processedIDs.contains(id)) {
// TODO: use exactly k neighbors?
KNNResult<D> list = knnQuery.getKNNForDBID(id, k);
- knns.get(id).addDBIDs(list.asDBIDs());
+ knns.get(id).addDBIDs(list);
processedIDs.add(id);
- s = knns.get(id);
- density.putDouble(id, 1 / list.get(k - 1).getDistance().doubleValue());
+ density.putDouble(id, 1 / list.getKNNDistance().doubleValue());
}
- else {
- s = knns.get(id);
- }
- for (DBIDIter q = s.iter(); q.valid(); q.advance()) {
- if(!processedIDs.contains(q)) {
+ ModifiableDBIDs s = knns.get(id);
+ for (DBIDIter q = knns.get(id).iter(); q.valid(); q.advance()) {
+ if (!processedIDs.contains(q)) {
// TODO: use exactly k neighbors?
KNNResult<D> listQ = knnQuery.getKNNForDBID(q, k);
- knns.get(q).addDBIDs(listQ.asDBIDs());
+ knns.get(q).addDBIDs(listQ);
density.putDouble(q, 1 / listQ.getKNNDistance().doubleValue());
processedIDs.add(q);
}
- if(knns.get(q).contains(id)) {
+ if (knns.get(q).contains(id)) {
rnns.get(q).add(id);
rnns.get(id).add(q);
count++;
}
}
- if(count >= s.size() * m) {
+ if (count >= s.size() * m) {
pruned.add(id);
}
}
@@ -188,8 +185,8 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
// IF Object is pruned INFLO=1.0
DoubleMinMax inflominmax = new DoubleMinMax();
WritableDoubleDataStore inflos = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
- for(DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) {
- if(!pruned.contains(id)) {
+ for (DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) {
+ if (!pruned.contains(id)) {
ModifiableDBIDs knn = knns.get(id);
ModifiableDBIDs rnn = rnns.get(id);
@@ -205,7 +202,7 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
inflominmax.put(den);
}
- if(pruned.contains(id)) {
+ if (pruned.contains(id)) {
inflos.putDouble(id, 1.0);
inflominmax.put(1.0);
}
@@ -224,15 +221,15 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
- * Parameterization class.
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
*/
public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
protected double m = 1.0;
@@ -242,14 +239,16 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final DoubleParameter mP = new DoubleParameter(M_ID, new GreaterConstraint(0.0), 1.0);
- if(config.grab(mP)) {
- m = mP.getValue();
+ final DoubleParameter mP = new DoubleParameter(M_ID, 1.0);
+ mP.addConstraint(new GreaterConstraint(0.0));
+ if (config.grab(mP)) {
+ m = mP.doubleValue();
}
- final IntParameter kP = new IntParameter(K_ID, new GreaterConstraint(1));
- if(config.grab(kP)) {
- k = kP.getValue();
+ final IntParameter kP = new IntParameter(K_ID);
+ kP.addConstraint(new GreaterConstraint(1));
+ if (config.grab(kP)) {
+ k = kP.intValue();
}
}
@@ -258,4 +257,4 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
return new INFLO<O, D>(distanceFunction, m, k);
}
}
-} \ No newline at end of file
+}
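
For orientation: the two-way search above collects, per object, its kNN and reverse-kNN sets, and the scoring hunk near inflominmax divides the mean density of that influence space by the object's own density, where density(q) = 1/kdist(q). Just the scoring step, sketched in plain Java with hypothetical inputs:

import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

public class InfloScore {
  /**
   * INFLO(p) = mean density over the influence space (kNN union RkNN of p)
   * divided by the density of p itself.
   */
  static double inflo(String p, Set<String> influenceSpace, Map<String, Double> density) {
    double sum = 0;
    for (String q : influenceSpace) {
      sum += density.get(q);
    }
    return (sum / influenceSpace.size()) / density.get(p);
  }

  public static void main(String[] args) {
    Map<String, Double> density = new HashMap<String, Double>();
    density.put("p", 0.5); // p lies in a sparse region
    density.put("a", 1.0);
    density.put("b", 2.0);
    Set<String> is = new HashSet<String>(Arrays.asList("a", "b"));
    System.out.println(inflo("p", is, density)); // 3.0: p is far sparser than its surroundings
  }
}
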
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNOutlier.java
index 08be944a..4c4873dd 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNOutlier.java
@@ -1,26 +1,27 @@
package de.lmu.ifi.dbs.elki.algorithm.outlier;
-/*
-This file is part of ELKI:
-Environment for Developing KDD-Applications Supported by Index-Structures
-
-Copyright (C) 2012
-Ludwig-Maximilians-Universität München
-Lehr- und Forschungseinheit für Datenbanksysteme
-ELKI Development Team
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
@@ -32,10 +33,11 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNList;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
@@ -77,12 +79,12 @@ public class KNNOutlier<O, D extends NumberDistance<D, ?>> extends AbstractDista
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(KNNOutlier.class);
+ private static final Logging LOG = Logging.getLogger(KNNOutlier.class);
/**
* Parameter to specify the k nearest neighbor
*/
- public static final OptionID K_ID = OptionID.getOrCreateOptionID("knno.k", "k nearest neighbor");
+ public static final OptionID K_ID = new OptionID("knno.k", "k nearest neighbor");
/**
* The parameter k
@@ -107,28 +109,34 @@ public class KNNOutlier<O, D extends NumberDistance<D, ?>> extends AbstractDista
final DistanceQuery<O, D> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
KNNQuery<O, D> knnQuery = database.getKNNQuery(distanceQuery, k);
- if(logger.isVerbose()) {
- logger.verbose("Computing the kNN outlier degree (distance to the k nearest neighbor)");
+ if(LOG.isVerbose()) {
+ LOG.verbose("Computing the kNN outlier degree (distance to the k nearest neighbor)");
}
- FiniteProgress progressKNNDistance = logger.isVerbose() ? new FiniteProgress("kNN distance for objects", relation.size(), logger) : null;
+ FiniteProgress progressKNNDistance = LOG.isVerbose() ? new FiniteProgress("kNN distance for objects", relation.size(), LOG) : null;
DoubleMinMax minmax = new DoubleMinMax();
WritableDoubleDataStore knno_score = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
// compute distance to the k nearest neighbor.
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
// distance to the kth nearest neighbor
final KNNResult<D> knns = knnQuery.getKNNForDBID(iditer, k);
- double dkn = knns.getKNNDistance().doubleValue();
- knno_score.putDouble(iditer, dkn);
+ final double dkn;
+ if(knns instanceof DoubleDistanceKNNList) {
+ dkn = ((DoubleDistanceKNNList) knns).doubleKNNDistance();
+ }
+ else {
+ dkn = knns.getKNNDistance().doubleValue();
+ }
+ knno_score.putDouble(iditer, dkn);
minmax.put(dkn);
if(progressKNNDistance != null) {
- progressKNNDistance.incrementProcessed(logger);
+ progressKNNDistance.incrementProcessed(LOG);
}
}
if(progressKNNDistance != null) {
- progressKNNDistance.ensureCompleted(logger);
+ progressKNNDistance.ensureCompleted(LOG);
}
Relation<Double> scoreres = new MaterializedRelation<Double>("kNN Outlier Score", "knn-outlier", TypeUtil.DOUBLE, knno_score, relation.getDBIDs());
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
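
The new instanceof DoubleDistanceKNNList branch is the recurring fast path of this commit: read the kNN distance as a primitive double where the list supports it, and fall back to the generic NumberDistance accessor otherwise. The dispatch pattern in miniature, with stand-in interfaces rather than the ELKI types:

public class FastPath {
  interface KNNList { Number knnDistance(); }
  interface DoubleKNNList extends KNNList { double doubleKNNDistance(); }

  /** Prefer the primitive accessor when the list provides one. */
  static double kthDistance(KNNList knns) {
    if (knns instanceof DoubleKNNList) {
      return ((DoubleKNNList) knns).doubleKNNDistance(); // no boxing
    }
    return knns.knnDistance().doubleValue(); // generic fallback
  }

  public static void main(String[] args) {
    KNNList fast = new DoubleKNNList() {
      public double doubleKNNDistance() { return 1.25; }
      public Number knnDistance() { return Double.valueOf(1.25); }
    };
    System.out.println(kthDistance(fast)); // 1.25, via the primitive path
  }
}
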
@@ -142,15 +150,15 @@ public class KNNOutlier<O, D extends NumberDistance<D, ?>> extends AbstractDista
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
- * Parameterization class.
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
*/
public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
protected int k = 0;
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNWeightOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNWeightOutlier.java
index cb3ca2f1..e7eeeb9c 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNWeightOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNWeightOutlier.java
@@ -31,13 +31,15 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDResultIter;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNList;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
@@ -74,17 +76,17 @@ public class KNNWeightOutlier<O, D extends NumberDistance<D, ?>> extends Abstrac
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(KNNWeightOutlier.class);
+ private static final Logging LOG = Logging.getLogger(KNNWeightOutlier.class);
/**
* Parameter to specify the k nearest neighbor
*/
- public static final OptionID K_ID = OptionID.getOrCreateOptionID("knnwod.k", "k nearest neighbor");
+ public static final OptionID K_ID = new OptionID("knnwod.k", "k nearest neighbor");
/**
* The kNN query used.
*/
- public static final OptionID KNNQUERY_ID = OptionID.getOrCreateOptionID("knnwod.knnquery", "kNN query to use");
+ public static final OptionID KNNQUERY_ID = new OptionID("knnwod.knnquery", "kNN query to use");
/**
* Holds the value of {@link #K_ID}.
@@ -109,33 +111,40 @@ public class KNNWeightOutlier<O, D extends NumberDistance<D, ?>> extends Abstrac
final DistanceQuery<O, D> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
KNNQuery<O, D> knnQuery = database.getKNNQuery(distanceQuery, k);
- if(logger.isVerbose()) {
- logger.verbose("computing outlier degree(sum of the distances to the k nearest neighbors");
+ if(LOG.isVerbose()) {
+ LOG.verbose("computing outlier degree(sum of the distances to the k nearest neighbors");
}
- FiniteProgress progressKNNWeight = logger.isVerbose() ? new FiniteProgress("KNNWOD_KNNWEIGHT for objects", relation.size(), logger) : null;
+ FiniteProgress progressKNNWeight = LOG.isVerbose() ? new FiniteProgress("KNNWOD_KNNWEIGHT for objects", relation.size(), LOG) : null;
DoubleMinMax minmax = new DoubleMinMax();
// compute distance to the k nearest neighbor. n objects with the highest
// distance are flagged as outliers
WritableDoubleDataStore knnw_score = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
// compute sum of the distances to the k nearest neighbors
final KNNResult<D> knn = knnQuery.getKNNForDBID(iditer, k);
double skn = 0;
- for(DistanceResultPair<D> r : knn) {
- skn += r.getDistance().doubleValue();
+ if(knn instanceof DoubleDistanceKNNList) {
+ for(DoubleDistanceDBIDResultIter neighbor = ((DoubleDistanceKNNList) knn).iter(); neighbor.valid(); neighbor.advance()) {
+ skn += neighbor.doubleDistance();
+ }
+ }
+ else {
+ for(DistanceDBIDResultIter<D> neighbor = knn.iter(); neighbor.valid(); neighbor.advance()) {
+ skn += neighbor.getDistance().doubleValue();
+ }
}
knnw_score.putDouble(iditer, skn);
minmax.put(skn);
if(progressKNNWeight != null) {
- progressKNNWeight.incrementProcessed(logger);
+ progressKNNWeight.incrementProcessed(LOG);
}
}
if(progressKNNWeight != null) {
- progressKNNWeight.ensureCompleted(logger);
+ progressKNNWeight.ensureCompleted(LOG);
}
Relation<Double> res = new MaterializedRelation<Double>("Weighted kNN Outlier Score", "knnw-outlier", TypeUtil.DOUBLE, knnw_score, relation.getDBIDs());
@@ -150,7 +159,7 @@ public class KNNWeightOutlier<O, D extends NumberDistance<D, ?>> extends Abstrac
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDF.java
new file mode 100644
index 00000000..4ce0313e
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDF.java
@@ -0,0 +1,342 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.QueryUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDResultIter;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNList;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.math.statistics.GaussianKernelDensityFunction;
+import de.lmu.ifi.dbs.elki.math.statistics.KernelDensityFunction;
+import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * Outlier Detection with Kernel Density Functions.
+ *
+ * A variation of LOF which uses kernel density estimation, but in contrast to
+ * {@link SimpleKernelDensityLOF} also uses the reachability concept of LOF.
+ *
+ * Reference:
+ * <p>
+ * Outlier Detection with Kernel Density Functions.<br/>
+ * L. J. Latecki, A. Lazarevic, D. Pokrajac<br />
+ * Machine Learning and Data Mining in Pattern Recognition 2007
+ * </p>
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has KNNQuery
+ * @apiviz.has KernelDensityFunction
+ *
+ * @param <O> the type of objects handled by this Algorithm
+ * @param <D> Distance type
+ */
+@Reference(authors = "L. J. Latecki, A. Lazarevic, D. Pokrajac", title = "Outlier Detection with Kernel Density Functions", booktitle = "Machine Learning and Data Mining in Pattern Recognition", url = "http://dx.doi.org/10.1007/978-3-540-73499-4_6")
+public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(LDF.class);
+
+ /**
+ * Parameter k.
+ */
+ protected int k;
+
+ /**
+ * Bandwidth scaling factor.
+ */
+ protected double h = 1;
+
+ /**
+ * Scaling constant, to limit value range to 1/c
+ */
+ protected double c = 0.1;
+
+ /**
+ * Kernel density function
+ */
+ private KernelDensityFunction kernel;
+
+ /**
+ * Constructor.
+ *
+ * @param k the value of k
+ * @param kernel Kernel function
+ * @param h Kernel bandwidth scaling
+ * @param c Score scaling parameter
+ */
+ public LDF(int k, DistanceFunction<? super O, D> distance, KernelDensityFunction kernel, double h, double c) {
+ super(distance);
+ this.k = k + 1;
+ this.kernel = kernel;
+ this.h = h;
+ this.c = c;
+ }
+
+ /**
+ * Run the LDF kernel-density outlier detection algorithm.
+ *
+ * @param relation Data to process
+ * @return LDF outlier result
+ */
+ public OutlierResult run(Relation<O> relation) {
+ StepProgress stepprog = LOG.isVerbose() ? new StepProgress("LDF", 3) : null;
+
+ final int dim = RelationUtil.dimensionality(relation);
+
+ DBIDs ids = relation.getDBIDs();
+
+ // "HEAVY" flag for KNN Query since it is used more than once
+ KNNQuery<O, D> knnq = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
+ // No optimized kNN query - use a preprocessor!
+ if (!(knnq instanceof PreprocessorKNNQuery)) {
+ if (stepprog != null) {
+ stepprog.beginStep(1, "Materializing neighborhoods w.r.t. distance function.", LOG);
+ }
+ MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<O, D>(relation, getDistanceFunction(), k);
+ relation.getDatabase().addIndex(preproc);
+ DistanceQuery<O, D> rdq = relation.getDatabase().getDistanceQuery(relation, getDistanceFunction());
+ knnq = preproc.getKNNQuery(rdq, k);
+ }
+
+ // Compute LDEs
+ if (stepprog != null) {
+ stepprog.beginStep(2, "Computing LDEs.", LOG);
+ }
+ WritableDoubleDataStore ldes = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
+ FiniteProgress densProgress = LOG.isVerbose() ? new FiniteProgress("Densities", ids.size(), LOG) : null;
+ for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
+ final KNNResult<D> neighbors = knnq.getKNNForDBID(it, k);
+ double sum = 0.0;
+ int count = 0;
+ if (neighbors instanceof DoubleDistanceKNNList) {
+ // Fast version for double distances
+ for (DoubleDistanceDBIDResultIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
+ if (DBIDUtil.equal(neighbor, it)) {
+ continue;
+ }
+ double nkdist = ((DoubleDistanceKNNList) knnq.getKNNForDBID(neighbor, k)).doubleKNNDistance();
+
+ final double v = Math.max(nkdist, neighbor.doubleDistance()) / (h * nkdist);
+ sum += kernel.density(v) / Math.pow(h * nkdist, dim);
+ count++;
+ }
+ } else {
+ for (DistanceDBIDResultIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ if (DBIDUtil.equal(neighbor, it)) {
+ continue;
+ }
+ double nkdist = knnq.getKNNForDBID(neighbor, k).getKNNDistance().doubleValue();
+ final double v = Math.max(nkdist, neighbor.getDistance().doubleValue()) / (h * nkdist);
+ sum += kernel.density(v) / Math.pow(h * nkdist, dim);
+ count++;
+ }
+ }
+ ldes.putDouble(it, sum / count);
+ if (densProgress != null) {
+ densProgress.incrementProcessed(LOG);
+ }
+ }
+ if (densProgress != null) {
+ densProgress.ensureCompleted(LOG);
+ }
+
+ // Compute local density factors.
+ if (stepprog != null) {
+ stepprog.beginStep(3, "Computing LDFs.", LOG);
+ }
+ WritableDoubleDataStore ldfs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
+ // track the maximum value for normalization.
+ DoubleMinMax lofminmax = new DoubleMinMax();
+
+ FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("Local Density Factors", ids.size(), LOG) : null;
+ for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
+ final double lrdp = ldes.doubleValue(it);
+ final KNNResult<D> neighbors = knnq.getKNNForDBID(it, k);
+ double sum = 0.0;
+ int count = 0;
+ for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ // skip the point itself
+ if (DBIDUtil.equal(neighbor, it)) {
+ continue;
+ }
+ sum += ldes.doubleValue(neighbor);
+ count++;
+ }
+ sum /= count;
+ final double div = lrdp + c * sum;
+ double ldf = (div > 0) ? sum / div : 0;
+ ldfs.putDouble(it, ldf);
+ // update minimum and maximum
+ lofminmax.put(ldf);
+
+ if (progressLOFs != null) {
+ progressLOFs.incrementProcessed(LOG);
+ }
+ }
+ if (progressLOFs != null) {
+ progressLOFs.ensureCompleted(LOG);
+ }
+
+ if (stepprog != null) {
+ stepprog.setCompleted(LOG);
+ }
+
+ // Build result representation.
+ Relation<Double> scoreResult = new MaterializedRelation<Double>("Local Density Factor", "ldf-outlier", TypeUtil.DOUBLE, ldfs, ids);
+ OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, 1. / c, 1 / (1 + c));
+ OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
+
+ return result;
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(new CombinedTypeInformation(getDistanceFunction().getInputTypeRestriction(), TypeUtil.NUMBER_VECTOR_FIELD));
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ *
+ * @param <O> vector type
+ * @param <D> distance type
+ */
+ public static class Parameterizer<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ /**
+ * Option ID for kernel.
+ */
+ public static final OptionID KERNEL_ID = new OptionID("ldf.kernel", "Kernel to use for LDF.");
+
+ /**
+ * Option ID for k
+ */
+ public static final OptionID K_ID = new OptionID("ldf.k", "Number of neighbors to use for LDF.");
+
+ /**
+ * Option ID for h - kernel bandwidth scaling
+ */
+ public static final OptionID H_ID = new OptionID("ldf.h", "Kernel bandwidth multiplier for LDF.");
+
+ /**
+ * Option ID for c
+ */
+ public static final OptionID C_ID = new OptionID("ldf.c", "Score scaling parameter for LDF.");
+
+ /**
+ * The neighborhood size to use.
+ */
+ protected int k = 2;
+
+ /**
+ * Kernel density function parameter
+ */
+ KernelDensityFunction kernel;
+
+ /**
+ * Bandwidth scaling factor.
+ */
+ protected double h = 1;
+
+ /**
+ * Scaling constant, to limit value range to 1/c
+ */
+ protected double c = 0.1;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ final IntParameter pK = new IntParameter(K_ID);
+ pK.addConstraint(new GreaterConstraint(1));
+ if (config.grab(pK)) {
+ k = pK.getValue();
+ }
+
+ ObjectParameter<KernelDensityFunction> kernelP = new ObjectParameter<KernelDensityFunction>(KERNEL_ID, KernelDensityFunction.class, GaussianKernelDensityFunction.class);
+ if (config.grab(kernelP)) {
+ kernel = kernelP.instantiateClass(config);
+ }
+
+ DoubleParameter hP = new DoubleParameter(H_ID);
+ if (config.grab(hP)) {
+ h = hP.doubleValue();
+ }
+
+ DoubleParameter cP = new DoubleParameter(C_ID, 0.1);
+ if (config.grab(cP)) {
+ c = cP.doubleValue();
+ }
+ }
+
+ @Override
+ protected LDF<O, D> makeInstance() {
+ return new LDF<O, D>(k, distanceFunction, kernel, h, c);
+ }
+ }
+}
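
The LDE loop in run() above evaluates, for each point p, a kernel density estimate over its neighbors o: each neighbor contributes kernel(max(kdist(o), d(p,o)) / (h * kdist(o))) / (h * kdist(o))^dim, and the final score is LDF = avg / (lde + c * avg), bounded above by 1/c. A worked sketch assuming the standard normal density for the default Gaussian kernel (illustrative names, not the ELKI API):

public class LdeSketch {
  /** Standard normal density, assumed for the default Gaussian kernel. */
  static double gaussian(double u) {
    return Math.exp(-0.5 * u * u) / Math.sqrt(2 * Math.PI);
  }

  /**
   * LDE(p) = (1/k) * sum over neighbors o of
   * kernel(max(kdist(o), d(p,o)) / (h * kdist(o))) / (h * kdist(o))^dim;
   * the max() is the LOF-style reachability smoothing.
   */
  static double lde(double[] distToNeighbor, double[] neighborKdist, double h, int dim) {
    double sum = 0;
    for (int i = 0; i < distToNeighbor.length; i++) {
      double reach = Math.max(neighborKdist[i], distToNeighbor[i]);
      double bandwidth = h * neighborKdist[i];
      sum += gaussian(reach / bandwidth) / Math.pow(bandwidth, dim);
    }
    return sum / distToNeighbor.length;
  }

  public static void main(String[] args) {
    double ldeP = lde(new double[] { 1.0, 2.0 }, new double[] { 1.0, 1.5 }, 1.0, 2);
    double meanNeighborLde = 0.2; // stand-in for the phase-3 averaging loop
    double c = 0.1;
    double ldf = meanNeighborLde / (ldeP + c * meanNeighborLde); // in (0, 1/c]
    System.out.println(ldeP + " -> " + ldf);
  }
}
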
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDOF.java
index 84f5dcc6..fbbfe484 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDOF.java
@@ -31,13 +31,14 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
@@ -81,13 +82,13 @@ public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(LDOF.class);
+ private static final Logging LOG = Logging.getLogger(LDOF.class);
/**
* Parameter to specify the number of nearest neighbors of an object to be
* considered for computing its LDOF_SCORE, must be an integer greater than 1.
*/
- public static final OptionID K_ID = OptionID.getOrCreateOptionID("ldof.k", "The number of nearest neighbors of an object to be considered for computing its LDOF_SCORE.");
+ public static final OptionID K_ID = new OptionID("ldof.k", "The number of nearest neighbors of an object to be considered for computing its LDOF_SCORE.");
/**
* The baseline for LDOF values. The paper gives 0.5 for uniform
@@ -128,21 +129,22 @@ public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
WritableDoubleDataStore ldofs = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
// compute LOF_SCORE of each db object
- if(logger.isVerbose()) {
- logger.verbose("Computing LDOFs");
+ if(LOG.isVerbose()) {
+ LOG.verbose("Computing LDOFs");
}
- FiniteProgress progressLDOFs = logger.isVerbose() ? new FiniteProgress("LDOF_SCORE for objects", relation.size(), logger) : null;
+ FiniteProgress progressLDOFs = LOG.isVerbose() ? new FiniteProgress("LDOF_SCORE for objects", relation.size(), LOG) : null;
Mean dxp = new Mean(), Dxp = new Mean();
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
KNNResult<D> neighbors = knnQuery.getKNNForDBID(iditer, k);
// skip the point itself
dxp.reset(); Dxp.reset();
- for(DistanceResultPair<D> neighbor1 : neighbors) {
- if(!neighbor1.sameDBID(iditer)) {
+ // TODO: optimize for double distances
+ for (DistanceDBIDResultIter<D> neighbor1 = neighbors.iter(); neighbor1.valid(); neighbor1.advance()) {
+ if(!DBIDUtil.equal(neighbor1, iditer)) {
dxp.put(neighbor1.getDistance().doubleValue());
- for(DistanceResultPair<D> neighbor2 : neighbors) {
- if(!neighbor1.sameDBID(neighbor2) && !neighbor2.sameDBID(iditer)) {
+ for (DistanceDBIDResultIter<D> neighbor2 = neighbors.iter(); neighbor2.valid(); neighbor2.advance()) {
+ if(!DBIDUtil.equal(neighbor1, neighbor2) && !DBIDUtil.equal(neighbor2, iditer)) {
Dxp.put(distFunc.distance(neighbor1, neighbor2).doubleValue());
}
}
@@ -157,11 +159,11 @@ public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
ldofminmax.put(ldof);
if(progressLDOFs != null) {
- progressLDOFs.incrementProcessed(logger);
+ progressLDOFs.incrementProcessed(LOG);
}
}
if(progressLDOFs != null) {
- progressLDOFs.ensureCompleted(logger);
+ progressLDOFs.ensureCompleted(LOG);
}
// Build result representation.
@@ -177,7 +179,7 @@ public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -193,7 +195,8 @@ public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter kP = new IntParameter(K_ID, new GreaterConstraint(1));
+ final IntParameter kP = new IntParameter(K_ID);
+ kP.addConstraint(new GreaterConstraint(1));
if(config.grab(kP)) {
k = kP.getValue();
}
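
The dxp/Dxp means above implement the LDOF ratio: the average distance from a point to its k neighbors, divided by the average pairwise distance among those neighbors; the javadoc's baseline of 0.5 is what uniform data tends toward. The same computation on a toy distance matrix (plain Java, hypothetical setup):

public class LdofSketch {
  /**
   * LDOF(p) = mean distance from p to its kNN
   *         / mean pairwise distance among those kNN.
   */
  static double ldof(double[][] dist, int p, int[] knn) {
    double dxp = 0;
    for (int a : knn) {
      dxp += dist[p][a];
    }
    dxp /= knn.length;
    double pairSum = 0;
    int pairs = 0;
    for (int a : knn) {
      for (int b : knn) {
        if (a != b) {
          pairSum += dist[a][b];
          pairs++;
        }
      }
    }
    return dxp / (pairSum / pairs);
  }

  public static void main(String[] args) {
    double[][] d = { { 0, 1, 1 }, { 1, 0, 1 }, { 1, 1, 0 } };
    // 1.0: p is exactly as far from its neighbors as they are from each other.
    System.out.println(ldof(d, 0, new int[] { 1, 2 }));
  }
}
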
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOCI.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOCI.java
index a04aa041..ba9ad20e 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOCI.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOCI.java
@@ -36,13 +36,14 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
-import de.lmu.ifi.dbs.elki.database.query.DistanceDBIDResult;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
+import de.lmu.ifi.dbs.elki.database.ids.DistanceDBIDPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
@@ -64,9 +65,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleIntPair;
/**
* Fast Outlier Detection Using the "Local Correlation Integral".
*
- * Exact implementation only, not aLOCI.
- *
- * TODO: add aLOCI
+ * Exact implementation only, not aLOCI. See {@link ALOCI}
*
* Outlier detection using multiple epsilon neighborhoods.
*
@@ -88,23 +87,23 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(LOCI.class);
+ private static final Logging LOG = Logging.getLogger(LOCI.class);
/**
* Parameter to specify the maximum radius of the neighborhood to be
* considered, must be suitable to the distance function specified.
*/
- public static final OptionID RMAX_ID = OptionID.getOrCreateOptionID("loci.rmax", "The maximum radius of the neighborhood to be considered.");
+ public static final OptionID RMAX_ID = new OptionID("loci.rmax", "The maximum radius of the neighborhood to be considered.");
/**
* Parameter to specify the minimum neighborhood size
*/
- public static final OptionID NMIN_ID = OptionID.getOrCreateOptionID("loci.nmin", "Minimum neighborhood size to be considered.");
+ public static final OptionID NMIN_ID = new OptionID("loci.nmin", "Minimum neighborhood size to be considered.");
/**
* Parameter to specify the averaging neighborhood scaling.
*/
- public static final OptionID ALPHA_ID = OptionID.getOrCreateOptionID("loci.alpha", "Scaling factor for averaging neighborhood");
+ public static final OptionID ALPHA_ID = new OptionID("loci.alpha", "Scaling factor for averaging neighborhood");
/**
* Holds the value of {@link #RMAX_ID}.
@@ -147,16 +146,16 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
DistanceQuery<O, D> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
RangeQuery<O, D> rangeQuery = database.getRangeQuery(distFunc);
- FiniteProgress progressPreproc = logger.isVerbose() ? new FiniteProgress("LOCI preprocessing", relation.size(), logger) : null;
+ FiniteProgress progressPreproc = LOG.isVerbose() ? new FiniteProgress("LOCI preprocessing", relation.size(), LOG) : null;
// LOCI preprocessing step
WritableDataStore<ArrayList<DoubleIntPair>> interestingDistances = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_SORTED, ArrayList.class);
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
DistanceDBIDResult<D> neighbors = rangeQuery.getRangeForDBID(iditer, rmax);
// build list of critical distances
- ArrayList<DoubleIntPair> cdist = new ArrayList<DoubleIntPair>(neighbors.size() * 2);
+ ArrayList<DoubleIntPair> cdist = new ArrayList<DoubleIntPair>(neighbors.size() << 1);
{
for(int i = 0; i < neighbors.size(); i++) {
- DistanceResultPair<D> r = neighbors.get(i);
+ DistanceDBIDPair<D> r = neighbors.get(i);
if(i + 1 < neighbors.size() && r.getDistance().compareTo(neighbors.get(i + 1).getDistance()) == 0) {
continue;
}
@@ -182,14 +181,14 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
interestingDistances.put(iditer, cdist);
if(progressPreproc != null) {
- progressPreproc.incrementProcessed(logger);
+ progressPreproc.incrementProcessed(LOG);
}
}
if(progressPreproc != null) {
- progressPreproc.ensureCompleted(logger);
+ progressPreproc.ensureCompleted(LOG);
}
// LOCI main step
- FiniteProgress progressLOCI = logger.isVerbose() ? new FiniteProgress("LOCI scores", relation.size(), logger) : null;
+ FiniteProgress progressLOCI = LOG.isVerbose() ? new FiniteProgress("LOCI scores", relation.size(), LOG) : null;
WritableDoubleDataStore mdef_norm = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
WritableDoubleDataStore mdef_radius = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
DoubleMinMax minmax = new DoubleMinMax();
@@ -204,9 +203,8 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
if(maxneig >= nmin) {
D range = distFunc.getDistanceFactory().fromDouble(maxdist);
// Compute the largest neighborhood we will need.
- List<DistanceResultPair<D>> maxneighbors = rangeQuery.getRangeForDBID(iditer, range);
- // Ensure the set is sorted. Should be a no-op with most indexes.
- Collections.sort(maxneighbors);
+ DistanceDBIDResult<D> maxneighbors = rangeQuery.getRangeForDBID(iditer, range);
+ // TODO: Ensure the set is sorted. Should be a no-op with most indexes.
// For any critical distance, compute the normalized MDEF score.
for(DoubleIntPair c : cdist) {
// Only start when minimum size is fulfilled
@@ -219,12 +217,13 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
final int n_alphar = elementsAtRadius(cdist, alpha_r);
// compute \hat{n}(p_i, r, \alpha) and the corresponding \sigma_{MDEF}
MeanVariance mv_n_r_alpha = new MeanVariance();
- for(DistanceResultPair<D> ne : maxneighbors) {
+ // TODO: optimize for double distances
+ for (DistanceDBIDResultIter<D> neighbor = maxneighbors.iter(); neighbor.valid(); neighbor.advance()) {
// Stop at radius r
- if(ne.getDistance().doubleValue() > r) {
+ if(neighbor.getDistance().doubleValue() > r) {
break;
}
- int rn_alphar = elementsAtRadius(interestingDistances.get(ne), alpha_r);
+ int rn_alphar = elementsAtRadius(interestingDistances.get(neighbor), alpha_r);
mv_n_r_alpha.put(rn_alphar);
}
// We only use the average and standard deviation
@@ -251,11 +250,11 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
mdef_radius.putDouble(iditer, maxnormr);
minmax.put(maxmdefnorm);
if(progressLOCI != null) {
- progressLOCI.incrementProcessed(logger);
+ progressLOCI.incrementProcessed(LOG);
}
}
if(progressLOCI != null) {
- progressLOCI.ensureCompleted(logger);
+ progressLOCI.ensureCompleted(LOG);
}
Relation<Double> scoreResult = new MaterializedRelation<Double>("LOCI normalized MDEF", "loci-mdef-outlier", TypeUtil.DOUBLE, mdef_norm, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
@@ -293,7 +292,7 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -335,4 +334,4 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
return new LOCI<O, D>(distanceFunction, rmax, nmin, alpha);
}
}
-} \ No newline at end of file
+}
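
The mv_n_r_alpha mean/variance above feeds the normalized MDEF score: with nHat the mean of the neighbors' alpha-neighborhood counts and sigma their standard deviation, MDEF = 1 - n(p, alpha*r)/nHat, sigmaMDEF = sigma/nHat, and the reported score is their quotient. One critical distance, sketched in plain Java with hypothetical counts:

public class MdefSketch {
  /**
   * counts[i] = n(q_i, alpha*r) for the neighbors q_i within r of p;
   * nP = n(p, alpha*r). Returns MDEF / sigmaMDEF = (nHat - nP) / sigma.
   */
  static double normalizedMdef(int nP, int[] counts) {
    double nHat = 0;
    for (int c : counts) {
      nHat += c;
    }
    nHat /= counts.length;
    double var = 0;
    for (int c : counts) {
      var += (c - nHat) * (c - nHat);
    }
    double sigma = Math.sqrt(var / counts.length);
    return (1 - nP / nHat) / (sigma / nHat);
  }

  public static void main(String[] args) {
    // ~4.9: p has far fewer alpha-neighbors than its r-neighborhood average.
    System.out.println(normalizedMdef(2, new int[] { 8, 10, 12 }));
  }
}
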
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOF.java
index 5aba41ec..66bed47a 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOF.java
@@ -29,29 +29,31 @@ import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.QueryUtil;
-import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.DoubleDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery;
import de.lmu.ifi.dbs.elki.database.query.rknn.RKNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDResultIter;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNList;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
-import de.lmu.ifi.dbs.elki.math.Mean;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
@@ -118,19 +120,19 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(LOF.class);
+ private static final Logging LOG = Logging.getLogger(LOF.class);
/**
* The distance function to determine the reachability distance between
* database objects.
*/
- public static final OptionID REACHABILITY_DISTANCE_FUNCTION_ID = OptionID.getOrCreateOptionID("lof.reachdistfunction", "Distance function to determine the reachability distance between database objects.");
+ public static final OptionID REACHABILITY_DISTANCE_FUNCTION_ID = new OptionID("lof.reachdistfunction", "Distance function to determine the reachability distance between database objects.");
/**
* Parameter to specify the number of nearest neighbors of an object to be
* considered for computing its LOF_SCORE, must be an integer greater than 1.
*/
- public static final OptionID K_ID = OptionID.getOrCreateOptionID("lof.k", "The number of nearest neighbors of an object to be considered for computing its LOF_SCORE.");
+ public static final OptionID K_ID = new OptionID("lof.k", "The number of nearest neighbors of an object to be considered for computing its LOF_SCORE.");
/**
* Holds the value of {@link #K_ID}.
@@ -189,9 +191,10 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
* calling {@link #doRunInTime}.
*
* @param relation Data to process
+ * @return LOF outlier result
*/
public OutlierResult run(Relation<O> relation) {
- StepProgress stepprog = logger.isVerbose() ? new StepProgress("LOF", 3) : null;
+ StepProgress stepprog = LOG.isVerbose() ? new StepProgress("LOF", 3) : null;
Pair<KNNQuery<O, D>, KNNQuery<O, D>> pair = getKNNQueries(relation, stepprog);
KNNQuery<O, D> kNNRefer = pair.getFirst();
KNNQuery<O, D> kNNReach = pair.getSecond();
@@ -209,13 +212,12 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
// "HEAVY" flag for knnReach since it is used more than once
KNNQuery<O, D> knnReach = QueryUtil.getKNNQuery(relation, reachabilityDistanceFunction, k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
// No optimized kNN query - use a preprocessor!
- if(!(knnReach instanceof PreprocessorKNNQuery)) {
- if(stepprog != null) {
- if(neighborhoodDistanceFunction.equals(reachabilityDistanceFunction)) {
- stepprog.beginStep(1, "Materializing neighborhoods w.r.t. reference neighborhood distance function.", logger);
- }
- else {
- stepprog.beginStep(1, "Not materializing neighborhoods w.r.t. reference neighborhood distance function, but materializing neighborhoods w.r.t. reachability distance function.", logger);
+ if (!(knnReach instanceof PreprocessorKNNQuery)) {
+ if (stepprog != null) {
+ if (neighborhoodDistanceFunction.equals(reachabilityDistanceFunction)) {
+ stepprog.beginStep(1, "Materializing neighborhoods w.r.t. reference neighborhood distance function.", LOG);
+ } else {
+ stepprog.beginStep(1, "Not materializing neighborhoods w.r.t. reference neighborhood distance function, but materializing neighborhoods w.r.t. reachability distance function.", LOG);
}
}
MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<O, D>(relation, reachabilityDistanceFunction, k);
@@ -226,10 +228,9 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
// knnReach is only used once
KNNQuery<O, D> knnRefer;
- if(neighborhoodDistanceFunction == reachabilityDistanceFunction || neighborhoodDistanceFunction.equals(reachabilityDistanceFunction)) {
+ if (neighborhoodDistanceFunction == reachabilityDistanceFunction || neighborhoodDistanceFunction.equals(reachabilityDistanceFunction)) {
knnRefer = knnReach;
- }
- else {
+ } else {
// do not materialize the first neighborhood, since it is used only once
knnRefer = QueryUtil.getKNNQuery(relation, neighborhoodDistanceFunction, k);
}
@@ -251,30 +252,30 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
*/
protected LOFResult<O, D> doRunInTime(DBIDs ids, KNNQuery<O, D> kNNRefer, KNNQuery<O, D> kNNReach, StepProgress stepprog) {
// Assert we got something
- if(kNNRefer == null) {
+ if (kNNRefer == null) {
throw new AbortException("No kNN queries supported by database for reference neighborhood distance function.");
}
- if(kNNReach == null) {
+ if (kNNReach == null) {
throw new AbortException("No kNN queries supported by database for reachability distance function.");
}
// Compute LRDs
- if(stepprog != null) {
- stepprog.beginStep(2, "Computing LRDs.", logger);
+ if (stepprog != null) {
+ stepprog.beginStep(2, "Computing LRDs.", LOG);
}
WritableDoubleDataStore lrds = computeLRDs(ids, kNNReach);
// compute LOF_SCORE of each db object
- if(stepprog != null) {
- stepprog.beginStep(3, "Computing LOFs.", logger);
+ if (stepprog != null) {
+ stepprog.beginStep(3, "Computing LOFs.", LOG);
}
Pair<WritableDoubleDataStore, DoubleMinMax> lofsAndMax = computeLOFs(ids, lrds, kNNRefer);
WritableDoubleDataStore lofs = lofsAndMax.getFirst();
// track the maximum value for normalization.
DoubleMinMax lofminmax = lofsAndMax.getSecond();
- if(stepprog != null) {
- stepprog.setCompleted(logger);
+ if (stepprog != null) {
+ stepprog.setCompleted(LOG);
}
// Build result representation.
@@ -295,26 +296,44 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
*/
protected WritableDoubleDataStore computeLRDs(DBIDs ids, KNNQuery<O, D> knnReach) {
WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
- FiniteProgress lrdsProgress = logger.isVerbose() ? new FiniteProgress("LRD", ids.size(), logger) : null;
- Mean mean = new Mean();
- for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
- mean.reset();
- KNNResult<D> neighbors = knnReach.getKNNForDBID(iter, k);
- for(DistanceResultPair<D> neighbor : neighbors) {
- if(objectIsInKNN || !neighbor.sameDBID(iter)) {
- KNNResult<D> neighborsNeighbors = knnReach.getKNNForDBID(neighbor, k);
- mean.put(Math.max(neighbor.getDistance().doubleValue(), neighborsNeighbors.getKNNDistance().doubleValue()));
+ FiniteProgress lrdsProgress = LOG.isVerbose() ? new FiniteProgress("LRD", ids.size(), LOG) : null;
+ for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ final KNNResult<D> neighbors = knnReach.getKNNForDBID(iter, k);
+ double sum = 0.0;
+ int count = 0;
+ if (neighbors instanceof DoubleDistanceKNNList) {
+ // Fast version for double distances
+ for (DoubleDistanceDBIDResultIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
+ if (objectIsInKNN || !DBIDUtil.equal(neighbor, iter)) {
+ KNNResult<D> neighborsNeighbors = knnReach.getKNNForDBID(neighbor, k);
+ final double nkdist;
+ if (neighborsNeighbors instanceof DoubleDistanceKNNList) {
+ nkdist = ((DoubleDistanceKNNList) neighborsNeighbors).doubleKNNDistance();
+ } else {
+ nkdist = neighborsNeighbors.getKNNDistance().doubleValue();
+ }
+ sum += Math.max(neighbor.doubleDistance(), nkdist);
+ count++;
+ }
+ }
+ } else {
+ for (DistanceDBIDResultIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ if (objectIsInKNN || !DBIDUtil.equal(neighbor, iter)) {
+ KNNResult<D> neighborsNeighbors = knnReach.getKNNForDBID(neighbor, k);
+ sum += Math.max(neighbor.getDistance().doubleValue(), neighborsNeighbors.getKNNDistance().doubleValue());
+ count++;
+ }
}
}
// Avoid division by 0
- final double lrd = (mean.getCount() > 0) ? 1 / mean.getMean() : 0.0;
+ final double lrd = (sum > 0) ? (count / sum) : 0;
lrds.putDouble(iter, lrd);
- if(lrdsProgress != null) {
- lrdsProgress.incrementProcessed(logger);
+ if (lrdsProgress != null) {
+ lrdsProgress.incrementProcessed(LOG);
}
}
- if(lrdsProgress != null) {
- lrdsProgress.ensureCompleted(logger);
+ if (lrdsProgress != null) {
+ lrdsProgress.ensureCompleted(LOG);
}
return lrds;
}
@@ -328,40 +347,40 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
* reference distance
* @return the LOFs of the objects and the maximum LOF
*/
- protected Pair<WritableDoubleDataStore, DoubleMinMax> computeLOFs(DBIDs ids, DataStore<Double> lrds, KNNQuery<O, D> knnRefer) {
+ protected Pair<WritableDoubleDataStore, DoubleMinMax> computeLOFs(DBIDs ids, DoubleDataStore lrds, KNNQuery<O, D> knnRefer) {
WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
// track the maximum value for normalization.
DoubleMinMax lofminmax = new DoubleMinMax();
- FiniteProgress progressLOFs = logger.isVerbose() ? new FiniteProgress("LOF_SCORE for objects", ids.size(), logger) : null;
- Mean mean = new Mean();
- for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
- double lrdp = lrds.get(iter);
+ FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("LOF_SCORE for objects", ids.size(), LOG) : null;
+ for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ final double lrdp = lrds.doubleValue(iter);
final double lof;
- if(lrdp > 0) {
+ if (lrdp > 0) {
final KNNResult<D> neighbors = knnRefer.getKNNForDBID(iter, k);
- mean.reset();
- for(DistanceResultPair<D> neighbor : neighbors) {
+ double sum = 0.0;
+ int count = 0;
+ for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
// skip the point itself
- if(objectIsInKNN || !neighbor.sameDBID(iter)) {
- mean.put(lrds.get(neighbor));
+ if (objectIsInKNN || !DBIDUtil.equal(neighbor, iter)) {
+ sum += lrds.doubleValue(neighbor);
+ count++;
}
}
- lof = mean.getMean() / lrdp;
- }
- else {
+ lof = sum / (count * lrdp);
+ } else {
lof = 1.0;
}
lofs.putDouble(iter, lof);
// update minimum and maximum
lofminmax.put(lof);
- if(progressLOFs != null) {
- progressLOFs.incrementProcessed(logger);
+ if (progressLOFs != null) {
+ progressLOFs.incrementProcessed(LOG);
}
}
- if(progressLOFs != null) {
- progressLOFs.ensureCompleted(logger);
+ if (progressLOFs != null) {
+ progressLOFs.ensureCompleted(LOG);
}
return new Pair<WritableDoubleDataStore, DoubleMinMax>(lofs, lofminmax);
}
@@ -369,10 +388,9 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
@Override
public TypeInformation[] getInputTypeRestriction() {
final TypeInformation type;
- if(reachabilityDistanceFunction.equals(neighborhoodDistanceFunction)) {
+ if (reachabilityDistanceFunction.equals(neighborhoodDistanceFunction)) {
type = reachabilityDistanceFunction.getInputTypeRestriction();
- }
- else {
+ } else {
type = new CombinedTypeInformation(neighborhoodDistanceFunction.getInputTypeRestriction(), reachabilityDistanceFunction.getInputTypeRestriction());
}
return TypeUtil.array(type);
@@ -380,7 +398,7 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -442,6 +460,8 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
}
/**
+   * Get the kNN query for the reference set.
+ *
* @return the kNN query w.r.t. the reference neighborhood distance
*/
public KNNQuery<O, D> getKNNRefer() {
@@ -449,6 +469,8 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
}
/**
+   * Get the kNN query for the reachability set.
+ *
* @return the kNN query w.r.t. the reachability distance
*/
public KNNQuery<O, D> getKNNReach() {
@@ -456,6 +478,8 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
}
/**
+ * Get the LRD data store.
+ *
* @return the LRD values of the objects
*/
public WritableDoubleDataStore getLrds() {
@@ -463,6 +487,8 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
}
/**
+ * Get the LOF data store.
+ *
* @return the LOF values of the objects
*/
public WritableDoubleDataStore getLofs() {
@@ -470,6 +496,8 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
}
/**
+ * Get the outlier result.
+ *
* @return the result of the run of the {@link LOF} algorithm
*/
public OutlierResult getResult() {
@@ -486,6 +514,8 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
}
/**
+ * Get the RkNN query for the reference set.
+ *
* @return the RkNN query w.r.t. the reference neighborhood distance
*/
public RKNNQuery<O, D> getRkNNRefer() {
@@ -493,6 +523,8 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
}
/**
+ * Get the RkNN query for the reachability set.
+ *
* @return the RkNN query w.r.t. the reachability distance
*/
public RKNNQuery<O, D> getRkNNReach() {
@@ -518,7 +550,7 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
*/
public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
/**
- * The neighborhood size to use
+ * The neighborhood size to use.
*/
protected int k = 2;
@@ -536,13 +568,14 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter pK = new IntParameter(K_ID, new GreaterConstraint(1));
- if(config.grab(pK)) {
+ final IntParameter pK = new IntParameter(K_ID);
+ pK.addConstraint(new GreaterConstraint(1));
+ if (config.grab(pK)) {
k = pK.getValue();
}
final ObjectParameter<DistanceFunction<O, D>> reachDistP = new ObjectParameter<DistanceFunction<O, D>>(REACHABILITY_DISTANCE_FUNCTION_ID, DistanceFunction.class, true);
- if(config.grab(reachDistP)) {
+ if (config.grab(reachDistP)) {
reachabilityDistanceFunction = reachDistP.instantiateClass(config);
}
}
@@ -554,4 +587,4 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
return new LOF<O, D>(k, distanceFunction, rdist);
}
}
-}
\ No newline at end of file
+}
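
The substantive change in LOF.java is the removal of the Mean helper from both inner loops: computeLRDs now accumulates a plain sum and count (with a fast path when the neighbor list is a DoubleDistanceKNNList), and computeLOFs folds the neighbor mean and the final ratio into a single division. A plain-array sketch of the resulting arithmetic, with the kNN lookups assumed to be done elsewhere:

public final class LofMathSketch {
  // reachDists[i] = max(dist(p, o_i), kDistance(o_i)) over the neighbors o_i of p,
  // already excluding p itself unless objectIsInKNN is set.
  static double lrd(double[] reachDists) {
    double sum = 0.0;
    for (double rd : reachDists) {
      sum += rd;
    }
    // Replaces 1 / Mean.getMean(); the guard avoids division by zero.
    return (sum > 0) ? (reachDists.length / sum) : 0.0;
  }

  static double lof(double lrdP, double[] neighborLrds) {
    if (!(lrdP > 0)) {
      return 1.0; // degenerate case, handled the same way in computeLOFs
    }
    double sum = 0.0;
    for (double l : neighborLrds) {
      sum += l;
    }
    // mean(neighbor lrds) / lrd(p), folded into one division as in the patch.
    return sum / (neighborLrds.length * lrdP);
  }
}
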
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LoOP.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LoOP.java
index dc0d26a4..5da06983 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LoOP.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LoOP.java
@@ -33,15 +33,18 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDResultIter;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNList;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -76,7 +79,8 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
*
* @apiviz.has KNNQuery
*
- * @param <O> the type of DatabaseObjects handled by this Algorithm
+ * @param <O> type of objects handled by this algorithm
+ * @param <D> type of distances used
*/
@Title("LoOP: Local Outlier Probabilities")
@Description("Variant of the LOF algorithm normalized using statistical values.")
@@ -85,37 +89,37 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(LoOP.class);
+ private static final Logging LOG = Logging.getLogger(LoOP.class);
/**
* The distance function to determine the reachability distance between
* database objects.
*/
- public static final OptionID REACHABILITY_DISTANCE_FUNCTION_ID = OptionID.getOrCreateOptionID("loop.referencedistfunction", "Distance function to determine the density of an object.");
+ public static final OptionID REACHABILITY_DISTANCE_FUNCTION_ID = new OptionID("loop.referencedistfunction", "Distance function to determine the density of an object.");
/**
* The distance function to determine the reachability distance between
* database objects.
*/
- public static final OptionID COMPARISON_DISTANCE_FUNCTION_ID = OptionID.getOrCreateOptionID("loop.comparedistfunction", "Distance function to determine the reference set of an object.");
+ public static final OptionID COMPARISON_DISTANCE_FUNCTION_ID = new OptionID("loop.comparedistfunction", "Distance function to determine the reference set of an object.");
/**
* Parameter to specify the number of nearest neighbors of an object to be
* considered for computing its LOOP_SCORE, must be an integer greater than 1.
*/
- public static final OptionID KREACH_ID = OptionID.getOrCreateOptionID("loop.kref", "The number of nearest neighbors of an object to be used for the PRD value.");
+ public static final OptionID KREACH_ID = new OptionID("loop.kref", "The number of nearest neighbors of an object to be used for the PRD value.");
/**
* Parameter to specify the number of nearest neighbors of an object to be
* considered for computing its LOOP_SCORE, must be an integer greater than 1.
*/
- public static final OptionID KCOMP_ID = OptionID.getOrCreateOptionID("loop.kcomp", "The number of nearest neighbors of an object to be considered for computing its LOOP_SCORE.");
+ public static final OptionID KCOMP_ID = new OptionID("loop.kcomp", "The number of nearest neighbors of an object to be considered for computing its LOOP_SCORE.");
/**
   * Parameter to specify the number of standard deviations to consider for
   * density computation, must be a double greater than 0.
*/
- public static final OptionID LAMBDA_ID = OptionID.getOrCreateOptionID("loop.lambda", "The number of standard deviations to consider for density computation.");
+ public static final OptionID LAMBDA_ID = new OptionID("loop.lambda", "The number of standard deviations to consider for density computation.");
/**
* Holds the value of {@link #KREACH_ID}.
@@ -133,12 +137,12 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
double lambda;
/**
- * Preprocessor Step 1
+ * Preprocessor Step 1.
*/
protected DistanceFunction<? super O, D> reachabilityDistanceFunction;
/**
- * Preprocessor Step 2
+ * Preprocessor Step 2.
*/
protected DistanceFunction<? super O, D> comparisonDistanceFunction;
@@ -150,11 +154,11 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
/**
* Constructor with parameters.
*
- * @param kreach
- * @param kcomp
- * @param reachabilityDistanceFunction
- * @param comparisonDistanceFunction
- * @param lambda
+ * @param kreach k for reachability
+ * @param kcomp k for comparison
+ * @param reachabilityDistanceFunction distance function for reachability
+ * @param comparisonDistanceFunction distance function for comparison
+ * @param lambda Lambda parameter
*/
public LoOP(int kreach, int kcomp, DistanceFunction<? super O, D> reachabilityDistanceFunction, DistanceFunction<? super O, D> comparisonDistanceFunction, double lambda) {
super();
@@ -168,36 +172,35 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
/**
* Get the kNN queries for the algorithm.
*
- * @param database Database
- * @param stepprog Progress logger
+ * @param database Database to analyze
+ * @param relation Relation to analyze
+ * @param stepprog Progress logger, may be {@code null}
* @return result
*/
protected Pair<KNNQuery<O, D>, KNNQuery<O, D>> getKNNQueries(Database database, Relation<O> relation, StepProgress stepprog) {
KNNQuery<O, D> knnComp;
KNNQuery<O, D> knnReach;
- if(comparisonDistanceFunction == reachabilityDistanceFunction || comparisonDistanceFunction.equals(reachabilityDistanceFunction)) {
+ if (comparisonDistanceFunction == reachabilityDistanceFunction || comparisonDistanceFunction.equals(reachabilityDistanceFunction)) {
// We need each neighborhood twice - use "HEAVY" flag.
knnComp = QueryUtil.getKNNQuery(relation, comparisonDistanceFunction, Math.max(kreach, kcomp), DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
// No optimized kNN query - use a preprocessor!
- if(knnComp == null) {
- if(stepprog != null) {
- stepprog.beginStep(1, "Materializing neighborhoods with respect to reference neighborhood distance function.", logger);
+ if (knnComp == null) {
+ if (stepprog != null) {
+ stepprog.beginStep(1, "Materializing neighborhoods with respect to reference neighborhood distance function.", LOG);
}
MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<O, D>(relation, comparisonDistanceFunction, kcomp);
database.addIndex(preproc);
DistanceQuery<O, D> cdq = database.getDistanceQuery(relation, comparisonDistanceFunction);
knnComp = preproc.getKNNQuery(cdq, kreach, DatabaseQuery.HINT_HEAVY_USE);
- }
- else {
- if(stepprog != null) {
- stepprog.beginStep(1, "Optimized neighborhoods provided by database.", logger);
+ } else {
+ if (stepprog != null) {
+ stepprog.beginStep(1, "Optimized neighborhoods provided by database.", LOG);
}
}
knnReach = knnComp;
- }
- else {
- if(stepprog != null) {
- stepprog.beginStep(1, "Not materializing distance functions, since we request each DBID once only.", logger);
+ } else {
+ if (stepprog != null) {
+ stepprog.beginStep(1, "Not materializing distance functions, since we request each DBID once only.", LOG);
}
knnComp = QueryUtil.getKNNQuery(relation, comparisonDistanceFunction, kreach);
knnReach = QueryUtil.getKNNQuery(relation, reachabilityDistanceFunction, kcomp);
@@ -215,17 +218,17 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
public OutlierResult run(Database database, Relation<O> relation) {
final double sqrt2 = Math.sqrt(2.0);
- StepProgress stepprog = logger.isVerbose() ? new StepProgress(5) : null;
+ StepProgress stepprog = LOG.isVerbose() ? new StepProgress(5) : null;
Pair<KNNQuery<O, D>, KNNQuery<O, D>> pair = getKNNQueries(database, relation, stepprog);
KNNQuery<O, D> knnComp = pair.getFirst();
KNNQuery<O, D> knnReach = pair.getSecond();
// Assert we got something
- if(knnComp == null) {
+ if (knnComp == null) {
throw new AbortException("No kNN queries supported by database for comparison distance function.");
}
- if(knnReach == null) {
+ if (knnReach == null) {
throw new AbortException("No kNN queries supported by database for density estimation distance function.");
}
@@ -233,29 +236,43 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
WritableDoubleDataStore pdists = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
Mean mean = new Mean();
{// computing PRDs
- if(stepprog != null) {
- stepprog.beginStep(3, "Computing pdists", logger);
+ if (stepprog != null) {
+ stepprog.beginStep(3, "Computing pdists", LOG);
}
- FiniteProgress prdsProgress = logger.isVerbose() ? new FiniteProgress("pdists", relation.size(), logger) : null;
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ FiniteProgress prdsProgress = LOG.isVerbose() ? new FiniteProgress("pdists", relation.size(), LOG) : null;
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
final KNNResult<D> neighbors = knnReach.getKNNForDBID(iditer, kreach);
mean.reset();
// use first kref neighbors as reference set
int ks = 0;
- for(DistanceResultPair<D> neighbor : neighbors) {
- if(objectIsInKNN || !neighbor.sameDBID(iditer)) {
- double d = neighbor.getDistance().doubleValue();
- mean.put(d * d);
- ks++;
- if(ks >= kreach) {
- break;
+ // TODO: optimize for double distances
+ if (neighbors instanceof DoubleDistanceKNNList) {
+ for (DoubleDistanceDBIDResultIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
+ if (objectIsInKNN || !DBIDUtil.equal(neighbor, iditer)) {
+ final double d = neighbor.doubleDistance();
+ mean.put(d * d);
+ ks++;
+ if (ks >= kreach) {
+ break;
+ }
+ }
+ }
+ } else {
+ for (DistanceDBIDResultIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ if (objectIsInKNN || !DBIDUtil.equal(neighbor, iditer)) {
+ double d = neighbor.getDistance().doubleValue();
+ mean.put(d * d);
+ ks++;
+ if (ks >= kreach) {
+ break;
+ }
}
}
}
double pdist = lambda * Math.sqrt(mean.getMean());
pdists.putDouble(iditer, pdist);
- if(prdsProgress != null) {
- prdsProgress.incrementProcessed(logger);
+ if (prdsProgress != null) {
+ prdsProgress.incrementProcessed(LOG);
}
}
}
@@ -263,63 +280,63 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
WritableDoubleDataStore plofs = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
MeanVariance mvplof = new MeanVariance();
{// compute LOOP_SCORE of each db object
- if(stepprog != null) {
- stepprog.beginStep(4, "Computing PLOF", logger);
+ if (stepprog != null) {
+ stepprog.beginStep(4, "Computing PLOF", LOG);
}
- FiniteProgress progressPLOFs = logger.isVerbose() ? new FiniteProgress("PLOFs for objects", relation.size(), logger) : null;
+ FiniteProgress progressPLOFs = LOG.isVerbose() ? new FiniteProgress("PLOFs for objects", relation.size(), LOG) : null;
MeanVariance mv = new MeanVariance();
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
final KNNResult<D> neighbors = knnComp.getKNNForDBID(iditer, kcomp);
mv.reset();
// use first kref neighbors as comparison set.
int ks = 0;
- for(DistanceResultPair<D> neighbor1 : neighbors) {
- if(objectIsInKNN || !neighbor1.sameDBID(iditer)) {
- mv.put(pdists.doubleValue(neighbor1));
+ for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ if (objectIsInKNN || !DBIDUtil.equal(neighbor, iditer)) {
+ mv.put(pdists.doubleValue(neighbor));
ks++;
- if(ks >= kcomp) {
+ if (ks >= kcomp) {
break;
}
}
}
double plof = Math.max(pdists.doubleValue(iditer) / mv.getMean(), 1.0);
- if(Double.isNaN(plof) || Double.isInfinite(plof)) {
+ if (Double.isNaN(plof) || Double.isInfinite(plof)) {
plof = 1.0;
}
plofs.putDouble(iditer, plof);
mvplof.put((plof - 1.0) * (plof - 1.0));
- if(progressPLOFs != null) {
- progressPLOFs.incrementProcessed(logger);
+ if (progressPLOFs != null) {
+ progressPLOFs.incrementProcessed(LOG);
}
}
}
double nplof = lambda * Math.sqrt(mvplof.getMean());
- if(logger.isDebugging()) {
- logger.verbose("nplof normalization factor is " + nplof + " " + mvplof.getMean() + " " + mvplof.getSampleStddev());
+ if (LOG.isDebugging()) {
+ LOG.verbose("nplof normalization factor is " + nplof + " " + mvplof.getMean() + " " + mvplof.getSampleStddev());
}
// Compute final LoOP values.
WritableDoubleDataStore loops = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
{// compute LOOP_SCORE of each db object
- if(stepprog != null) {
- stepprog.beginStep(5, "Computing LoOP scores", logger);
+ if (stepprog != null) {
+ stepprog.beginStep(5, "Computing LoOP scores", LOG);
}
- FiniteProgress progressLOOPs = logger.isVerbose() ? new FiniteProgress("LoOP for objects", relation.size(), logger) : null;
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ FiniteProgress progressLOOPs = LOG.isVerbose() ? new FiniteProgress("LoOP for objects", relation.size(), LOG) : null;
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
loops.putDouble(iditer, NormalDistribution.erf((plofs.doubleValue(iditer) - 1) / (nplof * sqrt2)));
- if(progressLOOPs != null) {
- progressLOOPs.incrementProcessed(logger);
+ if (progressLOOPs != null) {
+ progressLOOPs.incrementProcessed(LOG);
}
}
}
- if(stepprog != null) {
- stepprog.setCompleted(logger);
+ if (stepprog != null) {
+ stepprog.setCompleted(LOG);
}
// Build result representation.
@@ -331,10 +348,9 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
@Override
public TypeInformation[] getInputTypeRestriction() {
final TypeInformation type;
- if(reachabilityDistanceFunction.equals(comparisonDistanceFunction)) {
+ if (reachabilityDistanceFunction.equals(comparisonDistanceFunction)) {
type = reachabilityDistanceFunction.getInputTypeRestriction();
- }
- else {
+ } else {
type = new CombinedTypeInformation(reachabilityDistanceFunction.getInputTypeRestriction(), comparisonDistanceFunction.getInputTypeRestriction());
}
return TypeUtil.array(type);
@@ -342,7 +358,7 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -369,45 +385,48 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
double lambda = 2.0;
/**
- * Preprocessor Step 1
+ * Preprocessor Step 1.
*/
protected DistanceFunction<O, D> reachabilityDistanceFunction = null;
/**
- * Preprocessor Step 2
+ * Preprocessor Step 2.
*/
protected DistanceFunction<O, D> comparisonDistanceFunction = null;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter kcompP = new IntParameter(KCOMP_ID, new GreaterConstraint(1));
- if(config.grab(kcompP)) {
- kcomp = kcompP.getValue();
+ final IntParameter kcompP = new IntParameter(KCOMP_ID);
+ kcompP.addConstraint(new GreaterConstraint(1));
+ if (config.grab(kcompP)) {
+ kcomp = kcompP.intValue();
}
final ObjectParameter<DistanceFunction<O, D>> compDistP = new ObjectParameter<DistanceFunction<O, D>>(COMPARISON_DISTANCE_FUNCTION_ID, DistanceFunction.class, EuclideanDistanceFunction.class);
- if(config.grab(compDistP)) {
+ if (config.grab(compDistP)) {
comparisonDistanceFunction = compDistP.instantiateClass(config);
}
- final IntParameter kreachP = new IntParameter(KREACH_ID, new GreaterConstraint(1), true);
- if(config.grab(kreachP)) {
- kreach = kreachP.getValue();
- }
- else {
+ final IntParameter kreachP = new IntParameter(KREACH_ID);
+ kreachP.addConstraint(new GreaterConstraint(1));
+ kreachP.setOptional(true);
+ if (config.grab(kreachP)) {
+ kreach = kreachP.intValue();
+ } else {
kreach = kcomp;
}
final ObjectParameter<DistanceFunction<O, D>> reachDistP = new ObjectParameter<DistanceFunction<O, D>>(REACHABILITY_DISTANCE_FUNCTION_ID, DistanceFunction.class, true);
- if(config.grab(reachDistP)) {
+ if (config.grab(reachDistP)) {
reachabilityDistanceFunction = reachDistP.instantiateClass(config);
}
// TODO: make default 1.0?
- final DoubleParameter lambdaP = new DoubleParameter(LAMBDA_ID, new GreaterConstraint(0.0), 2.0);
- if(config.grab(lambdaP)) {
- lambda = lambdaP.getValue();
+ final DoubleParameter lambdaP = new DoubleParameter(LAMBDA_ID, 2.0);
+ lambdaP.addConstraint(new GreaterConstraint(0.0));
+ if (config.grab(lambdaP)) {
+ lambda = lambdaP.doubleValue();
}
}
@@ -417,4 +436,4 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
return new LoOP<O, D>(kreach, kcomp, realreach, comparisonDistanceFunction, lambda);
}
}
-}
\ No newline at end of file
+}
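
LoOP.java receives the same iterator migration plus a double-distance fast path in the pdist loop; the probabilistic machinery is unchanged. pdist is lambda times the quadratic mean of the neighbor distances, PLOF is the ratio of an object's pdist to its neighbors' mean pdist (clamped to at least 1), and the final score pushes PLOF - 1 through the Gaussian error function. A sketch of that arithmetic on plain doubles, with the per-object aggregates assumed precomputed:

import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution;

public final class LoopMathSketch {
  // pdist = lambda * sqrt(mean of squared neighbor distances), as in the pdists loop.
  static double pdist(double lambda, double[] neighborDists) {
    double meanSq = 0.0;
    for (double d : neighborDists) {
      meanSq += d * d;
    }
    meanSq /= neighborDists.length;
    return lambda * Math.sqrt(meanSq);
  }

  // Final score; nplof = lambda * sqrt(mean of (plof - 1)^2 over all objects).
  static double loopScore(double pdist, double meanNeighborPdist, double nplof) {
    double plof = Math.max(pdist / meanNeighborPdist, 1.0);
    if (Double.isNaN(plof) || Double.isInfinite(plof)) {
      plof = 1.0; // sanitized exactly as in the PLOF loop
    }
    return NormalDistribution.erf((plof - 1) / (nplof * Math.sqrt(2.0)));
  }
}
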
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java
index b3d24463..bed27a33 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java
@@ -37,14 +37,14 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
@@ -83,7 +83,7 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(OPTICSOF.class);
+ private static final Logging LOG = Logging.getLogger(OPTICSOF.class);
/**
* Parameter to specify the threshold MinPts.
@@ -136,9 +136,10 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
List<Double> core = new ArrayList<Double>();
double lrd = 0;
- for(DistanceResultPair<D> neighPair : nMinPts.get(iditer)) {
- double coreDist = coreDistance.doubleValue(neighPair);
- double dist = distQuery.distance(iditer, neighPair).doubleValue();
+ // TODO: optimize for double distances
+ for (DistanceDBIDResultIter<D> neighbor = nMinPts.get(iditer).iter(); neighbor.valid(); neighbor.advance()) {
+ double coreDist = coreDistance.doubleValue(neighbor);
+ double dist = distQuery.distance(iditer, neighbor).doubleValue();
double rd = Math.max(coreDist, dist);
lrd = rd + lrd;
core.add(rd);
@@ -153,9 +154,9 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc
WritableDoubleDataStore ofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double of = 0;
- for(DistanceResultPair<D> pair : nMinPts.get(iditer)) {
+ for (DBIDIter neighbor = nMinPts.get(iditer).iter(); neighbor.valid(); neighbor.advance()) {
double lrd = lrds.doubleValue(iditer);
- double lrdN = lrds.doubleValue(pair);
+ double lrdN = lrds.doubleValue(neighbor);
of = of + lrdN / lrd;
}
of = of / minPtsNeighborhoodSize.intValue(iditer);
@@ -176,7 +177,7 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -192,7 +193,8 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter param = new IntParameter(OPTICS.MINPTS_ID, new GreaterConstraint(1));
+ final IntParameter param = new IntParameter(OPTICS.MINPTS_ID);
+ param.addConstraint(new GreaterConstraint(1));
if(config.grab(param)) {
minpts = param.getValue();
}
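
OPTICSOF.java only swaps the iteration style (DistanceDBIDResultIter in place of DistanceResultPair); the OPTICS-OF arithmetic is untouched. Each neighbor contributes a reachability distance of max(coreDist, dist), the local reachability density is derived from their sum, and the outlier factor averages the neighbors' density ratios. A plain-array sketch, with core distances and neighbor densities assumed precomputed:

public final class OpticsOfMathSketch {
  // Reachability of a neighbor: never smaller than that neighbor's core distance.
  static double reachability(double neighborCoreDist, double dist) {
    return Math.max(neighborCoreDist, dist);
  }

  // Outlier factor: mean ratio of each neighbor's density to the object's own.
  static double outlierFactor(double lrd, double[] neighborLrds) {
    double of = 0.0;
    for (double lrdN : neighborLrds) {
      of += lrdN / lrd;
    }
    return of / neighborLrds.length;
  }
}
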
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OnlineLOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OnlineLOF.java
index 9b974ad9..bac5db36 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OnlineLOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OnlineLOF.java
@@ -34,14 +34,14 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery;
import de.lmu.ifi.dbs.elki.database.query.rknn.RKNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.index.preprocessed.knn.AbstractMaterializeKNNPreprocessor;
import de.lmu.ifi.dbs.elki.index.preprocessed.knn.KNNChangeEvent;
@@ -73,7 +73,7 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
/**
* The logger for this class.
*/
- static final Logging logger = Logging.getLogger(OnlineLOF.class);
+ private static final Logging LOG = Logging.getLogger(OnlineLOF.class);
/**
* Constructor.
@@ -93,7 +93,7 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
*/
@Override
public OutlierResult run(Relation<O> relation) {
- StepProgress stepprog = logger.isVerbose() ? new StepProgress("OnlineLOF", 3) : null;
+ StepProgress stepprog = LOG.isVerbose() ? new StepProgress("OnlineLOF", 3) : null;
Pair<Pair<KNNQuery<O, D>, KNNQuery<O, D>>, Pair<RKNNQuery<O, D>, RKNNQuery<O, D>>> queries = getKNNAndRkNNQueries(relation, stepprog);
KNNQuery<O, D> kNNRefer = queries.getFirst().getFirst();
@@ -128,7 +128,7 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
// No optimized kNN query or RkNN query - use a preprocessor!
if(kNNRefer == null || rkNNRefer == null) {
if(stepprog != null) {
- stepprog.beginStep(1, "Materializing neighborhood w.r.t. reference neighborhood distance function.", logger);
+ stepprog.beginStep(1, "Materializing neighborhood w.r.t. reference neighborhood distance function.", LOG);
}
MaterializeKNNAndRKNNPreprocessor<O, D> preproc = new MaterializeKNNAndRKNNPreprocessor<O, D>(relation, neighborhoodDistanceFunction, k);
DistanceQuery<O, D> ndq = relation.getDatabase().getDistanceQuery(relation, neighborhoodDistanceFunction);
@@ -139,7 +139,7 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
}
else {
if(stepprog != null) {
- stepprog.beginStep(1, "Optimized neighborhood w.r.t. reference neighborhood distance function provided by database.", logger);
+ stepprog.beginStep(1, "Optimized neighborhood w.r.t. reference neighborhood distance function provided by database.", LOG);
}
}
@@ -147,7 +147,7 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
RKNNQuery<O, D> rkNNReach = QueryUtil.getRKNNQuery(relation, reachabilityDistanceFunction, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
if(kNNReach == null || rkNNReach == null) {
if(stepprog != null) {
- stepprog.beginStep(2, "Materializing neighborhood w.r.t. reachability distance function.", logger);
+ stepprog.beginStep(2, "Materializing neighborhood w.r.t. reachability distance function.", LOG);
}
ListParameterization config = new ListParameterization();
config.addParameter(AbstractMaterializeKNNPreprocessor.Factory.DISTANCE_FUNCTION_ID, reachabilityDistanceFunction);
@@ -261,14 +261,14 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
* @param lofResult the result of the former LOF run
*/
private void kNNsInserted(DBIDs insertions, DBIDs updates1, DBIDs updates2, LOFResult<O, D> lofResult) {
- StepProgress stepprog = logger.isVerbose() ? new StepProgress(3) : null;
+ StepProgress stepprog = LOG.isVerbose() ? new StepProgress(3) : null;
// recompute lrds
if(stepprog != null) {
- stepprog.beginStep(1, "Recompute LRDs.", logger);
+ stepprog.beginStep(1, "Recompute LRDs.", LOG);
}
ArrayDBIDs lrd_ids = DBIDUtil.ensureArray(DBIDUtil.union(insertions, updates2));
- List<List<DistanceResultPair<D>>> reachDistRKNNs = lofResult.getRkNNReach().getRKNNForBulkDBIDs(lrd_ids, k);
+ List<? extends DistanceDBIDResult<D>> reachDistRKNNs = lofResult.getRkNNReach().getRKNNForBulkDBIDs(lrd_ids, k);
ArrayDBIDs affected_lrd_id_candidates = mergeIDs(reachDistRKNNs, lrd_ids);
ArrayModifiableDBIDs affected_lrd_ids = DBIDUtil.newArray(affected_lrd_id_candidates.size());
WritableDoubleDataStore new_lrds = computeLRDs(affected_lrd_id_candidates, lofResult.getKNNReach());
@@ -283,20 +283,20 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
// recompute lofs
if(stepprog != null) {
- stepprog.beginStep(2, "Recompute LOFS.", logger);
+ stepprog.beginStep(2, "Recompute LOFS.", LOG);
}
- List<List<DistanceResultPair<D>>> primDistRKNNs = lofResult.getRkNNRefer().getRKNNForBulkDBIDs(affected_lrd_ids, k);
+ List<? extends DistanceDBIDResult<D>> primDistRKNNs = lofResult.getRkNNRefer().getRKNNForBulkDBIDs(affected_lrd_ids, k);
ArrayDBIDs affected_lof_ids = mergeIDs(primDistRKNNs, affected_lrd_ids, insertions, updates1);
recomputeLOFs(affected_lof_ids, lofResult);
// fire result changed
if(stepprog != null) {
- stepprog.beginStep(3, "Inform listeners.", logger);
+ stepprog.beginStep(3, "Inform listeners.", LOG);
}
lofResult.getResult().getHierarchy().resultChanged(lofResult.getResult());
if(stepprog != null) {
- stepprog.setCompleted(logger);
+ stepprog.setCompleted(LOG);
}
}
@@ -311,11 +311,11 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
* @param lofResult the result of the former LOF run
*/
private void kNNsRemoved(DBIDs deletions, DBIDs updates1, DBIDs updates2, LOFResult<O, D> lofResult) {
- StepProgress stepprog = logger.isVerbose() ? new StepProgress(4) : null;
+ StepProgress stepprog = LOG.isVerbose() ? new StepProgress(4) : null;
// delete lrds and lofs
if(stepprog != null) {
- stepprog.beginStep(1, "Delete old LRDs and LOFs.", logger);
+ stepprog.beginStep(1, "Delete old LRDs and LOFs.", LOG);
}
for (DBIDIter iter = deletions.iter(); iter.valid(); iter.advance()) {
lofResult.getLrds().delete(iter);
@@ -324,10 +324,10 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
// recompute lrds
if(stepprog != null) {
- stepprog.beginStep(2, "Recompute LRDs.", logger);
+ stepprog.beginStep(2, "Recompute LRDs.", LOG);
}
ArrayDBIDs lrd_ids = DBIDUtil.ensureArray(updates2);
- List<List<DistanceResultPair<D>>> reachDistRKNNs = lofResult.getRkNNReach().getRKNNForBulkDBIDs(lrd_ids, k);
+ List<? extends DistanceDBIDResult<D>> reachDistRKNNs = lofResult.getRkNNReach().getRKNNForBulkDBIDs(lrd_ids, k);
ArrayDBIDs affected_lrd_id_candidates = mergeIDs(reachDistRKNNs, lrd_ids);
ArrayModifiableDBIDs affected_lrd_ids = DBIDUtil.newArray(affected_lrd_id_candidates.size());
WritableDoubleDataStore new_lrds = computeLRDs(affected_lrd_id_candidates, lofResult.getKNNReach());
@@ -342,20 +342,20 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
// recompute lofs
if(stepprog != null) {
- stepprog.beginStep(3, "Recompute LOFS.", logger);
+ stepprog.beginStep(3, "Recompute LOFS.", LOG);
}
- List<List<DistanceResultPair<D>>> primDistRKNNs = lofResult.getRkNNRefer().getRKNNForBulkDBIDs(affected_lrd_ids, k);
+ List<? extends DistanceDBIDResult<D>> primDistRKNNs = lofResult.getRkNNRefer().getRKNNForBulkDBIDs(affected_lrd_ids, k);
ArrayDBIDs affected_lof_ids = mergeIDs(primDistRKNNs, affected_lrd_ids, updates1);
recomputeLOFs(affected_lof_ids, lofResult);
// fire result changed
if(stepprog != null) {
- stepprog.beginStep(4, "Inform listeners.", logger);
+ stepprog.beginStep(4, "Inform listeners.", LOG);
}
lofResult.getResult().getHierarchy().resultChanged(lofResult.getResult());
if(stepprog != null) {
- stepprog.setCompleted(logger);
+ stepprog.setCompleted(LOG);
}
}
@@ -367,15 +367,13 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
* @return a set containing the ids of the query result and the specified
* ids
*/
- private ArrayModifiableDBIDs mergeIDs(List<List<DistanceResultPair<D>>> queryResults, DBIDs... ids) {
+ private ArrayModifiableDBIDs mergeIDs(List<? extends DistanceDBIDResult<D>> queryResults, DBIDs... ids) {
ModifiableDBIDs result = DBIDUtil.newHashSet();
for(DBIDs dbids : ids) {
result.addDBIDs(dbids);
}
- for(List<DistanceResultPair<D>> queryResult : queryResults) {
- for(DistanceResultPair<D> qr : queryResult) {
- result.add(qr);
- }
+ for(DistanceDBIDResult<D> queryResult : queryResults) {
+ result.addDBIDs(queryResult);
}
return DBIDUtil.newArray(result);
}
@@ -410,7 +408,7 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -440,7 +438,8 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter pK = new IntParameter(K_ID, new GreaterConstraint(1));
+ final IntParameter pK = new IntParameter(K_ID);
+ pK.addConstraint(new GreaterConstraint(1));
if(config.grab(pK)) {
k = pK.getValue();
}
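
In OnlineLOF.java, the bulk reverse-kNN results now arrive as List<? extends DistanceDBIDResult<D>>, and because a DistanceDBIDResult is itself a DBID collection, mergeIDs can bulk-add each query result instead of looping over pairs. The merge is a plain union that determines which objects need their LRD and LOF values recomputed; a sketch with standard Java collections standing in for the DBID types:

import java.util.HashSet;
import java.util.List;
import java.util.Set;

public final class MergeSketch {
  // Union of all reverse-kNN results plus the explicitly given ID sets.
  @SafeVarargs
  static Set<Integer> mergeIds(List<Set<Integer>> queryResults, Set<Integer>... ids) {
    Set<Integer> result = new HashSet<Integer>();
    for (Set<Integer> s : ids) {
      result.addAll(s);
    }
    for (Set<Integer> qr : queryResults) {
      result.addAll(qr); // bulk add, mirroring result.addDBIDs(queryResult)
    }
    return result;
  }
}
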
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java
index d8322d8b..00c4a8ec 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java
@@ -31,6 +31,8 @@ import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
* Generic super interface for outlier detection algorithms.
*
* @author Erich Schubert
+ *
+ * @apiviz.landmark
*
* @apiviz.has OutlierResult
*/
@@ -39,4 +41,4 @@ public interface OutlierAlgorithm extends Algorithm {
// Use the magic in AbstractAlgorithm and just implement a run method for your input data
@Override
OutlierResult run(Database database);
-}
\ No newline at end of file
+}
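
The interface stays a one-method contract: AbstractAlgorithm supplies the dispatch described in the comment above, so a concrete detector only declares a run method for its input, plus the type restriction and logger. A hypothetical minimal implementation sketch (TrivialScoreOutlier is invented for illustration, and the constant-score result is meaningless by design):

import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore;

public class TrivialScoreOutlier extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
  private static final Logging LOG = Logging.getLogger(TrivialScoreOutlier.class);

  // Picked up by the dispatch in AbstractAlgorithm.run(Database).
  public OutlierResult run(Relation<?> relation) {
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
      scores.putDouble(iter, 1.0); // constant score for every object
    }
    Relation<Double> scoreResult = new MaterializedRelation<Double>("Trivial score", "trivial-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
    return new OutlierResult(new ProbabilisticOutlierScore(), scoreResult);
  }

  @Override
  public TypeInformation[] getInputTypeRestriction() {
    return TypeUtil.array(TypeUtil.ANY);
  }

  @Override
  protected Logging getLogger() {
    return LOG;
  }
}
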
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java
index dd1d37a3..93eca7db 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java
@@ -23,11 +23,8 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.ArrayList;
import java.util.Collection;
-import java.util.Collections;
import java.util.Iterator;
-import java.util.List;
import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
@@ -39,12 +36,13 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
-import de.lmu.ifi.dbs.elki.database.query.GenericDistanceResultPair;
+import de.lmu.ifi.dbs.elki.database.ids.DistanceDBIDPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.GenericDistanceDBIDList;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.Mean;
@@ -88,23 +86,23 @@ import de.lmu.ifi.dbs.elki.utilities.referencepoints.ReferencePointsHeuristic;
@Title("An Efficient Reference-based Approach to Outlier Detection in Large Datasets")
@Description("Computes kNN distances approximately, using reference points with various reference point strategies.")
@Reference(authors = "Y. Pei, O.R. Zaiane, Y. Gao", title = "An Efficient Reference-based Approach to Outlier Detection in Large Datasets", booktitle = "Proc. 6th IEEE Int. Conf. on Data Mining (ICDM '06), Hong Kong, China, 2006", url = "http://dx.doi.org/10.1109/ICDM.2006.17")
-public class ReferenceBasedOutlierDetection<V extends NumberVector<?, ?>, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+public class ReferenceBasedOutlierDetection<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(ReferenceBasedOutlierDetection.class);
+ private static final Logging LOG = Logging.getLogger(ReferenceBasedOutlierDetection.class);
/**
* Parameter for the reference points heuristic.
*/
- public static final OptionID REFP_ID = OptionID.getOrCreateOptionID("refod.refp", "The heuristic for finding reference points.");
+ public static final OptionID REFP_ID = new OptionID("refod.refp", "The heuristic for finding reference points.");
/**
* Parameter to specify the number of nearest neighbors of an object, to be
* considered for computing its REFOD_SCORE, must be an integer greater than
* 1.
*/
- public static final OptionID K_ID = OptionID.getOrCreateOptionID("refod.k", "The number of nearest neighbors");
+ public static final OptionID K_ID = new OptionID("refod.k", "The number of nearest neighbors");
/**
* Holds the value of {@link #K_ID}.
@@ -160,7 +158,7 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?, ?>, D exte
}
V firstRef = iter.next();
// compute distance vector for the first reference point
- List<DistanceResultPair<D>> firstReferenceDists = computeDistanceVector(firstRef, relation, distFunc);
+ DistanceDBIDResult<D> firstReferenceDists = computeDistanceVector(firstRef, relation, distFunc);
for(int l = 0; l < firstReferenceDists.size(); l++) {
double density = computeDensity(firstReferenceDists, l);
// Initial value
@@ -169,7 +167,7 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?, ?>, D exte
// compute density values for all remaining reference points
while(iter.hasNext()) {
V refPoint = iter.next();
- List<DistanceResultPair<D>> referenceDists = computeDistanceVector(refPoint, relation, distFunc);
+ DistanceDBIDResult<D> referenceDists = computeDistanceVector(refPoint, relation, distFunc);
// compute density value for each object
for(int l = 0; l < referenceDists.size(); l++) {
double density = computeDensity(referenceDists, l);
@@ -215,14 +213,13 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?, ?>, D exte
* @return array containing the distance to one reference point for each
* database object and the object id
*/
- protected List<DistanceResultPair<D>> computeDistanceVector(V refPoint, Relation<V> database, DistanceQuery<V, D> distFunc) {
+ protected DistanceDBIDResult<D> computeDistanceVector(V refPoint, Relation<V> database, DistanceQuery<V, D> distFunc) {
// TODO: optimize for double distances?
- List<DistanceResultPair<D>> referenceDists = new ArrayList<DistanceResultPair<D>>(database.size());
+ GenericDistanceDBIDList<D> referenceDists = new GenericDistanceDBIDList<D>(database.size());
for(DBIDIter iditer = database.iterDBIDs(); iditer.valid(); iditer.advance()) {
- final D distance = distFunc.distance(iditer, refPoint);
- referenceDists.add(new GenericDistanceResultPair<D>(distance, iditer.getDBID()));
+ referenceDists.add(distFunc.distance(iditer, refPoint), iditer);
}
- Collections.sort(referenceDists);
+ referenceDists.sort();
return referenceDists;
}
@@ -238,8 +235,8 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?, ?>, D exte
* @param index index of the current object
* @return density for one object and reference point
*/
- protected double computeDensity(List<DistanceResultPair<D>> referenceDists, int index) {
- final DistanceResultPair<D> x = referenceDists.get(index);
+ protected double computeDensity(DistanceDBIDResult<D> referenceDists, int index) {
+ final DistanceDBIDPair<D> x = referenceDists.get(index);
final double xDist = x.getDistance().doubleValue();
int lef = index - 1;
@@ -295,7 +292,7 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?, ?>, D exte
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -305,7 +302,7 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?, ?>, D exte
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<?, ?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<V, D> {
+ public static class Parameterizer<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<V, D> {
/**
* Holds the value of {@link #K_ID}.
*/
@@ -319,7 +316,8 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?, ?>, D exte
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter pK = new IntParameter(K_ID, new GreaterConstraint(1));
+ final IntParameter pK = new IntParameter(K_ID);
+ pK.addConstraint(new GreaterConstraint(1));
if(config.grab(pK)) {
k = pK.getValue();
}
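
ReferenceBasedOutlierDetection.java trades its List<DistanceResultPair<D>> for a GenericDistanceDBIDList that is sorted in place. The approximation itself is unchanged: all objects are ordered by their distance to one reference point, and an object's k nearest in that 1-d ordering (found by scanning left and right from its own position, as computeDensity does) stand in for its true k nearest neighbors. A plain-array sketch of that scan, assuming density is taken as the inverse mean of the approximate neighbor distances:

public final class RefDensitySketch {
  // sortedRefDists: distances of all objects to one reference point, ascending;
  // index: position of the object under consideration in that ordering.
  static double approxDensity(double[] sortedRefDists, int index, int k) {
    final double xDist = sortedRefDists[index];
    int lef = index - 1, rig = index + 1;
    double sum = 0.0;
    for (int found = 0; found < k; found++) {
      double dLef = (lef >= 0) ? xDist - sortedRefDists[lef] : Double.POSITIVE_INFINITY;
      double dRig = (rig < sortedRefDists.length) ? sortedRefDists[rig] - xDist : Double.POSITIVE_INFINITY;
      if (dLef == Double.POSITIVE_INFINITY && dRig == Double.POSITIVE_INFINITY) {
        break; // fewer than k other objects exist
      }
      if (dLef <= dRig) { // take the closer side of the 1-d ordering
        sum += dLef;
        lef--;
      } else {
        sum += dRig;
        rig++;
      }
    }
    return (sum > 0) ? (k / sum) : 0.0;
  }
}
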
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleCOP.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleCOP.java
new file mode 100644
index 00000000..e8077819
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleCOP.java
@@ -0,0 +1,236 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.DependencyDerivator;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.CorrelationAnalysisSolution;
+import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.QueryUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredRunner;
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore;
+import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * Algorithm to compute local correlation outlier probability.
+ *
+ * This is the simpler, original version of COP, as published in
+ * <p>
+ * Arthur Zimek<br />
+ * Correlation Clustering.<br />
+ * PhD thesis, Chapter 18
+ * </p>
+ * which was later refined into the method published as {@link COP}.
+ *
+ * @author Erich Schubert
+ * @param <V> the type of NumberVector handled by this Algorithm
+ * @param <D> the distance type
+ */
+@Title("Simple COP: Correlation Outlier Probability")
+@Reference(authors = "Arthur Zimek", title = "Correlation Clustering. PhD thesis, Chapter 18", booktitle = "")
+public class SimpleCOP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<V, D, OutlierResult> implements OutlierAlgorithm {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(SimpleCOP.class);
+
+ /**
+ * Number of neighbors to be considered.
+ */
+ int k;
+
+ /**
+ * Holds the object performing the dependency derivation
+ */
+ private DependencyDerivator<V, D> dependencyDerivator;
+
+ /**
+ * Constructor.
+ *
+ * @param distanceFunction Distance function
+ * @param k k Parameter
+   * @param pca PCA runner
+ */
+ public SimpleCOP(DistanceFunction<? super V, D> distanceFunction, int k, PCAFilteredRunner<V> pca) {
+ super(distanceFunction);
+ this.k = k;
+ this.dependencyDerivator = new DependencyDerivator<V, D>(null, FormatUtil.NF8, pca, 0, false);
+ }
+
+ public OutlierResult run(Database database, Relation<V> data) throws IllegalStateException {
+ KNNQuery<V, D> knnQuery = QueryUtil.getKNNQuery(data, getDistanceFunction(), k + 1);
+
+ DBIDs ids = data.getDBIDs();
+
+ WritableDoubleDataStore cop_score = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
+ WritableDataStore<Vector> cop_err_v = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, Vector.class);
+ WritableDataStore<Matrix> cop_datav = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, Matrix.class);
+ WritableIntegerDataStore cop_dim = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, -1);
+ WritableDataStore<CorrelationAnalysisSolution<?>> cop_sol = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, CorrelationAnalysisSolution.class);
+    { // compute neighbors of each db object
+ FiniteProgress progressLocalPCA = LOG.isVerbose() ? new FiniteProgress("Correlation Outlier Probabilities", data.size(), LOG) : null;
+ double sqrt2 = Math.sqrt(2.0);
+ for (DBIDIter id = data.iterDBIDs(); id.valid(); id.advance()) {
+ KNNResult<D> neighbors = knnQuery.getKNNForDBID(id, k + 1);
+ ModifiableDBIDs nids = DBIDUtil.newArray(neighbors);
+ nids.remove(id);
+
+ // TODO: do we want to use the query point as centroid?
+ CorrelationAnalysisSolution<V> depsol = dependencyDerivator.generateModel(data, nids);
+
+ double stddev = depsol.getStandardDeviation();
+ double distance = depsol.distance(data.get(id));
+ double prob = NormalDistribution.erf(distance / (stddev * sqrt2));
+
+ cop_score.putDouble(id, prob);
+
+ Vector errv = depsol.errorVector(data.get(id)).timesEquals(-1);
+ cop_err_v.put(id, errv);
+
+ Matrix datav = depsol.dataProjections(data.get(id));
+ cop_datav.put(id, datav);
+
+ cop_dim.putInt(id, depsol.getCorrelationDimensionality());
+
+ cop_sol.put(id, depsol);
+
+ if (progressLocalPCA != null) {
+ progressLocalPCA.incrementProcessed(LOG);
+ }
+ }
+ if (progressLocalPCA != null) {
+ progressLocalPCA.ensureCompleted(LOG);
+ }
+ }
+ // combine results.
+ Relation<Double> scoreResult = new MaterializedRelation<Double>("Original Correlation Outlier Probabilities", "origcop-outlier", TypeUtil.DOUBLE, cop_score, ids);
+ OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore();
+ OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
+ // extra results
+ result.addChildResult(new MaterializedRelation<Integer>("Local Dimensionality", COP.COP_DIM, TypeUtil.INTEGER, cop_dim, ids));
+ result.addChildResult(new MaterializedRelation<Vector>("Error vectors", COP.COP_ERRORVEC, TypeUtil.VECTOR, cop_err_v, ids));
+ result.addChildResult(new MaterializedRelation<Matrix>("Data vectors", "cop-datavec", TypeUtil.MATRIX, cop_datav, ids));
+ result.addChildResult(new MaterializedRelation<CorrelationAnalysisSolution<?>>("Correlation analysis", "cop-sol", new SimpleTypeInformation<CorrelationAnalysisSolution<?>>(CorrelationAnalysisSolution.class), cop_sol, ids));
+ return result;
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<V, D> {
+ /**
+ * Parameter to specify the number of nearest neighbors of an object to be
+ * considered for computing its COP_SCORE, must be an integer greater than
+ * 0.
+     * <p>
+ * Key: {@code -cop.k}
+ * </p>
+ */
+ public static final OptionID K_ID = new OptionID("cop.k", "The number of nearest neighbors of an object to be considered for computing its COP_SCORE.");
+
+ /**
+ * Parameter for the PCA runner class.
+ *
+ * <p>
+ * Key: {@code -cop.pcarunner}
+ * </p>
+ */
+ public static final OptionID PCARUNNER_ID = new OptionID("cop.pcarunner", "The class to compute (filtered) PCA.");
+
+ /**
+ * Number of neighbors to be considered.
+ */
+ int k;
+
+ /**
+     * Holds the PCA runner.
+ */
+ protected PCAFilteredRunner<V> pca;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ IntParameter kP = new IntParameter(K_ID);
+ kP.addConstraint(new GreaterConstraint(0));
+ if (config.grab(kP)) {
+ k = kP.intValue();
+ }
+ ObjectParameter<PCAFilteredRunner<V>> pcaP = new ObjectParameter<PCAFilteredRunner<V>>(PCARUNNER_ID, PCAFilteredRunner.class, PCAFilteredRunner.class);
+ if (config.grab(pcaP)) {
+ pca = pcaP.instantiateClass(config);
+ }
+ }
+
+ @Override
+ protected SimpleCOP<V, D> makeInstance() {
+ return new SimpleCOP<V, D>(distanceFunction, k, pca);
+ }
+ }
+}
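
A minimal sketch of the score mapping SimpleCOP applies above: the distance of a point from its neighbors' correlation hyperplane, in units of the model's standard deviation, is pushed through the Gaussian error function. The standalone erf approximation (Abramowitz & Stegun 7.1.26) and the class name below are illustrative assumptions; the actual code delegates to ELKI's NormalDistribution.erf.

// Sketch only: map a model distance to a COP-style outlier probability.
public final class CopScoreSketch {
  // Abramowitz & Stegun 7.1.26 approximation of erf, |error| < 1.5e-7.
  static double erf(double x) {
    double sign = (x < 0) ? -1 : 1;
    x = Math.abs(x);
    final double t = 1.0 / (1.0 + 0.3275911 * x);
    final double poly = ((((1.061405429 * t - 1.453152027) * t + 1.421413741) * t - 0.284496736) * t + 0.254829592) * t;
    return sign * (1.0 - poly * Math.exp(-x * x));
  }

  // Probability mass of a normal deviate within this distance;
  // values close to 1 mark an outlier.
  static double copScore(double distance, double stddev) {
    return erf(distance / (stddev * Math.sqrt(2.0)));
  }

  public static void main(String[] args) {
    // A point three standard deviations off the hyperplane scores ~0.997:
    System.out.println(copScore(3.0, 1.0));
  }
}
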
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleKernelDensityLOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleKernelDensityLOF.java
new file mode 100644
index 00000000..1c104c08
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleKernelDensityLOF.java
@@ -0,0 +1,284 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.QueryUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDResultIter;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNList;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.math.statistics.EpanechnikovKernelDensityFunction;
+import de.lmu.ifi.dbs.elki.math.statistics.KernelDensityFunction;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * A simple variant of the LOF algorithm, which uses a simple kernel density
+ * estimation instead of the local reachability density.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has KNNQuery
+ * @apiviz.has KernelDensityFunction
+ *
+ * @param <O> the type of objects handled by this Algorithm
+ * @param <D> Distance type
+ */
+public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(SimpleKernelDensityLOF.class);
+
+ /**
+ * Parameter k.
+ */
+ protected int k;
+
+ /**
+ * Kernel density function
+ */
+ private KernelDensityFunction kernel;
+
+ /**
+ * Constructor.
+ *
+   * @param k the number of neighbors to use
+   * @param distance Distance function
+   * @param kernel Kernel function
+ */
+ public SimpleKernelDensityLOF(int k, DistanceFunction<? super O, D> distance, KernelDensityFunction kernel) {
+ super(distance);
+    this.k = k + 1; // + 1 to account for the query point itself
+ this.kernel = kernel;
+ }
+
+ /**
+ * Run the naive kernel density LOF algorithm.
+ *
+ * @param relation Data to process
+ * @return LOF outlier result
+ */
+ public OutlierResult run(Relation<O> relation) {
+ StepProgress stepprog = LOG.isVerbose() ? new StepProgress("KernelDensityLOF", 3) : null;
+
+ final int dim = RelationUtil.dimensionality(relation);
+
+ DBIDs ids = relation.getDBIDs();
+
+ // "HEAVY" flag for KNN Query since it is used more than once
+ KNNQuery<O, D> knnq = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
+ // No optimized kNN query - use a preprocessor!
+ if (!(knnq instanceof PreprocessorKNNQuery)) {
+ if (stepprog != null) {
+ stepprog.beginStep(1, "Materializing neighborhoods w.r.t. distance function.", LOG);
+ }
+ MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<O, D>(relation, getDistanceFunction(), k);
+ relation.getDatabase().addIndex(preproc);
+ DistanceQuery<O, D> rdq = relation.getDatabase().getDistanceQuery(relation, getDistanceFunction());
+ knnq = preproc.getKNNQuery(rdq, k);
+ }
+
+ // Compute LRDs
+ if (stepprog != null) {
+ stepprog.beginStep(2, "Computing densities.", LOG);
+ }
+ WritableDoubleDataStore dens = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
+ FiniteProgress densProgress = LOG.isVerbose() ? new FiniteProgress("Densities", ids.size(), LOG) : null;
+ for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
+ final KNNResult<D> neighbors = knnq.getKNNForDBID(it, k);
+ int count = 0;
+ double sum = 0.0;
+ if (neighbors instanceof DoubleDistanceKNNList) {
+ // Fast version for double distances
+ for (DoubleDistanceDBIDResultIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
+ if (DBIDUtil.equal(neighbor, it)) {
+ continue;
+ }
+          double max = ((DoubleDistanceKNNList) knnq.getKNNForDBID(neighbor, k)).doubleKNNDistance();
+ final double v = neighbor.doubleDistance() / max;
+ sum += kernel.density(v) / Math.pow(max, dim);
+ count++;
+ }
+ } else {
+ for (DistanceDBIDResultIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ if (DBIDUtil.equal(neighbor, it)) {
+ continue;
+ }
+ double max = knnq.getKNNForDBID(neighbor, k).getKNNDistance().doubleValue();
+ final double v = neighbor.getDistance().doubleValue() / max;
+ sum += kernel.density(v) / Math.pow(max, dim);
+ count++;
+ }
+ }
+ final double density = sum / count;
+ dens.putDouble(it, density);
+ if (densProgress != null) {
+ densProgress.incrementProcessed(LOG);
+ }
+ }
+ if (densProgress != null) {
+ densProgress.ensureCompleted(LOG);
+ }
+
+ // compute LOF_SCORE of each db object
+ if (stepprog != null) {
+ stepprog.beginStep(3, "Computing KLOFs.", LOG);
+ }
+ WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
+ // track the maximum value for normalization.
+ DoubleMinMax lofminmax = new DoubleMinMax();
+
+ FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("KLOF_SCORE for objects", ids.size(), LOG) : null;
+ for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
+ final double lrdp = dens.doubleValue(it);
+ final double lof;
+ if (lrdp > 0) {
+ final KNNResult<D> neighbors = knnq.getKNNForDBID(it, k);
+ double sum = 0.0;
+ int count = 0;
+ for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ // skip the point itself
+ if (DBIDUtil.equal(neighbor, it)) {
+ continue;
+ }
+ sum += dens.doubleValue(neighbor);
+ count++;
+ }
+ lof = sum / (count * lrdp);
+ } else {
+ lof = 1.0;
+ }
+ lofs.putDouble(it, lof);
+ // update minimum and maximum
+ lofminmax.put(lof);
+
+ if (progressLOFs != null) {
+ progressLOFs.incrementProcessed(LOG);
+ }
+ }
+ if (progressLOFs != null) {
+ progressLOFs.ensureCompleted(LOG);
+ }
+
+ if (stepprog != null) {
+ stepprog.setCompleted(LOG);
+ }
+
+ // Build result representation.
+ Relation<Double> scoreResult = new MaterializedRelation<Double>("Kernel Density Local Outlier Factor", "kernel-density-slof-outlier", TypeUtil.DOUBLE, lofs, ids);
+ OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
+ OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
+
+ return result;
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(new CombinedTypeInformation(getDistanceFunction().getInputTypeRestriction(), TypeUtil.NUMBER_VECTOR_FIELD));
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ *
+ * @param <O> vector type
+ * @param <D> distance type
+ */
+ public static class Parameterizer<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ /**
+ * Option ID for kernel density LOF kernel.
+ */
+ public static final OptionID KERNEL_ID = new OptionID("kernellof.kernel", "Kernel to use for kernel density LOF.");
+
+ /**
+ * The neighborhood size to use.
+ */
+ protected int k = 2;
+
+ /**
+ * Kernel density function parameter
+ */
+ KernelDensityFunction kernel;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ final IntParameter pK = new IntParameter(LOF.K_ID);
+ pK.addConstraint(new GreaterConstraint(1));
+ if (config.grab(pK)) {
+ k = pK.getValue();
+ }
+
+ ObjectParameter<KernelDensityFunction> kernelP = new ObjectParameter<KernelDensityFunction>(KERNEL_ID, KernelDensityFunction.class, EpanechnikovKernelDensityFunction.class);
+ if (config.grab(kernelP)) {
+ kernel = kernelP.instantiateClass(config);
+ }
+ }
+
+ @Override
+ protected SimpleKernelDensityLOF<O, D> makeInstance() {
+ return new SimpleKernelDensityLOF<O, D>(k, distanceFunction, kernel);
+ }
+ }
+}
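
The density loop in step 2 above amounts to a weighted neighbor count; a sketch under the assumption of plain arrays instead of ELKI's KNNQuery and Relation machinery, with a 1-d Epanechnikov kernel inlined in place of the configurable KernelDensityFunction (all names below are hypothetical):

// Sketch only: per-point kernel density from neighbor distances.
public final class KernelDensitySketch {
  // 1-d Epanechnikov kernel, supported on [-1, 1].
  static double epanechnikov(double u) {
    return (Math.abs(u) < 1) ? 0.75 * (1 - u * u) : 0;
  }

  // neighborDists: distances from the query to its k neighbors;
  // neighborKDists: each neighbor's own k-distance, used as bandwidth h
  // (assumed > 0); dim: data dimensionality.
  static double density(double[] neighborDists, double[] neighborKDists, int dim) {
    double sum = 0;
    for (int i = 0; i < neighborDists.length; i++) {
      final double h = neighborKDists[i];         // per-neighbor bandwidth
      sum += epanechnikov(neighborDists[i] / h) / Math.pow(h, dim);
    }
    return sum / neighborDists.length;
  }
}
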
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleLOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleLOF.java
new file mode 100644
index 00000000..48505ed5
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleLOF.java
@@ -0,0 +1,249 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.QueryUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDResultIter;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNList;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+
+/**
+ * A simplified version of the original LOF algorithm, which does not use the
+ * reachability distance, yielding less stable results on inliers.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has KNNQuery
+ *
+ * @param <O> the type of DatabaseObjects handled by this Algorithm
+ * @param <D> Distance type
+ */
+public class SimpleLOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(SimpleLOF.class);
+
+ /**
+ * Parameter k.
+ */
+ protected int k;
+
+ /**
+ * Constructor.
+ *
+   * @param k the number of neighbors to use
+   * @param distance Distance function
+ */
+ public SimpleLOF(int k, DistanceFunction<? super O, D> distance) {
+ super(distance);
+    this.k = k + 1; // + 1 to account for the query point itself
+ }
+
+ /**
+ * Run the Simple LOF algorithm.
+ *
+ * @param relation Data to process
+ * @return LOF outlier result
+ */
+ public OutlierResult run(Relation<O> relation) {
+ StepProgress stepprog = LOG.isVerbose() ? new StepProgress("SimpleLOF", 3) : null;
+
+ DBIDs ids = relation.getDBIDs();
+
+ // "HEAVY" flag for KNN Query since it is used more than once
+ KNNQuery<O, D> knnq = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
+ // No optimized kNN query - use a preprocessor!
+ if (!(knnq instanceof PreprocessorKNNQuery)) {
+ if (stepprog != null) {
+ stepprog.beginStep(1, "Materializing neighborhoods w.r.t. distance function.", LOG);
+ }
+ MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<O, D>(relation, getDistanceFunction(), k);
+ relation.getDatabase().addIndex(preproc);
+ DistanceQuery<O, D> rdq = relation.getDatabase().getDistanceQuery(relation, getDistanceFunction());
+ knnq = preproc.getKNNQuery(rdq, k);
+ }
+
+ // Compute LRDs
+ if (stepprog != null) {
+ stepprog.beginStep(2, "Computing densities.", LOG);
+ }
+ WritableDoubleDataStore dens = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
+ FiniteProgress densProgress = LOG.isVerbose() ? new FiniteProgress("Densities", ids.size(), LOG) : null;
+ for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
+ final KNNResult<D> neighbors = knnq.getKNNForDBID(it, k);
+ double sum = 0.0;
+ int count = 0;
+ if (neighbors instanceof DoubleDistanceKNNList) {
+ // Fast version for double distances
+ for (DoubleDistanceDBIDResultIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
+ if (DBIDUtil.equal(neighbor, it)) {
+ continue;
+ }
+ sum += neighbor.doubleDistance();
+ count++;
+ }
+ } else {
+ for (DistanceDBIDResultIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ if (DBIDUtil.equal(neighbor, it)) {
+ continue;
+ }
+ sum += neighbor.getDistance().doubleValue();
+ count++;
+ }
+ }
+ // Avoid division by 0
+ final double lrd = (sum > 0) ? (count / sum) : 0;
+ dens.putDouble(it, lrd);
+ if (densProgress != null) {
+ densProgress.incrementProcessed(LOG);
+ }
+ }
+ if (densProgress != null) {
+ densProgress.ensureCompleted(LOG);
+ }
+
+ // compute LOF_SCORE of each db object
+ if (stepprog != null) {
+ stepprog.beginStep(3, "Computing SLOFs.", LOG);
+ }
+ WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
+ // track the maximum value for normalization.
+ DoubleMinMax lofminmax = new DoubleMinMax();
+
+ FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("Simple LOF scores.", ids.size(), LOG) : null;
+ for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
+ final double lrdp = dens.doubleValue(it);
+ final double lof;
+ if (lrdp > 0) {
+ final KNNResult<D> neighbors = knnq.getKNNForDBID(it, k);
+ double sum = 0.0;
+ int count = 0;
+ for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ // skip the point itself
+ if (DBIDUtil.equal(neighbor, it)) {
+ continue;
+ }
+ sum += dens.doubleValue(neighbor);
+ count++;
+ }
+ lof = sum / (count * lrdp);
+ } else {
+ lof = 1.0;
+ }
+ lofs.putDouble(it, lof);
+ // update minimum and maximum
+ lofminmax.put(lof);
+
+ if (progressLOFs != null) {
+ progressLOFs.incrementProcessed(LOG);
+ }
+ }
+ if (progressLOFs != null) {
+ progressLOFs.ensureCompleted(LOG);
+ }
+
+ if (stepprog != null) {
+ stepprog.setCompleted(LOG);
+ }
+
+ // Build result representation.
+ Relation<Double> scoreResult = new MaterializedRelation<Double>("Simple Local Outlier Factor", "simple-lof-outlier", TypeUtil.DOUBLE, lofs, ids);
+ OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
+ OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
+
+ return result;
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(getDistanceFunction().getInputTypeRestriction());
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ *
+ * @param <O> vector type
+ * @param <D> distance type
+ */
+ public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ /**
+ * The neighborhood size to use.
+ */
+ protected int k = 2;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ final IntParameter pK = new IntParameter(LOF.K_ID);
+ pK.addConstraint(new GreaterConstraint(1));
+ if (config.grab(pK)) {
+ k = pK.getValue();
+ }
+ }
+
+ @Override
+ protected SimpleLOF<O, D> makeInstance() {
+ return new SimpleLOF<O, D>(k, distanceFunction);
+ }
+ }
+}
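
Once the kNN structure is fixed, the two passes of SimpleLOF reduce to a few lines; a sketch assuming neighbor indices and distances are handed in as plain arrays (knn and dist are hypothetical inputs): density is the inverse mean neighbor distance, and the score is the mean neighbor density over the point's own density.

// Sketch only: SimpleLOF's two passes on array input.
public final class SimpleLofSketch {
  // knn[i]: neighbor indices of point i; dist[i]: matching distances.
  static double[] scores(int[][] knn, double[][] dist) {
    final int n = knn.length;
    double[] dens = new double[n];
    for (int i = 0; i < n; i++) {
      double sum = 0;
      for (double d : dist[i]) {
        sum += d;
      }
      // Inverse mean distance; 0 guards against duplicate points.
      dens[i] = (sum > 0) ? dist[i].length / sum : 0;
    }
    double[] lof = new double[n];
    for (int i = 0; i < n; i++) {
      if (dens[i] <= 0) {
        lof[i] = 1.0; // same convention as the code above
        continue;
      }
      double sum = 0;
      for (int j : knn[i]) {
        sum += dens[j];
      }
      lof[i] = sum / (knn[i].length * dens[i]);
    }
    return lof;
  }
}
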
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java
index 1542b8e3..f230fd3b 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java
@@ -77,7 +77,7 @@ public class ExternalDoubleOutlierScore extends AbstractAlgorithm<OutlierResult>
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(ExternalDoubleOutlierScore.class);
+ private static final Logging LOG = Logging.getLogger(ExternalDoubleOutlierScore.class);
/**
* The comment character.
@@ -183,7 +183,7 @@ public class ExternalDoubleOutlierScore extends AbstractAlgorithm<OutlierResult>
minmax.put(score);
}
else if(id == null && Double.isNaN(score)) {
- logger.warning("Line did not match either ID nor score nor comment: " + line);
+        LOG.warning("Line matched neither ID, score, nor comment: " + line);
}
else {
throw new AbortException("Line matched only ID or only SCORE patterns: " + line);
@@ -224,7 +224,7 @@ public class ExternalDoubleOutlierScore extends AbstractAlgorithm<OutlierResult>
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
@Override
@@ -246,7 +246,7 @@ public class ExternalDoubleOutlierScore extends AbstractAlgorithm<OutlierResult>
* Key: {@code -externaloutlier.file}
* </p>
*/
- public static final OptionID FILE_ID = OptionID.getOrCreateOptionID("externaloutlier.file", "The file name containing the (external) outlier scores.");
+ public static final OptionID FILE_ID = new OptionID("externaloutlier.file", "The file name containing the (external) outlier scores.");
/**
* Parameter that specifies the object ID pattern
@@ -255,7 +255,7 @@ public class ExternalDoubleOutlierScore extends AbstractAlgorithm<OutlierResult>
* Default: ^ID=
* </p>
*/
- public static final OptionID ID_ID = OptionID.getOrCreateOptionID("externaloutlier.idpattern", "The pattern to match object ID prefix");
+ public static final OptionID ID_ID = new OptionID("externaloutlier.idpattern", "The pattern to match object ID prefix");
/**
* Parameter that specifies the object score pattern
@@ -263,7 +263,7 @@ public class ExternalDoubleOutlierScore extends AbstractAlgorithm<OutlierResult>
* Key: {@code -externaloutlier.scorepattern}<br />
* </p>
*/
- public static final OptionID SCORE_ID = OptionID.getOrCreateOptionID("externaloutlier.scorepattern", "The pattern to match object score prefix");
+ public static final OptionID SCORE_ID = new OptionID("externaloutlier.scorepattern", "The pattern to match object score prefix");
/**
* Parameter to specify a scaling function to use.
@@ -271,12 +271,12 @@ public class ExternalDoubleOutlierScore extends AbstractAlgorithm<OutlierResult>
* Key: {@code -externaloutlier.scaling}
* </p>
*/
- public static final OptionID SCALING_ID = OptionID.getOrCreateOptionID("externaloutlier.scaling", "Class to use as scaling function.");
+ public static final OptionID SCALING_ID = new OptionID("externaloutlier.scaling", "Class to use as scaling function.");
/**
* Flag parameter for inverted scores.
*/
- public static final OptionID INVERTED_ID = OptionID.getOrCreateOptionID("externaloutlier.inverted", "Flag to signal an inverted outlier score.");
+ public static final OptionID INVERTED_ID = new OptionID("externaloutlier.inverted", "Flag to signal an inverted outlier score.");
/**
* The file to be reparsed
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java
index 407b7400..b53a0942 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java
@@ -39,6 +39,7 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -47,7 +48,7 @@ import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
@@ -57,7 +58,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualCons
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.LongParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
/**
@@ -85,22 +86,22 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(FeatureBagging.class);
+ private static final Logging LOG = Logging.getLogger(FeatureBagging.class);
/**
- * Number of instances to use
+ * Number of instances to use.
*/
protected int num = 1;
/**
- * Cumulative sum or breadth first combinations
+ * Cumulative sum or breadth first combinations.
*/
protected boolean breadth = false;
/**
- * Random number generator for subspace choice
+ * Random number generator for subspace choice.
*/
- private Random RANDOM;
+ private RandomFactory rnd;
/**
* The parameters k for LOF.
@@ -113,18 +114,14 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements
* @param k k Parameter for LOF
* @param num Number of subspaces to use
* @param breadth Flag for breadth-first merging
+ * @param rnd Random generator
*/
- public FeatureBagging(int k, int num, boolean breadth, Long seed) {
+ public FeatureBagging(int k, int num, boolean breadth, RandomFactory rnd) {
super();
this.k = k;
this.num = num;
this.breadth = breadth;
- if(seed != null) {
- this.RANDOM = new Random(seed);
- }
- else {
- this.RANDOM = new Random();
- }
+ this.rnd = rnd;
}
/**
@@ -133,80 +130,79 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements
* @param relation Relation to use
* @return Outlier detection result
*/
- public OutlierResult run(Relation<NumberVector<?, ?>> relation) {
- final int dbdim = DatabaseUtil.dimensionality(relation);
- final int mindim = dbdim / 2;
+ public OutlierResult run(Relation<NumberVector<?>> relation) {
+ final int dbdim = RelationUtil.dimensionality(relation);
+ final int mindim = dbdim >> 1;
final int maxdim = dbdim - 1;
+ final Random rand = rnd.getRandom();
ArrayList<OutlierResult> results = new ArrayList<OutlierResult>(num);
{
- FiniteProgress prog = logger.isVerbose() ? new FiniteProgress("LOF iterations", num, logger) : null;
- for(int i = 0; i < num; i++) {
- BitSet dimset = randomSubspace(dbdim, mindim, maxdim);
+ FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("LOF iterations", num, LOG) : null;
+ for (int i = 0; i < num; i++) {
+ BitSet dimset = randomSubspace(dbdim, mindim, maxdim, rand);
SubspaceEuclideanDistanceFunction df = new SubspaceEuclideanDistanceFunction(dimset);
- LOF<NumberVector<?, ?>, DoubleDistance> lof = new LOF<NumberVector<?, ?>, DoubleDistance>(k, df);
+ LOF<NumberVector<?>, DoubleDistance> lof = new LOF<NumberVector<?>, DoubleDistance>(k, df);
// run LOF and collect the result
OutlierResult result = lof.run(relation);
results.add(result);
- if(prog != null) {
- prog.incrementProcessed(logger);
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
}
}
- if(prog != null) {
- prog.ensureCompleted(logger);
+ if (prog != null) {
+ prog.ensureCompleted(LOG);
}
}
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
DoubleMinMax minmax = new DoubleMinMax();
- if(breadth) {
- FiniteProgress cprog = logger.isVerbose() ? new FiniteProgress("Combining results", relation.size(), logger) : null;
+ if (breadth) {
+ FiniteProgress cprog = LOG.isVerbose() ? new FiniteProgress("Combining results", relation.size(), LOG) : null;
Pair<DBIDIter, Relation<Double>>[] IDVectorOntoScoreVector = Pair.newPairArray(results.size());
// Mapping score-sorted DBID-Iterators onto their corresponding scores.
// We need to initialize them now be able to iterate them "in parallel".
{
int i = 0;
- for(OutlierResult r : results) {
+ for (OutlierResult r : results) {
IDVectorOntoScoreVector[i] = new Pair<DBIDIter, Relation<Double>>(r.getOrdering().iter(relation.getDBIDs()).iter(), r.getScores());
i++;
}
}
// Iterating over the *lines* of the AS_t(i)-matrix.
- for(int i = 0; i < relation.size(); i++) {
+ for (int i = 0; i < relation.size(); i++) {
// Iterating over the elements of a line (breadth-first).
- for(Pair<DBIDIter, Relation<Double>> pair : IDVectorOntoScoreVector) {
+ for (Pair<DBIDIter, Relation<Double>> pair : IDVectorOntoScoreVector) {
DBIDIter iter = pair.first;
// Always true if every algorithm returns a complete result (one score
// for every DBID).
- if(iter.valid()) {
+ if (iter.valid()) {
double score = pair.second.get(iter);
- if(Double.isNaN(scores.doubleValue(iter))) {
+ if (Double.isNaN(scores.doubleValue(iter))) {
scores.putDouble(iter, score);
minmax.put(score);
}
iter.advance();
- }
- else {
- logger.warning("Incomplete result: Iterator does not contain |DB| DBIDs");
+ } else {
+ LOG.warning("Incomplete result: Iterator does not contain |DB| DBIDs");
}
}
// Progress does not take the initial mapping into account.
- if(cprog != null) {
- cprog.incrementProcessed(logger);
+ if (cprog != null) {
+ cprog.incrementProcessed(LOG);
}
}
- if(cprog != null) {
- cprog.ensureCompleted(logger);
+ if (cprog != null) {
+ cprog.ensureCompleted(LOG);
}
- }
- else {
- FiniteProgress cprog = logger.isVerbose() ? new FiniteProgress("Combining results", relation.size(), logger) : null;
- for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
+ } else {
+ FiniteProgress cprog = LOG.isVerbose() ? new FiniteProgress("Combining results", relation.size(), LOG) : null;
+ for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
double sum = 0.0;
- for(OutlierResult r : results) {
+ for (OutlierResult r : results) {
final Double s = r.getScores().get(iter);
if (s != null && !Double.isNaN(s)) {
sum += s;
@@ -214,12 +210,12 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements
}
scores.putDouble(iter, sum);
minmax.put(sum);
- if(cprog != null) {
- cprog.incrementProcessed(logger);
+ if (cprog != null) {
+ cprog.incrementProcessed(LOG);
}
}
- if(cprog != null) {
- cprog.ensureCompleted(logger);
+ if (cprog != null) {
+ cprog.ensureCompleted(LOG);
}
}
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
@@ -228,36 +224,34 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements
}
/**
- * Choose a random subspace
+ * Choose a random subspace.
*
* @param alldim Number of total dimensions
* @param mindim Minimum number to choose
* @param maxdim Maximum number to choose
* @return Subspace as bits.
*/
- private BitSet randomSubspace(final int alldim, final int mindim, final int maxdim) {
+ private BitSet randomSubspace(final int alldim, final int mindim, final int maxdim, final Random rand) {
BitSet dimset = new BitSet();
- {
- // Fill with all dimensions
- int[] dims = new int[alldim];
- for(int d = 0; d < alldim; d++) {
- dims[d] = d;
- }
- // Target dimensionality:
- int subdim = mindim + RANDOM.nextInt(maxdim - mindim);
- // Shrink the subspace to the destination size
- for(int d = 0; d < alldim - subdim; d++) {
- int s = RANDOM.nextInt(alldim - d);
- dimset.set(dims[s]);
- dims[s] = dims[alldim - d - 1];
- }
+ // Fill with all dimensions
+ int[] dims = new int[alldim];
+ for (int d = 0; d < alldim; d++) {
+ dims[d] = d;
+ }
+ // Target dimensionality:
+ int subdim = mindim + rand.nextInt(maxdim - mindim);
+ // Shrink the subspace to the destination size
+ for (int d = 0; d < alldim - subdim; d++) {
+ int s = rand.nextInt(alldim - d);
+ dimset.set(dims[s]);
+ dims[s] = dims[alldim - d - 1];
}
return dimset;
}
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
@Override
@@ -279,69 +273,71 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements
* Key: {@code -fbagging.num}
* </p>
*/
- public static final OptionID NUM_ID = OptionID.getOrCreateOptionID("fbagging.num", "The number of instances to use in the ensemble.");
+ public static final OptionID NUM_ID = new OptionID("fbagging.num", "The number of instances to use in the ensemble.");
/**
- * The flag for using the breadth first approach
+ * The flag for using the breadth first approach.
* <p>
* Key: {@code -fbagging.breadth}
* </p>
*/
- public static final OptionID BREADTH_ID = OptionID.getOrCreateOptionID("fbagging.breadth", "Use the breadth first combinations instead of the cumulative sum approach");
+ public static final OptionID BREADTH_ID = new OptionID("fbagging.breadth", "Use the breadth first combinations instead of the cumulative sum approach");
/**
- * The parameter to specify the random seed
+ * The parameter to specify the random seed.
* <p>
* Key: {@code -fbagging.seed}
* </p>
*/
- public static final OptionID SEED_ID = OptionID.getOrCreateOptionID("fbagging.seed", "Specify a particular random seed.");
+ public static final OptionID SEED_ID = new OptionID("fbagging.seed", "Specify a particular random seed.");
/**
- * The neighborhood size to use
+ * The neighborhood size to use.
*/
protected int k = 2;
/**
- * Number of instances to use
+ * Number of instances to use.
*/
protected int num = 1;
/**
- * Cumulative sum or breadth first combinations
+ * Cumulative sum or breadth first combinations.
*/
protected boolean breadth = false;
/**
- * Random generator seed
+ * Random generator.
*/
- protected Long seed = null;
+ protected RandomFactory rnd;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter pK = new IntParameter(LOF.K_ID, new GreaterConstraint(1));
- if(config.grab(pK)) {
+ final IntParameter pK = new IntParameter(LOF.K_ID);
+ pK.addConstraint(new GreaterConstraint(1));
+ if (config.grab(pK)) {
k = pK.getValue();
}
- IntParameter NUM_PARAM = new IntParameter(NUM_ID, new GreaterEqualConstraint(1));
- if(config.grab(NUM_PARAM)) {
- num = NUM_PARAM.getValue();
+ IntParameter numP = new IntParameter(NUM_ID);
+ numP.addConstraint(new GreaterEqualConstraint(1));
+ if (config.grab(numP)) {
+ num = numP.getValue();
}
- Flag BREADTH_FLAG = new Flag(BREADTH_ID);
- if(config.grab(BREADTH_FLAG)) {
- breadth = BREADTH_FLAG.getValue();
+ Flag breadthF = new Flag(BREADTH_ID);
+ if (config.grab(breadthF)) {
+ breadth = breadthF.getValue();
}
- LongParameter seedP = new LongParameter(SEED_ID, true);
- if(config.grab(seedP)) {
- seed = seedP.getValue();
+ RandomParameter rndP = new RandomParameter(SEED_ID);
+ if (config.grab(rndP)) {
+ rnd = rndP.getValue();
}
}
@Override
protected FeatureBagging makeInstance() {
// Default is to re-use the same distance
- return new FeatureBagging(k, num, breadth, seed);
+ return new FeatureBagging(k, num, breadth, rnd);
}
}
-}
\ No newline at end of file
+}
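
The randomSubspace() rewrite above draws each ensemble member's feature subset with a partial Fisher-Yates pass over an index array; a self-contained sketch of the same swap trick, under the assumption that the selected (rather than the removed) dimensions are marked directly:

import java.util.BitSet;
import java.util.Random;

// Sketch only: mark subdim random dimensions out of alldim.
public final class RandomSubspaceSketch {
  static BitSet randomSubspace(int alldim, int subdim, Random rand) {
    int[] dims = new int[alldim];
    for (int d = 0; d < alldim; d++) {
      dims[d] = d; // candidate dimensions
    }
    BitSet dimset = new BitSet(alldim);
    for (int d = 0; d < subdim; d++) {
      int s = rand.nextInt(alldim - d); // pick from the unpicked prefix
      dimset.set(dims[s]);
      dims[s] = dims[alldim - d - 1];   // swap the last unpicked slot in
    }
    return dimset;
  }

  public static void main(String[] args) {
    System.out.println(randomSubspace(10, 5, new Random(0)));
  }
}
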
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java
index 73d4156a..15b94322 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java
@@ -48,12 +48,14 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.ProjectedView;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
@@ -63,7 +65,7 @@ import de.lmu.ifi.dbs.elki.math.statistics.tests.KolmogorovSmirnovTest;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
@@ -74,8 +76,8 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstrain
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.LongParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
/**
* Algorithm to compute High Contrast Subspaces for Density-Based Outlier
@@ -99,12 +101,12 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
*/
@Title("HiCS: High Contrast Subspaces for Density-Based Outlier Ranking")
@Description("Algorithm to compute High Contrast Subspaces in a database as a pre-processing step for for density-based outlier ranking methods.")
-@Reference(authors = "Fabian Keller, Emmanuel Müller, Klemens Böhm", title = "HiCS: High Contrast Subspaces for Density-Based Outlier Ranking", booktitle = "Proc. IEEE 28th International Conference on Data Engineering (ICDE 2012)")
-public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+@Reference(authors = "Fabian Keller, Emmanuel Müller, Klemens Böhm", title = "HiCS: High Contrast Subspaces for Density-Based Outlier Ranking", booktitle = "Proc. IEEE 28th International Conference on Data Engineering (ICDE 2012)", url = "http://dx.doi.org/10.1109/ICDE.2012.88")
+public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
- * The Logger for this class
+ * The Logger for this class.
*/
- private static final Logging logger = Logging.getLogger(HiCS.class);
+ private static final Logging LOG = Logging.getLogger(HiCS.class);
/**
* Maximum number of retries.
@@ -112,57 +114,57 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie
private static final int MAX_RETRIES = 100;
/**
- * Monte-Carlo iterations
+ * Monte-Carlo iterations.
*/
private int m;
/**
- * Alpha threshold
+ * Alpha threshold.
*/
private double alpha;
/**
- * Outlier detection algorithm
+ * Outlier detection algorithm.
*/
private OutlierAlgorithm outlierAlgorithm;
/**
- * Statistical test to use
+ * Statistical test to use.
*/
private GoodnessOfFitTest statTest;
/**
- * Candidates limit
+ * Candidates limit.
*/
private int cutoff;
-
+
/**
- * Random generator
+ * Random generator.
*/
- private Random random;
+ private RandomFactory rnd;
/**
- * Constructor
+ * Constructor.
*
* @param m value of m
* @param alpha value of alpha
* @param outlierAlgorithm Inner outlier detection algorithm
* @param statTest Test to use
* @param cutoff Candidate limit
- * @param seed Random seed
+ * @param rnd Random generator
*/
- public HiCS(int m, double alpha, OutlierAlgorithm outlierAlgorithm, GoodnessOfFitTest statTest, int cutoff, Long seed) {
+ public HiCS(int m, double alpha, OutlierAlgorithm outlierAlgorithm, GoodnessOfFitTest statTest, int cutoff, RandomFactory rnd) {
super();
this.m = m;
this.alpha = alpha;
this.outlierAlgorithm = outlierAlgorithm;
this.statTest = statTest;
this.cutoff = cutoff;
- this.random = (seed != null) ? new Random(seed) : new Random();
+ this.rnd = rnd;
}
/**
- * Perform HiCS on a given database
+ * Perform HiCS on a given database.
*
* @param relation the database
* @return The aggregated resulting scores that were assigned by the given
@@ -170,23 +172,23 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie
*/
public OutlierResult run(Relation<V> relation) {
final DBIDs ids = relation.getDBIDs();
- final V factory = DatabaseUtil.assumeVectorField(relation).getFactory();
+ final NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(relation);
ArrayList<ArrayDBIDs> subspaceIndex = buildOneDimIndexes(relation);
- Set<HiCSSubspace> subspaces = calculateSubspaces(relation, subspaceIndex);
+ Set<HiCSSubspace> subspaces = calculateSubspaces(relation, subspaceIndex, rnd.getRandom());
- if(logger.isVerbose()) {
- logger.verbose("Number of high-contrast subspaces: " + subspaces.size());
+ if (LOG.isVerbose()) {
+ LOG.verbose("Number of high-contrast subspaces: " + subspaces.size());
}
List<Relation<Double>> results = new ArrayList<Relation<Double>>();
- FiniteProgress prog = logger.isVerbose() ? new FiniteProgress("Calculating Outlier scores for high Contrast subspaces", subspaces.size(), logger) : null;
+ FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Calculating Outlier scores for high Contrast subspaces", subspaces.size(), LOG) : null;
// run outlier detection and collect the result
// TODO extend so that any outlierAlgorithm can be used (use materialized
// relation instead of SubspaceEuclideanDistanceFunction?)
- for(HiCSSubspace dimset : subspaces) {
- if(logger.isVerbose()) {
- logger.verbose("Performing outlier detection in subspace " + dimset);
+ for (HiCSSubspace dimset : subspaces) {
+ if (LOG.isVerbose()) {
+ LOG.verbose("Performing outlier detection in subspace " + dimset);
}
ProxyDatabase pdb = new ProxyDatabase(ids);
@@ -196,22 +198,22 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie
// run LOF and collect the result
OutlierResult result = outlierAlgorithm.run(pdb);
results.add(result.getScores());
- if(prog != null) {
- prog.incrementProcessed(logger);
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
}
}
- if(prog != null) {
- prog.ensureCompleted(logger);
+ if (prog != null) {
+ prog.ensureCompleted(LOG);
}
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
DoubleMinMax minmax = new DoubleMinMax();
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double sum = 0.0;
- for(Relation<Double> r : results) {
+ for (Relation<Double> r : results) {
final Double s = r.get(iditer);
- if(s != null && !Double.isNaN(s)) {
+ if (s != null && !Double.isNaN(s)) {
sum += s;
}
}
@@ -232,12 +234,12 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie
* @param relation Relation to index
* @return List of sorted objects
*/
- private ArrayList<ArrayDBIDs> buildOneDimIndexes(Relation<? extends NumberVector<?, ?>> relation) {
- final int dim = DatabaseUtil.dimensionality(relation);
+ private ArrayList<ArrayDBIDs> buildOneDimIndexes(Relation<? extends NumberVector<?>> relation) {
+ final int dim = RelationUtil.dimensionality(relation);
ArrayList<ArrayDBIDs> subspaceIndex = new ArrayList<ArrayDBIDs>(dim + 1);
SortDBIDsBySingleDimension comp = new VectorUtil.SortDBIDsBySingleDimension(relation);
- for(int i = 1; i <= dim; i++) {
+ for (int i = 0; i < dim; i++) {
ArrayModifiableDBIDs amDBIDs = DBIDUtil.newArray(relation.getDBIDs());
comp.setDimension(i);
amDBIDs.sort(comp);
@@ -248,140 +250,143 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie
}
/**
- * Identifies high contrast subspaces in a given full-dimensional database
+ * Identifies high contrast subspaces in a given full-dimensional database.
*
* @param relation the relation the HiCS should be evaluated for
* @param subspaceIndex Subspace indexes
* @return a set of high contrast subspaces
*/
- private Set<HiCSSubspace> calculateSubspaces(Relation<? extends NumberVector<?, ?>> relation, ArrayList<ArrayDBIDs> subspaceIndex) {
- final int dbdim = DatabaseUtil.dimensionality(relation);
+ private Set<HiCSSubspace> calculateSubspaces(Relation<? extends NumberVector<?>> relation, ArrayList<ArrayDBIDs> subspaceIndex, Random random) {
+ final int dbdim = RelationUtil.dimensionality(relation);
- FiniteProgress dprog = logger.isVerbose() ? new FiniteProgress("Subspace dimensionality", dbdim, logger) : null;
- if(dprog != null) {
- dprog.setProcessed(2, logger);
+ FiniteProgress dprog = LOG.isVerbose() ? new FiniteProgress("Subspace dimensionality", dbdim, LOG) : null;
+ if (dprog != null) {
+ dprog.setProcessed(2, LOG);
}
TreeSet<HiCSSubspace> subspaceList = new TreeSet<HiCSSubspace>(HiCSSubspace.SORT_BY_SUBSPACE);
TopBoundedHeap<HiCSSubspace> dDimensionalList = new TopBoundedHeap<HiCSSubspace>(cutoff, HiCSSubspace.SORT_BY_CONTRAST_ASC);
- FiniteProgress prog = logger.isVerbose() ? new FiniteProgress("Generating two-element subsets", dbdim * (dbdim - 1) / 2, logger) : null;
+ FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Generating two-element subsets", (dbdim * (dbdim - 1)) >> 1, LOG) : null;
// compute two-element sets of subspaces
- for(int i = 0; i < dbdim; i++) {
- for(int j = i + 1; j < dbdim; j++) {
+ for (int i = 0; i < dbdim; i++) {
+ for (int j = i + 1; j < dbdim; j++) {
HiCSSubspace ts = new HiCSSubspace();
ts.set(i);
ts.set(j);
- calculateContrast(relation, ts, subspaceIndex);
+ calculateContrast(relation, ts, subspaceIndex, random);
dDimensionalList.add(ts);
- if(prog != null) {
- prog.incrementProcessed(logger);
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
}
}
}
- if(prog != null) {
- prog.ensureCompleted(logger);
+ if (prog != null) {
+ prog.ensureCompleted(LOG);
}
- IndefiniteProgress qprog = logger.isVerbose() ? new IndefiniteProgress("Testing subspace candidates", logger) : null;
- for(int d = 3; !dDimensionalList.isEmpty(); d++) {
- if(dprog != null) {
- dprog.setProcessed(d, logger);
+ IndefiniteProgress qprog = LOG.isVerbose() ? new IndefiniteProgress("Testing subspace candidates", LOG) : null;
+ for (int d = 3; !dDimensionalList.isEmpty(); d++) {
+ if (dprog != null) {
+ dprog.setProcessed(d, LOG);
}
- subspaceList.addAll(dDimensionalList);
// result now contains all d-dimensional sets of subspaces
- ArrayList<HiCSSubspace> candidateList = new ArrayList<HiCSSubspace>(dDimensionalList);
+ ArrayList<HiCSSubspace> candidateList = new ArrayList<HiCSSubspace>(dDimensionalList.size());
+ for (HiCSSubspace sub : dDimensionalList) {
+ subspaceList.add(sub);
+ candidateList.add(sub);
+ }
dDimensionalList.clear();
// candidateList now contains the *m* best d-dimensional sets
Collections.sort(candidateList, HiCSSubspace.SORT_BY_SUBSPACE);
// TODO: optimize APRIORI style, by not even computing the bit set or?
- for(int i = 0; i < candidateList.size() - 1; i++) {
- for(int j = i + 1; j < candidateList.size(); j++) {
+ for (int i = 0; i < candidateList.size() - 1; i++) {
+ for (int j = i + 1; j < candidateList.size(); j++) {
HiCSSubspace set1 = candidateList.get(i);
HiCSSubspace set2 = candidateList.get(j);
HiCSSubspace joinedSet = new HiCSSubspace();
joinedSet.or(set1);
joinedSet.or(set2);
- if(joinedSet.cardinality() != d) {
+ if (joinedSet.cardinality() != d) {
continue;
}
- calculateContrast(relation, joinedSet, subspaceIndex);
+ calculateContrast(relation, joinedSet, subspaceIndex, random);
dDimensionalList.add(joinedSet);
- if(qprog != null) {
- qprog.incrementProcessed(logger);
+ if (qprog != null) {
+ qprog.incrementProcessed(LOG);
}
}
}
// Prune
- for(HiCSSubspace cand : candidateList) {
- for(HiCSSubspace nextSet : dDimensionalList) {
- if(nextSet.contrast > cand.contrast) {
+ for (HiCSSubspace cand : candidateList) {
+ for (HiCSSubspace nextSet : dDimensionalList) {
+ if (nextSet.contrast > cand.contrast) {
subspaceList.remove(cand);
break;
}
}
}
}
- if(qprog != null) {
- qprog.setCompleted(logger);
+ if (qprog != null) {
+ qprog.setCompleted(LOG);
}
- if(dprog != null) {
- dprog.setProcessed(dbdim, logger);
- dprog.ensureCompleted(logger);
+ if (dprog != null) {
+ dprog.setProcessed(dbdim, LOG);
+ dprog.ensureCompleted(LOG);
}
return subspaceList;
}
/**
- * Calculates the actual contrast of a given subspace
+ * Calculates the actual contrast of a given subspace.
*
- * @param relation
- * @param subspace
+ * @param relation Relation to process
+ * @param subspace Subspace
* @param subspaceIndex Subspace indexes
*/
- private void calculateContrast(Relation<? extends NumberVector<?, ?>> relation, HiCSSubspace subspace, ArrayList<ArrayDBIDs> subspaceIndex) {
+ private void calculateContrast(Relation<? extends NumberVector<?>> relation, HiCSSubspace subspace, ArrayList<ArrayDBIDs> subspaceIndex, Random random) {
final int card = subspace.cardinality();
final double alpha1 = Math.pow(alpha, (1.0 / card));
final int windowsize = (int) (relation.size() * alpha1);
- final FiniteProgress prog = logger.isDebugging() ? new FiniteProgress("Monte-Carlo iterations", m, logger) : null;
+ final FiniteProgress prog = LOG.isDebugging() ? new FiniteProgress("Monte-Carlo iterations", m, LOG) : null;
int retries = 0;
double deviationSum = 0.0;
- for(int i = 0; i < m; i++) {
+ for (int i = 0; i < m; i++) {
// Choose a random set bit.
int chosen = -1;
- for(int tmp = random.nextInt(card); tmp >= 0; tmp--) {
+ for (int tmp = random.nextInt(card); tmp >= 0; tmp--) {
chosen = subspace.nextSetBit(chosen + 1);
}
// initialize sample
DBIDs conditionalSample = relation.getDBIDs();
- for(int j = subspace.nextSetBit(0); j >= 0; j = subspace.nextSetBit(j + 1)) {
- if(j == chosen) {
+ for (int j = subspace.nextSetBit(0); j >= 0; j = subspace.nextSetBit(j + 1)) {
+ if (j == chosen) {
continue;
}
ArrayDBIDs sortedIndices = subspaceIndex.get(j);
- ArrayModifiableDBIDs indexBlock = DBIDUtil.newArray();
+ ArrayModifiableDBIDs indexBlock = DBIDUtil.newArray(windowsize);
// initialize index block
- int start = random.nextInt(relation.size() - windowsize);
- for(int k = start; k < start + windowsize; k++) {
- indexBlock.add(sortedIndices.get(k)); // select index block
+ DBIDArrayIter iter = sortedIndices.iter();
+ iter.seek(random.nextInt(relation.size() - windowsize));
+ for (int k = 0; k < windowsize; k++, iter.advance()) {
+ indexBlock.add(iter); // select index block
}
conditionalSample = DBIDUtil.intersection(conditionalSample, indexBlock);
}
- if(conditionalSample.size() < 10) {
+ if (conditionalSample.size() < 10) {
retries++;
- if(logger.isDebugging()) {
- logger.debug("Sample size very small. Retry no. " + retries);
+ if (LOG.isDebugging()) {
+ LOG.debug("Sample size very small. Retry no. " + retries);
}
- if(retries >= MAX_RETRIES) {
- logger.warning("Too many retries, for small samples: " + retries);
- }
- else {
+ if (retries >= MAX_RETRIES) {
+ LOG.warning("Too many retries, for small samples: " + retries);
+ } else {
i--;
continue;
}
@@ -391,7 +396,7 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie
{
int l = 0;
for (DBIDIter iter = conditionalSample.iter(); iter.valid(); iter.advance()) {
- sampleValues[l] = relation.get(iter).doubleValue(chosen + 1);
+ sampleValues[l] = relation.get(iter).doubleValue(chosen);
l++;
}
}
@@ -400,23 +405,23 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie
{
int l = 0;
for (DBIDIter iter = subspaceIndex.get(chosen).iter(); iter.valid(); iter.advance()) {
- fullValues[l] = relation.get(iter).doubleValue(chosen + 1);
+ fullValues[l] = relation.get(iter).doubleValue(chosen);
l++;
}
}
double contrast = statTest.deviation(fullValues, sampleValues);
- if(Double.isNaN(contrast)) {
+ if (Double.isNaN(contrast)) {
i--;
- logger.warning("Contrast was NaN");
+ LOG.warning("Contrast was NaN");
continue;
}
deviationSum += contrast;
- if(prog != null) {
- prog.incrementProcessed(logger);
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
}
}
- if(prog != null) {
- prog.ensureCompleted(logger);
+ if (prog != null) {
+ prog.ensureCompleted(LOG);
}
subspace.contrast = deviationSum / m;
}
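For context on what the method above computes: the contrast of a subspace is estimated by Monte-Carlo slicing, comparing the marginal distribution of one randomly chosen subspace dimension against its distribution conditioned on random index windows in the other subspace dimensions. The sketch below re-implements that idea on plain double[n][d] arrays, with a hand-rolled two-sample Kolmogorov-Smirnov statistic standing in for the pluggable GoodnessOfFitTest; it omits the retry, NaN, and progress handling above, and all names are illustrative, not ELKI API.

import java.util.Arrays;
import java.util.Comparator;
import java.util.Random;

public class ContrastSketch {
  /**
   * Monte-Carlo contrast: average deviation between the marginal distribution
   * of a randomly chosen subspace dimension and its distribution conditioned
   * on random index windows in each of the other subspace dimensions.
   */
  static double contrast(double[][] data, int[] dims, int m, double alpha, Random rnd) {
    final int n = data.length;
    final double a1 = Math.pow(alpha, 1.0 / dims.length); // per-dimension slice width
    final int window = (int) (n * a1);
    double sum = 0.0;
    for (int it = 0; it < m; it++) {
      int chosen = dims[rnd.nextInt(dims.length)];
      boolean[] in = new boolean[n]; // conditional sample membership
      Arrays.fill(in, true);
      for (int dim : dims) {
        if (dim == chosen) {
          continue;
        }
        Integer[] order = sortedBy(data, dim);
        int start = rnd.nextInt(Math.max(1, n - window));
        boolean[] block = new boolean[n];
        for (int k = start; k < start + window && k < n; k++) {
          block[order[k]] = true; // select index block
        }
        for (int i = 0; i < n; i++) {
          in[i] &= block[i]; // intersect, like DBIDUtil.intersection above
        }
      }
      sum += ksDeviation(column(data, chosen), select(data, in, chosen));
    }
    return sum / m;
  }

  static Integer[] sortedBy(final double[][] data, final int dim) {
    Integer[] idx = new Integer[data.length];
    for (int i = 0; i < idx.length; i++) {
      idx[i] = i;
    }
    Arrays.sort(idx, Comparator.comparingDouble(i -> data[i][dim]));
    return idx;
  }

  static double[] column(double[][] data, int dim) {
    double[] v = new double[data.length];
    for (int i = 0; i < v.length; i++) {
      v[i] = data[i][dim];
    }
    return v;
  }

  static double[] select(double[][] data, boolean[] in, int dim) {
    int c = 0;
    for (boolean b : in) {
      c += b ? 1 : 0;
    }
    double[] v = new double[c];
    for (int i = 0, k = 0; i < data.length; i++) {
      if (in[i]) {
        v[k++] = data[i][dim];
      }
    }
    return v;
  }

  /** Two-sample Kolmogorov-Smirnov statistic; returns 0.0 for an empty sample. */
  static double ksDeviation(double[] full, double[] sample) {
    double[] x = full.clone(), y = sample.clone();
    Arrays.sort(x);
    Arrays.sort(y);
    int i = 0, j = 0;
    double d = 0.0;
    while (i < x.length && j < y.length) {
      if (x[i] <= y[j]) {
        i++;
      } else {
        j++;
      }
      d = Math.max(d, Math.abs((double) i / x.length - (double) j / y.length));
    }
    return d;
  }
}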
@@ -428,7 +433,7 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -441,12 +446,12 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie
*/
public static class HiCSSubspace extends BitSet {
/**
- * Serial version
+ * Serial version.
*/
private static final long serialVersionUID = 1L;
/**
- * The HiCS contrast value
+ * The HiCS contrast value.
*/
protected double contrast;
@@ -459,22 +464,22 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie
@Override
public String toString() {
- StringBuffer buf = new StringBuffer();
+ StringBuilder buf = new StringBuilder();
buf.append("[contrast=").append(contrast);
- for(int i = nextSetBit(0); i >= 0; i = nextSetBit(i + 1)) {
- buf.append(" ").append(i + 1);
+ for (int i = nextSetBit(0); i >= 0; i = nextSetBit(i + 1)) {
+ buf.append(' ').append(i + 1);
}
- buf.append("]");
+ buf.append(']');
return buf.toString();
}
/**
* Sort subspaces by their contrast, in ascending order.
*/
- public static Comparator<HiCSSubspace> SORT_BY_CONTRAST_ASC = new Comparator<HiCSSubspace>() {
+ public static final Comparator<HiCSSubspace> SORT_BY_CONTRAST_ASC = new Comparator<HiCSSubspace>() {
@Override
public int compare(HiCSSubspace o1, HiCSSubspace o2) {
- if(o1.contrast == o2.contrast) {
+ if (o1.contrast == o2.contrast) {
return 0;
}
return o1.contrast > o2.contrast ? 1 : -1;
@@ -484,10 +489,10 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie
/**
* Sort subspaces by their contrast, in descending order.
*/
- public static Comparator<HiCSSubspace> SORT_BY_CONTRAST_DESC = new Comparator<HiCSSubspace>() {
+ public static final Comparator<HiCSSubspace> SORT_BY_CONTRAST_DESC = new Comparator<HiCSSubspace>() {
@Override
public int compare(HiCSSubspace o1, HiCSSubspace o2) {
- if(o1.contrast == o2.contrast) {
+ if (o1.contrast == o2.contrast) {
return 0;
}
return o1.contrast < o2.contrast ? 1 : -1;
@@ -497,16 +502,15 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie
/**
* Sort subspaces by their actual subspace.
*/
- public static Comparator<HiCSSubspace> SORT_BY_SUBSPACE = new Comparator<HiCSSubspace>() {
+ public static final Comparator<HiCSSubspace> SORT_BY_SUBSPACE = new Comparator<HiCSSubspace>() {
@Override
public int compare(HiCSSubspace o1, HiCSSubspace o2) {
int dim1 = o1.nextSetBit(0);
int dim2 = o2.nextSetBit(0);
- while(dim1 >= 0 && dim2 >= 0) {
- if(dim1 < dim2) {
+ while (dim1 >= 0 && dim2 >= 0) {
+ if (dim1 < dim2) {
return -1;
- }
- else if(dim1 > dim2) {
+ } else if (dim1 > dim2) {
return 1;
}
dim1 = o1.nextSetBit(dim1 + 1);
@@ -518,7 +522,7 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie
}
/**
- * Parameterization class
+ * Parameterization class.
*
* @author Jan Brusis
*
@@ -526,40 +530,40 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie
*
* @param <V> vector type
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
/**
* Parameter that specifies the number of iterations in the Monte-Carlo
- * process of identifying high contrast subspaces
+ * process of identifying high contrast subspaces.
*/
- public static final OptionID M_ID = OptionID.getOrCreateOptionID("hics.m", "The number of iterations in the Monte-Carlo processing.");
+ public static final OptionID M_ID = new OptionID("hics.m", "The number of iterations in the Monte-Carlo processing.");
/**
* Parameter that determines the size of the test statistic during the
- * Monte-Carlo iteration
+ * Monte-Carlo iteration.
*/
- public static final OptionID ALPHA_ID = OptionID.getOrCreateOptionID("hics.alpha", "The discriminance value that determines the size of the test statistic .");
+ public static final OptionID ALPHA_ID = new OptionID("hics.alpha", "The discriminance value that determines the size of the test statistic .");
/**
* Parameter that specifies which outlier detection algorithm to use on the
- * resulting set of high contrast subspaces
+ * resulting set of high contrast subspaces.
*/
- public static final OptionID ALGO_ID = OptionID.getOrCreateOptionID("hics.algo", "The Algorithm that performs the actual outlier detection on the resulting set of subspace");
+ public static final OptionID ALGO_ID = new OptionID("hics.algo", "The algorithm that performs the actual outlier detection on the resulting set of subspaces");
/**
* Parameter that specifies which statistical test to use in order to
- * calculate the deviation of two given data samples
+ * calculate the deviation of two given data samples.
*/
- public static final OptionID TEST_ID = OptionID.getOrCreateOptionID("hics.test", "The statistical test that is used to calculate the deviation of two data samples");
+ public static final OptionID TEST_ID = new OptionID("hics.test", "The statistical test that is used to calculate the deviation of two data samples");
/**
- * Parameter that specifies the candidate_cutoff
+ * Parameter that specifies the candidate cutoff.
*/
- public static final OptionID LIMIT_ID = OptionID.getOrCreateOptionID("hics.limit", "The threshold that determines how many d-dimensional subspace candidates to retain in each step of the generation");
+ public static final OptionID LIMIT_ID = new OptionID("hics.limit", "The threshold that determines how many d-dimensional subspace candidates to retain in each step of the generation");
/**
- * Parameter that specifies the random seed
+ * Parameter that specifies the random seed.
*/
- public static final OptionID SEED_ID = OptionID.getOrCreateOptionID("hics.seed", "The random seed.");
+ public static final OptionID SEED_ID = new OptionID("hics.seed", "The random seed.");
/**
* Holds the value of {@link #M_ID}.
@@ -582,52 +586,55 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie
private GoodnessOfFitTest statTest;
/**
- * Holds the value of {@link #LIMIT_ID}
+ * Holds the value of {@link #LIMIT_ID}.
*/
private int cutoff = 400;
-
+
/**
- * Random seed (optional)
+ * Random generator.
*/
- private Long seed = null;
+ private RandomFactory rnd;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter mP = new IntParameter(M_ID, new GreaterConstraint(1), 50);
- if(config.grab(mP)) {
- m = mP.getValue();
+ final IntParameter mP = new IntParameter(M_ID, 50);
+ mP.addConstraint(new GreaterConstraint(1));
+ if (config.grab(mP)) {
+ m = mP.intValue();
}
- final DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, new GreaterConstraint(0), 0.1);
- if(config.grab(alphaP)) {
- alpha = alphaP.getValue();
+ final DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, 0.1);
+ alphaP.addConstraint(new GreaterConstraint(0));
+ if (config.grab(alphaP)) {
+ alpha = alphaP.doubleValue();
}
final ObjectParameter<OutlierAlgorithm> algoP = new ObjectParameter<OutlierAlgorithm>(ALGO_ID, OutlierAlgorithm.class, LOF.class);
- if(config.grab(algoP)) {
+ if (config.grab(algoP)) {
outlierAlgorithm = algoP.instantiateClass(config);
}
final ObjectParameter<GoodnessOfFitTest> testP = new ObjectParameter<GoodnessOfFitTest>(TEST_ID, GoodnessOfFitTest.class, KolmogorovSmirnovTest.class);
- if(config.grab(testP)) {
+ if (config.grab(testP)) {
statTest = testP.instantiateClass(config);
}
- final IntParameter cutoffP = new IntParameter(LIMIT_ID, new GreaterConstraint(1), 100);
- if(config.grab(cutoffP)) {
- cutoff = cutoffP.getValue();
+ final IntParameter cutoffP = new IntParameter(LIMIT_ID, 100);
+ cutoffP.addConstraint(new GreaterConstraint(1));
+ if (config.grab(cutoffP)) {
+ cutoff = cutoffP.intValue();
}
- final LongParameter seedP = new LongParameter(SEED_ID, true);
- if(config.grab(seedP)) {
- seed = seedP.getValue();
+ final RandomParameter rndP = new RandomParameter(SEED_ID);
+ if (config.grab(rndP)) {
+ rnd = rndP.getValue();
}
-}
+ }
@Override
protected HiCS<V> makeInstance() {
- return new HiCS<V>(m, alpha, outlierAlgorithm, statTest, cutoff, seed);
+ return new HiCS<V>(m, alpha, outlierAlgorithm, statTest, cutoff, rnd);
}
}
-}
\ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java
index a4db7e3d..387041da 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java
@@ -62,7 +62,7 @@ public class RescaleMetaOutlierAlgorithm extends AbstractAlgorithm<OutlierResult
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(RescaleMetaOutlierAlgorithm.class);
+ private static final Logging LOG = Logging.getLogger(RescaleMetaOutlierAlgorithm.class);
/**
* Parameter to specify a scaling function to use.
@@ -70,7 +70,7 @@ public class RescaleMetaOutlierAlgorithm extends AbstractAlgorithm<OutlierResult
* Key: {@code -comphist.scaling}
* </p>
*/
- public static final OptionID SCALING_ID = OptionID.getOrCreateOptionID("metaoutlier.scaling", "Class to use as scaling function.");
+ public static final OptionID SCALING_ID = new OptionID("metaoutlier.scaling", "Class to use as scaling function.");
/**
* Holds the algorithm to run.
@@ -137,7 +137,7 @@ public class RescaleMetaOutlierAlgorithm extends AbstractAlgorithm<OutlierResult
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
@Override
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/SimpleOutlierEnsemble.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/SimpleOutlierEnsemble.java
new file mode 100644
index 00000000..b7791fc4
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/SimpleOutlierEnsemble.java
@@ -0,0 +1,222 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier.meta;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.Algorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
+import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.result.Result;
+import de.lmu.ifi.dbs.elki.result.ResultUtil;
+import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.ensemble.EnsembleVoting;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ChainedParameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectListParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * Simple outlier ensemble method.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.composedOf EnsembleVoting
+ * @apiviz.uses OutlierResult oneway - - reads
+ * @apiviz.uses OutlierResult oneway - - «create»
+ */
+public class SimpleOutlierEnsemble extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(SimpleOutlierEnsemble.class);
+
+ /**
+ * The algorithms to run.
+ */
+ private List<OutlierAlgorithm> algorithms;
+
+ /**
+ * The voting in use.
+ */
+ private EnsembleVoting voting;
+
+ /**
+ * Constructor.
+ *
+ * @param algorithms Algorithms to run
+ * @param voting Voting method
+ */
+ public SimpleOutlierEnsemble(List<OutlierAlgorithm> algorithms, EnsembleVoting voting) {
+ this.algorithms = algorithms;
+ this.voting = voting;
+ }
+
+ @Override
+ public OutlierResult run(Database database) throws IllegalStateException {
+ int num = algorithms.size();
+ // Run inner outlier algorithms
+ ModifiableDBIDs ids = DBIDUtil.newHashSet();
+ ArrayList<OutlierResult> results = new ArrayList<OutlierResult>(num);
+ {
+ FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Inner outlier algorithms", num, LOG) : null;
+ for (Algorithm alg : algorithms) {
+ Result res = alg.run(database);
+ List<OutlierResult> ors = ResultUtil.getOutlierResults(res);
+ for (OutlierResult or : ors) {
+ results.add(or);
+ ids.addDBIDs(or.getScores().getDBIDs());
+ }
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
+ }
+ }
+ if (prog != null) {
+ prog.ensureCompleted(LOG);
+ }
+ }
+ // Combine
+ WritableDoubleDataStore sumscore = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
+ DoubleMinMax minmax = new DoubleMinMax();
+ {
+ FiniteProgress cprog = LOG.isVerbose() ? new FiniteProgress("Combining results", ids.size(), LOG) : null;
+ for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
+ double[] scores = new double[num];
+ int i = 0;
+ for (OutlierResult r : results) {
+ Double score = r.getScores().get(id);
+ if (score != null) {
+ scores[i] = score;
+ i++;
+ } else {
+ LOG.warning("DBID " + id + " was not given a score by result " + r);
+ }
+ }
+ if (i > 0) {
+ // Shrink array if necessary.
+ if (i < scores.length) {
+ scores = Arrays.copyOf(scores, i);
+ }
+ double combined = voting.combine(scores);
+ sumscore.putDouble(id, combined);
+ minmax.put(combined);
+ } else {
+ LOG.warning("DBID " + id + " was not given any score at all.");
+ }
+ if (cprog != null) {
+ cprog.incrementProcessed(LOG);
+ }
+ }
+ if (cprog != null) {
+ cprog.ensureCompleted(LOG);
+ }
+ }
+ OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
+ Relation<Double> scores = new MaterializedRelation<Double>("Simple Outlier Ensemble", "ensemble-outlier", TypeUtil.DOUBLE, sumscore, ids);
+ return new OutlierResult(meta, scores);
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ TypeInformation[] trs = new TypeInformation[algorithms.size()];
+ for (int i = 0; i < trs.length; i++) {
+ // FIXME: what if an algorithm needs more than one input data source?
+ trs[i] = algorithms.get(i).getInputTypeRestriction()[0];
+ }
+ return TypeUtil.array(new CombinedTypeInformation(trs));
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ /**
+ * Voting strategy to use in the ensemble.
+ */
+ public static final OptionID VOTING_ID = new OptionID("ensemble.voting", "Voting strategy to use in the ensemble.");
+
+ /**
+ * The algorithms to run.
+ */
+ private List<OutlierAlgorithm> algorithms;
+
+ /**
+ * The voting in use.
+ */
+ private EnsembleVoting voting;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ ObjectListParameter<OutlierAlgorithm> algP = new ObjectListParameter<OutlierAlgorithm>(OptionID.ALGORITHM, OutlierAlgorithm.class);
+ if (config.grab(algP)) {
+ ListParameterization subconfig = new ListParameterization();
+ ChainedParameterization chain = new ChainedParameterization(subconfig, config);
+ chain.errorsTo(config);
+ algorithms = algP.instantiateClasses(chain);
+ subconfig.logAndClearReportedErrors();
+ }
+ ObjectParameter<EnsembleVoting> votingP = new ObjectParameter<EnsembleVoting>(VOTING_ID, EnsembleVoting.class);
+ if (config.grab(votingP)) {
+ voting = votingP.instantiateClass(config);
+ }
+ }
+
+ @Override
+ protected SimpleOutlierEnsemble makeInstance() {
+ return new SimpleOutlierEnsemble(algorithms, voting);
+ }
+ }
+}
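The heart of the new class is the combination loop in run(): take the union of all scored ids, collect a per-object score vector while skipping detectors that did not score the object, shrink the array, and vote. A stripped-down sketch with plain maps in place of ELKI's relations and data stores, and a functional interface in place of EnsembleVoting; all names here are illustrative, not ELKI API.

import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.ToDoubleFunction;

public class EnsembleSketch {
  static Map<Integer, Double> combine(List<Map<Integer, Double>> results,
      ToDoubleFunction<double[]> voting) {
    Set<Integer> ids = new HashSet<>(); // union of all scored ids
    for (Map<Integer, Double> r : results) {
      ids.addAll(r.keySet());
    }
    Map<Integer, Double> combined = new HashMap<>();
    for (Integer id : ids) {
      double[] scores = new double[results.size()];
      int i = 0;
      for (Map<Integer, Double> r : results) {
        Double s = r.get(id);
        if (s != null) { // skip detectors that did not score this object
          scores[i++] = s;
        }
      }
      if (i > 0) {
        // Shrink to the scores actually collected, then vote.
        combined.put(id, voting.applyAsDouble(Arrays.copyOf(scores, i)));
      }
    }
    return combined;
  }

  public static void main(String[] args) {
    Map<Integer, Double> detectorA = Map.of(1, 0.9, 2, 0.1);
    Map<Integer, Double> detectorB = Map.of(1, 0.7, 2, 0.3, 3, 0.5);
    // Mean voting; object 3 is combined from a single score.
    System.out.println(combine(List.of(detectorA, detectorB),
        s -> Arrays.stream(s).average().orElse(0)));
  }
}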
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/package-info.java
index d7e78281..7c5dd8b0 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/package-info.java
@@ -1,5 +1,8 @@
/**
* <p>Meta outlier detection algorithms: external scores, score rescaling.</p>
+ *
+ * @apiviz.exclude java.io.File
+ * @apiviz.exclude algorithm.AbstractAlgorithm
*/
/*
This file is part of ELKI:
@@ -23,4 +26,4 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-package de.lmu.ifi.dbs.elki.algorithm.outlier.meta;
\ No newline at end of file
+package de.lmu.ifi.dbs.elki.algorithm.outlier.meta;
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/package-info.java
index ea5d3ec4..eca0d876 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/package-info.java
@@ -4,6 +4,11 @@
* @see de.lmu.ifi.dbs.elki.algorithm
*
* @apiviz.exclude database.query
+ * @apiviz.exclude java.lang.Comparable
+ * @apiviz.exclude de.lmu.ifi.dbs.elki.utilities
+ * @apiviz.exclude de.lmu.ifi.dbs.elki.algorithm.Algorithm
+ * @apiviz.exclude de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm
+ * @apiviz.exclude AggarwalYuEvolutionary.Individuum
*/
/*
This file is part of ELKI:
@@ -27,4 +32,4 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-package de.lmu.ifi.dbs.elki.algorithm.outlier;
\ No newline at end of file
+package de.lmu.ifi.dbs.elki.algorithm.outlier;
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractDistanceBasedSpatialOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractDistanceBasedSpatialOutlier.java
index 1caf7582..f37ee182 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractDistanceBasedSpatialOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractDistanceBasedSpatialOutlier.java
@@ -45,7 +45,7 @@ public abstract class AbstractDistanceBasedSpatialOutlier<N, O, D extends Number
/**
* Parameter to specify the non spatial distance function to use
*/
- public static final OptionID NON_SPATIAL_DISTANCE_FUNCTION_ID = OptionID.getOrCreateOptionID("spatialoutlier.nonspatialdistance", "The distance function to use for non spatial attributes");
+ public static final OptionID NON_SPATIAL_DISTANCE_FUNCTION_ID = new OptionID("spatialoutlier.nonspatialdistance", "The distance function to use for non spatial attributes");
/**
* The distance function to use
@@ -84,7 +84,7 @@ public abstract class AbstractDistanceBasedSpatialOutlier<N, O, D extends Number
* @param <O> Non-spatial object type
* @param <D> Distance value type
*/
- public static abstract class Parameterizer<N, O, D extends NumberDistance<D, ?>> extends AbstractNeighborhoodOutlier.Parameterizer<N> {
+ public abstract static class Parameterizer<N, O, D extends NumberDistance<D, ?>> extends AbstractNeighborhoodOutlier.Parameterizer<N> {
/**
* The distance function to use on the non-spatial attributes.
*/
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractNeighborhoodOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractNeighborhoodOutlier.java
index f0c05e1e..d3770504 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractNeighborhoodOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractNeighborhoodOutlier.java
@@ -44,7 +44,7 @@ public abstract class AbstractNeighborhoodOutlier<O> extends AbstractAlgorithm<O
/**
* Parameter to specify the neighborhood predicate to use.
*/
- public static final OptionID NEIGHBORHOOD_ID = OptionID.getOrCreateOptionID("neighborhood", "The neighborhood predicate to use in comparison step.");
+ public static final OptionID NEIGHBORHOOD_ID = new OptionID("neighborhood", "The neighborhood predicate to use in comparison step.");
/**
* Our predicate to obtain the neighbors
@@ -79,7 +79,7 @@ public abstract class AbstractNeighborhoodOutlier<O> extends AbstractAlgorithm<O
*
* @param <O> Object type
*/
- public static abstract class Parameterizer<O> extends AbstractParameterizer {
+ public abstract static class Parameterizer<O> extends AbstractParameterizer {
/**
* The predicate to obtain the neighbors.
*/
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java
index 7f3bac29..cd5670f7 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java
@@ -37,13 +37,13 @@ import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.ProxyView;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
@@ -52,7 +52,6 @@ import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
@@ -85,11 +84,11 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
*/
@Title("GLS-Backward Search")
@Reference(authors = "F. Chen and C.-T. Lu and A. P. Boedihardjo", title = "GLS-SOD: A Generalized Local Statistical Approach for Spatial Outlier Detection", booktitle = "Proc. 16th ACM SIGKDD international conference on Knowledge discovery and data mining", url = "http://dx.doi.org/10.1145/1835804.1835939")
-public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?, ?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<V, D, OutlierResult> implements OutlierAlgorithm {
+public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<V, D, OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(CTLuGLSBackwardSearchAlgorithm.class);
+ private static final Logging LOG = Logging.getLogger(CTLuGLSBackwardSearchAlgorithm.class);
/**
* Parameter Alpha - significance level
@@ -121,7 +120,7 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?, ?>, D exte
* @param relationy Attribute relation
* @return Algorithm result
*/
- public OutlierResult run(Relation<V> relationx, Relation<? extends NumberVector<?, ?>> relationy) {
+ public OutlierResult run(Relation<V> relationx, Relation<? extends NumberVector<?>> relationy) {
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relationx.getDBIDs(), DataStoreFactory.HINT_STATIC);
DoubleMinMax mm = new DoubleMinMax(0.0, 0.0);
@@ -130,7 +129,7 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?, ?>, D exte
ModifiableDBIDs idview = DBIDUtil.newHashSet(relationx.getDBIDs());
ProxyView<V> proxy = new ProxyView<V>(relationx.getDatabase(), idview, relationx);
- double phialpha = NormalDistribution.standardNormalQuantile(1.0 - alpha / 2);
+ double phialpha = NormalDistribution.standardNormalQuantile(1.0 - alpha * .5);
// Detect outliers while significant.
while(true) {
Pair<DBID, Double> candidate = singleIteration(proxy, relationy);
@@ -138,15 +137,15 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?, ?>, D exte
break;
}
scores.putDouble(candidate.first, candidate.second);
- if (!Double.isNaN(candidate.second)) {
+ if(!Double.isNaN(candidate.second)) {
mm.put(candidate.second);
}
idview.remove(candidate.first);
}
// Remaining objects are inliers
- for (DBIDIter iter = idview.iter(); iter.valid(); iter.advance()) {
- scores.putDouble(iter.getDBID(), 0.0);
+ for(DBIDIter iter = idview.iter(); iter.valid(); iter.advance()) {
+ scores.putDouble(iter, 0.0);
}
}
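A note on the rewritten quantile above: $\varphi_\alpha = \Phi^{-1}(1 - \alpha/2)$ is the two-sided critical value of the standard normal, so 1.0 - alpha * .5 is algebraically the same threshold as the previous 1.0 - alpha / 2; for $\alpha = 0.05$ it gives $\varphi_\alpha \approx 1.96$. The backward search then repeatedly removes the top-scoring candidate from the proxy view and re-fits on the remainder, so each round's regression is free of the outliers already found.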
@@ -162,9 +161,9 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?, ?>, D exte
* @param relationy Attribute relation
* @return Top outlier and associated score
*/
- private Pair<DBID, Double> singleIteration(Relation<V> relationx, Relation<? extends NumberVector<?, ?>> relationy) {
- final int dim = DatabaseUtil.dimensionality(relationx);
- final int dimy = DatabaseUtil.dimensionality(relationy);
+ private Pair<DBID, Double> singleIteration(Relation<V> relationx, Relation<? extends NumberVector<?>> relationy) {
+ final int dim = RelationUtil.dimensionality(relationx);
+ final int dimy = RelationUtil.dimensionality(relationy);
assert (dim == 2);
KNNQuery<V, D> knnQuery = QueryUtil.getKNNQuery(relationx, getDistanceFunction(), k + 1);
@@ -177,47 +176,51 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?, ?>, D exte
Matrix X = new Matrix(ids.size(), 6);
Matrix F = new Matrix(ids.size(), ids.size());
Matrix Y = new Matrix(ids.size(), dimy);
- for(int i = 0; i < ids.size(); i++) {
- DBID id = ids.get(i);
-
- // Fill the data matrix
- {
- V vec = relationx.get(id);
- double la = vec.doubleValue(1);
- double lo = vec.doubleValue(2);
- X.set(i, 0, 1.0);
- X.set(i, 1, la);
- X.set(i, 2, lo);
- X.set(i, 3, la * lo);
- X.set(i, 4, la * la);
- X.set(i, 5, lo * lo);
- }
- {
- for(int d = 0; d < dimy; d++) {
- double idy = relationy.get(id).doubleValue(d + 1);
- Y.set(i, d, idy);
+ {
+ int i = 0;
+ for(DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
+ // Fill the data matrix
+ {
+ V vec = relationx.get(id);
+ double la = vec.doubleValue(0);
+ double lo = vec.doubleValue(1);
+ X.set(i, 0, 1.0);
+ X.set(i, 1, la);
+ X.set(i, 2, lo);
+ X.set(i, 3, la * lo);
+ X.set(i, 4, la * la);
+ X.set(i, 5, lo * lo);
}
- }
- // Fill the neighborhood matrix F:
- {
- KNNResult<D> neighbors = knnQuery.getKNNForDBID(id, k + 1);
- ModifiableDBIDs neighborhood = DBIDUtil.newArray(neighbors.size());
- for(DistanceResultPair<D> dpair : neighbors) {
- if(id.sameDBID(dpair.getDBID())) {
- continue;
+ {
+ final NumberVector<?> vecy = relationy.get(id);
+ for(int d = 0; d < dimy; d++) {
+ double idy = vecy.doubleValue(d);
+ Y.set(i, d, idy);
}
- neighborhood.add(dpair.getDBID());
}
- // Weight object itself positively.
- F.set(i, i, 1.0);
- final int nweight = -1 / neighborhood.size();
- // We need to find the index positions of the neighbors, unfortunately.
- for (DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) {
- int pos = ids.binarySearch(iter.getDBID());
- assert (pos >= 0);
- F.set(pos, i, nweight);
+
+ // Fill the neighborhood matrix F:
+ {
+ KNNResult<D> neighbors = knnQuery.getKNNForDBID(id, k + 1);
+ ModifiableDBIDs neighborhood = DBIDUtil.newArray(neighbors.size());
+ for(DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ if(DBIDUtil.equal(id, neighbor)) {
+ continue;
+ }
+ neighborhood.add(neighbor);
+ }
+ // Weight object itself positively.
+ F.set(i, i, 1.0);
+ final double nweight = -1.0 / neighborhood.size(); // double division; -1 / size() would truncate to 0
+ // We need to find the index positions of the neighbors,
+ // unfortunately.
+ for(DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) {
+ int pos = ids.binarySearch(iter);
+ assert (pos >= 0);
+ F.set(pos, i, nweight);
+ }
}
}
}
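Reading off the rewritten loop: each row of $X$ is a quadratic trend surface in the two spatial coordinates, $X_{i\cdot} = (1,\; la_i,\; lo_i,\; la_i\,lo_i,\; la_i^2,\; lo_i^2)$, which is why singleIteration asserts dim == 2 and allocates six columns. $F$ receives 1.0 on the diagonal and the negative neighborhood weight $-1/|N(i)|$ at each neighbor's row, so the residual of an object is contrasted with its local neighborhood.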
@@ -236,13 +239,13 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?, ?>, D exte
DBID worstid = null;
double worstscore = Double.NEGATIVE_INFINITY;
- for(int i = 0; i < ids.size(); i++) {
- DBID id = ids.get(i);
+ int i = 0;
+ for(DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
double err = E.getRow(i).euclideanLength();
// double err = Math.abs(E.get(i, 0));
if(err > worstscore) {
worstscore = err;
- worstid = id;
+ worstid = DBIDUtil.deref(id);
}
}
@@ -256,7 +259,7 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?, ?>, D exte
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -269,16 +272,16 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?, ?>, D exte
* @param <V> Input vector type
* @param <D> Distance type
*/
- public static class Parameterizer<V extends NumberVector<?, ?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<V, D> {
+ public static class Parameterizer<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<V, D> {
/**
* Holds the alpha value - significance level
*/
- public static final OptionID ALPHA_ID = OptionID.getOrCreateOptionID("glsbs.alpha", "Significance niveau");
+ public static final OptionID ALPHA_ID = new OptionID("glsbs.alpha", "Significance niveau");
/**
* Parameter to specify the k nearest neighbors
*/
- public static final OptionID K_ID = OptionID.getOrCreateOptionID("glsbs.k", "k nearest neighbors to use");
+ public static final OptionID K_ID = new OptionID("glsbs.k", "k nearest neighbors to use");
/**
* Parameter Alpha - significance level
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java
index a0c09057..2caee128 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java
@@ -31,11 +31,11 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid;
@@ -45,7 +45,6 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
/**
@@ -72,11 +71,11 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
* @param <O> Attribute Vector
*/
@Reference(authors = "Chang-Tien Lu and Dechang Chen and Yufeng Kou", title = "Detecting Spatial Outliers with Multiple Attributes", booktitle = "Proc. 15th IEEE International Conference on Tools with Artificial Intelligence, 2003", url = "http://dx.doi.org/10.1109/TAI.2003.1250179")
-public class CTLuMeanMultipleAttributes<N, O extends NumberVector<?, ?>> extends AbstractNeighborhoodOutlier<N> {
+public class CTLuMeanMultipleAttributes<N, O extends NumberVector<?>> extends AbstractNeighborhoodOutlier<N> {
/**
* logger
*/
- public static final Logging logger = Logging.getLogger(CTLuMeanMultipleAttributes.class);
+ private static final Logging LOG = Logging.getLogger(CTLuMeanMultipleAttributes.class);
/**
* Constructor
@@ -89,28 +88,27 @@ public class CTLuMeanMultipleAttributes<N, O extends NumberVector<?, ?>> extends
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
public OutlierResult run(Relation<N> spatial, Relation<O> attributes) {
- if(logger.isDebugging()) {
- logger.debug("Dimensionality: " + DatabaseUtil.dimensionality(attributes));
+ if(LOG.isDebugging()) {
+ LOG.debug("Dimensionality: " + RelationUtil.dimensionality(attributes));
}
final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(spatial);
- CovarianceMatrix covmaker = new CovarianceMatrix(DatabaseUtil.dimensionality(attributes));
+ CovarianceMatrix covmaker = new CovarianceMatrix(RelationUtil.dimensionality(attributes));
WritableDataStore<Vector> deltas = DataStoreUtil.makeStorage(attributes.getDBIDs(), DataStoreFactory.HINT_TEMP, Vector.class);
for(DBIDIter iditer = attributes.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
- final O obj = attributes.get(id);
- final DBIDs neighbors = npred.getNeighborDBIDs(id);
+ final O obj = attributes.get(iditer);
+ final DBIDs neighbors = npred.getNeighborDBIDs(iditer);
// TODO: remove object itself from neighbors?
// Mean vector "g"
Vector mean = Centroid.make(attributes, neighbors);
// Delta vector "h"
- Vector delta = obj.getColumnVector().minus(mean);
- deltas.put(id, delta);
+ Vector delta = obj.getColumnVector().minusEquals(mean);
+ deltas.put(iditer, delta);
covmaker.put(delta);
}
// Finalize covariance matrix:
@@ -120,11 +118,10 @@ public class CTLuMeanMultipleAttributes<N, O extends NumberVector<?, ?>> extends
DoubleMinMax minmax = new DoubleMinMax();
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(attributes.getDBIDs(), DataStoreFactory.HINT_STATIC);
for(DBIDIter iditer = attributes.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
- Vector temp = deltas.get(id).minus(mean);
+ Vector temp = deltas.get(iditer).minus(mean);
final double score = temp.transposeTimesTimes(cmati, temp);
minmax.put(score);
- scores.putDouble(id, score);
+ scores.putDouble(iditer, score);
}
Relation<Double> scoreResult = new MaterializedRelation<Double>("mean multiple attributes spatial outlier", "mean-multipleattributes-outlier", TypeUtil.DOUBLE, scores, attributes.getDBIDs());
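A worked reading of the scoring above: with $h_i = x_i - g_i$ the delta between object $i$ and its neighborhood centroid $g_i$, $\bar h$ the mean of all deltas, and $\hat\Sigma$ their covariance, each score is the squared Mahalanobis distance $s_i = (h_i - \bar h)^\top \hat\Sigma^{-1} (h_i - \bar h)$; cmati is presumably the inverted covariance matrix (its construction falls outside the visible hunks), so transposeTimesTimes evaluates exactly this quadratic form.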
@@ -149,7 +146,7 @@ public class CTLuMeanMultipleAttributes<N, O extends NumberVector<?, ?>> extends
* @param <N> Neighborhood type
* @param <O> Attribute object type
*/
- public static class Parameterizer<N, O extends NumberVector<?, ?>> extends AbstractNeighborhoodOutlier.Parameterizer<N> {
+ public static class Parameterizer<N, O extends NumberVector<?>> extends AbstractNeighborhoodOutlier.Parameterizer<N> {
@Override
protected CTLuMeanMultipleAttributes<N, O> makeInstance() {
return new CTLuMeanMultipleAttributes<N, O>(npredf);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java
index 20ab9a00..7755a459 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java
@@ -1,26 +1,27 @@
package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
-/*
-This file is part of ELKI:
-Environment for Developing KDD-Applications Supported by Index-Structures
-
-Copyright (C) 2012
-Ludwig-Maximilians-Universität München
-Lehr- und Forschungseinheit für Datenbanksysteme
-ELKI Development Team
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
import de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate;
import de.lmu.ifi.dbs.elki.data.NumberVector;
@@ -30,8 +31,8 @@ import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
@@ -60,22 +61,22 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
* The Difference e = non-spatial-Attribute-Value - Median (Neighborhood) is
* computed.<br>
* The Spatial Objects with the highest standardized e value are Spatial
- * Outliers. </p>
+ * Outliers.
*
* @author Ahmed Hettab
*
* @param <N> Neighborhood type
*/
@Title("Median Algorithm for Spatial Outlier Detection")
-@Reference(authors = "C.-T. Lu and D. Chen and Y. Kou", title = "Algorithms for Spatial Outlier Detection", booktitle = "Proc. 3rd IEEE International Conference on Data Mining", url="http://dx.doi.org/10.1109/ICDM.2003.1250986")
+@Reference(authors = "C.-T. Lu and D. Chen and Y. Kou", title = "Algorithms for Spatial Outlier Detection", booktitle = "Proc. 3rd IEEE International Conference on Data Mining", url = "http://dx.doi.org/10.1109/ICDM.2003.1250986")
public class CTLuMedianAlgorithm<N> extends AbstractNeighborhoodOutlier<N> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(CTLuMedianAlgorithm.class);
+ private static final Logging LOG = Logging.getLogger(CTLuMedianAlgorithm.class);
/**
- * Constructor
+ * Constructor.
*
* @param npredf Neighborhood predicate
*/
@@ -84,42 +85,40 @@ public class CTLuMedianAlgorithm<N> extends AbstractNeighborhoodOutlier<N> {
}
/**
- * Main method
+ * Main method.
*
* @param nrel Neighborhood relation
* @param relation Data relation (1d!)
* @return Outlier detection result
*/
- public OutlierResult run(Relation<N> nrel, Relation<? extends NumberVector<?, ?>> relation) {
+ public OutlierResult run(Relation<N> nrel, Relation<? extends NumberVector<?>> relation) {
final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(nrel);
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
MeanVariance mv = new MeanVariance();
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
- DBIDs neighbors = npred.getNeighborDBIDs(id);
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ DBIDs neighbors = npred.getNeighborDBIDs(iditer);
final double median;
{
double[] fi = new double[neighbors.size()];
// calculate and store Median of neighborhood
int c = 0;
- for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
- if(id.sameDBID(iter)) {
+ for (DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
+ if (DBIDUtil.equal(iditer, iter)) {
continue;
}
- fi[c] = relation.get(iter).doubleValue(1);
+ fi[c] = relation.get(iter).doubleValue(0);
c++;
}
- if(c > 0) {
+ if (c > 0) {
median = QuickSelect.median(fi, 0, c);
- }
- else {
- median = relation.get(id).doubleValue(1);
+ } else {
+ median = relation.get(iditer).doubleValue(0);
}
}
- double h = relation.get(id).doubleValue(1) - median;
- scores.putDouble(id, h);
+ double h = relation.get(iditer).doubleValue(0) - median;
+ scores.putDouble(iditer, h);
mv.put(h);
}
@@ -127,11 +126,10 @@ public class CTLuMedianAlgorithm<N> extends AbstractNeighborhoodOutlier<N> {
final double mean = mv.getMean();
final double stddev = mv.getNaiveStddev();
DoubleMinMax minmax = new DoubleMinMax();
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
- double score = Math.abs((scores.doubleValue(id) - mean) / stddev);
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ double score = Math.abs((scores.doubleValue(iditer) - mean) / stddev);
minmax.put(score);
- scores.putDouble(id, score);
+ scores.putDouble(iditer, score);
}
Relation<Double> scoreResult = new MaterializedRelation<Double>("MO", "Median-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
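Stripped of the relation and data-store plumbing, the scoring in this file is compact: $h_i = x_i - \mathrm{median}(N(i))$, then standardize to $|h_i - \bar h| / \sigma_h$ with the population ("naive") standard deviation. A self-contained sketch with arrays standing in for relations and a plain sort in place of QuickSelect; names are illustrative, not ELKI API.

import java.util.Arrays;

public class MedianOutlierSketch {
  /** h[i] = x[i] - median(neighbors of i), standardized to |h - mean| / stddev. */
  static double[] scores(double[] x, int[][] neighbors) {
    int n = x.length;
    double[] h = new double[n];
    for (int i = 0; i < n; i++) {
      double[] fi = new double[neighbors[i].length];
      int c = 0;
      for (int j : neighbors[i]) {
        if (j != i) { // skip the object itself, as in the loop above
          fi[c++] = x[j];
        }
      }
      h[i] = x[i] - (c > 0 ? median(fi, c) : x[i]); // fall back to the own value
    }
    double mean = Arrays.stream(h).average().orElse(0);
    // Population ("naive") standard deviation, matching getNaiveStddev.
    double var = 0;
    for (double v : h) {
      var += (v - mean) * (v - mean);
    }
    double sd = Math.sqrt(var / n); // assumes a non-degenerate h
    double[] s = new double[n];
    for (int i = 0; i < n; i++) {
      s[i] = Math.abs((h[i] - mean) / sd);
    }
    return s;
  }

  static double median(double[] v, int len) {
    double[] w = Arrays.copyOf(v, len);
    Arrays.sort(w); // ELKI uses QuickSelect; a plain sort is fine for a sketch
    return (len % 2 == 1) ? w[len / 2] : 0.5 * (w[len / 2 - 1] + w[len / 2]);
  }

  public static void main(String[] args) {
    double[] x = { 1.0, 1.1, 0.9, 5.0 };
    int[][] nb = { { 1, 2 }, { 0, 2 }, { 0, 1 }, { 0, 1, 2 } };
    System.out.println(Arrays.toString(scores(x, nb))); // index 3 scores highest
  }
}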
@@ -143,16 +141,16 @@ public class CTLuMedianAlgorithm<N> extends AbstractNeighborhoodOutlier<N> {
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
@Override
public TypeInformation[] getInputTypeRestriction() {
- return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), VectorFieldTypeInformation.get(NumberVector.class, 1));
+ return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), new VectorFieldTypeInformation<NumberVector<?>>(NumberVector.class, 1));
}
/**
- * Parameterization class
+ * Parameterization class.
*
* @author Ahmed Hettab
*
@@ -166,4 +164,4 @@ public class CTLuMedianAlgorithm<N> extends AbstractNeighborhoodOutlier<N> {
return new CTLuMedianAlgorithm<N>(npredf);
}
}
-}
\ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java
index c8bcba74..0d515ac7 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java
@@ -31,11 +31,11 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix;
@@ -44,7 +44,6 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
@@ -73,11 +72,11 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
* @param <O> Non Spatial Vector
*/
@Reference(authors = "Chang-Tien Lu and Dechang Chen and Yufeng Kou", title = "Detecting Spatial Outliers with Multiple Attributes", booktitle = "Proc. 15th IEEE International Conference on Tools with Artificial Intelligence, 2003", url = "http://dx.doi.org/10.1109/TAI.2003.1250179")
-public class CTLuMedianMultipleAttributes<N, O extends NumberVector<?, ?>> extends AbstractNeighborhoodOutlier<N> {
+public class CTLuMedianMultipleAttributes<N, O extends NumberVector<?>> extends AbstractNeighborhoodOutlier<N> {
/**
* logger
*/
- public static final Logging logger = Logging.getLogger(CTLuMedianMultipleAttributes.class);
+ private static final Logging LOG = Logging.getLogger(CTLuMedianMultipleAttributes.class);
/**
* Constructor
@@ -90,7 +89,7 @@ public class CTLuMedianMultipleAttributes<N, O extends NumberVector<?, ?>> exten
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -101,18 +100,17 @@ public class CTLuMedianMultipleAttributes<N, O extends NumberVector<?, ?>> exten
* @return Outlier detection result
*/
public OutlierResult run(Relation<N> spatial, Relation<O> attributes) {
- final int dim = DatabaseUtil.dimensionality(attributes);
- if(logger.isDebugging()) {
- logger.debug("Dimensionality: " + dim);
+ final int dim = RelationUtil.dimensionality(attributes);
+ if(LOG.isDebugging()) {
+ LOG.debug("Dimensionality: " + dim);
}
final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(spatial);
CovarianceMatrix covmaker = new CovarianceMatrix(dim);
WritableDataStore<Vector> deltas = DataStoreUtil.makeStorage(attributes.getDBIDs(), DataStoreFactory.HINT_TEMP, Vector.class);
for(DBIDIter iditer = attributes.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
- final O obj = attributes.get(id);
- final DBIDs neighbors = npred.getNeighborDBIDs(id);
+ final O obj = attributes.get(iditer);
+ final DBIDs neighbors = npred.getNeighborDBIDs(iditer);
// Compute the median vector
final Vector median;
{
@@ -123,7 +121,7 @@ public class CTLuMedianMultipleAttributes<N, O extends NumberVector<?, ?>> exten
// TODO: skip object itself within neighbors?
O nobj = attributes.get(iter);
for(int d = 0; d < dim; d++) {
- data[d][i] = nobj.doubleValue(d + 1);
+ data[d][i] = nobj.doubleValue(d);
}
i++;
}
@@ -135,8 +133,8 @@ public class CTLuMedianMultipleAttributes<N, O extends NumberVector<?, ?>> exten
}
// Delta vector "h"
- Vector delta = obj.getColumnVector().minus(median);
- deltas.put(id, delta);
+ Vector delta = obj.getColumnVector().minusEquals(median);
+ deltas.put(iditer, delta);
covmaker.put(delta);
}
// Finalize covariance matrix:
@@ -146,11 +144,10 @@ public class CTLuMedianMultipleAttributes<N, O extends NumberVector<?, ?>> exten
DoubleMinMax minmax = new DoubleMinMax();
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(attributes.getDBIDs(), DataStoreFactory.HINT_STATIC);
for(DBIDIter iditer = attributes.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
- Vector temp = deltas.get(id).minus(mean);
+ Vector temp = deltas.get(iditer).minus(mean);
final double score = temp.transposeTimesTimes(cmati, temp);
minmax.put(score);
- scores.putDouble(id, score);
+ scores.putDouble(iditer, score);
}
Relation<Double> scoreResult = new MaterializedRelation<Double>("Median multiple attributes outlier", "median-outlier", TypeUtil.DOUBLE, scores, attributes.getDBIDs());
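The difference from the mean variant above is only the reference point: the median vector is built dimension-wise, $\tilde g_{i,d} = \mathrm{median}_{j \in N(i)}(x_{j,d})$, using the transposed data[d][i] buffer and QuickSelect per dimension; the deltas $h_i = x_i - \tilde g_i$ then feed the same covariance construction and Mahalanobis-style scoring.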
@@ -175,7 +172,7 @@ public class CTLuMedianMultipleAttributes<N, O extends NumberVector<?, ?>> exten
* @param <N> Neighborhood type
* @param <O> Attributes vector type
*/
- public static class Parameterizer<N, O extends NumberVector<?, ?>> extends AbstractNeighborhoodOutlier.Parameterizer<N> {
+ public static class Parameterizer<N, O extends NumberVector<?>> extends AbstractNeighborhoodOutlier.Parameterizer<N> {
@Override
protected CTLuMedianMultipleAttributes<N, O> makeInstance() {
return new CTLuMedianMultipleAttributes<N, O>(npredf);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java
index 7b88ae66..3b876bba 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java
@@ -32,8 +32,8 @@ import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -76,10 +76,10 @@ public class CTLuMoranScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(CTLuMoranScatterplotOutlier.class);
+ private static final Logging LOG = Logging.getLogger(CTLuMoranScatterplotOutlier.class);
/**
- * Constructor
+ * Constructor.
*
* @param npredf Neighborhood
*/
@@ -88,20 +88,19 @@ public class CTLuMoranScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<
}
/**
- * Main method
+ * Main method.
*
* @param nrel Neighborhood relation
* @param relation Data relation (1d!)
* @return Outlier detection result
*/
- public OutlierResult run(Relation<N> nrel, Relation<? extends NumberVector<?, ?>> relation) {
+ public OutlierResult run(Relation<N> nrel, Relation<? extends NumberVector<?>> relation) {
final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(nrel);
// Compute the global mean and variance
MeanVariance globalmv = new MeanVariance();
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
- globalmv.put(relation.get(id).doubleValue(1));
+ globalmv.put(relation.get(iditer).doubleValue(0));
}
DoubleMinMax minmax = new DoubleMinMax();
@@ -110,17 +109,15 @@ public class CTLuMoranScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<
// calculate normalized attribute values
// calculate neighborhood average of normalized attribute values.
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
// Compute global z score
- final double globalZ = (relation.get(id).doubleValue(1) - globalmv.getMean()) / globalmv.getNaiveStddev();
+ final double globalZ = (relation.get(iditer).doubleValue(0) - globalmv.getMean()) / globalmv.getNaiveStddev();
// Compute local average z score
Mean localm = new Mean();
- for(DBIDIter iter = npred.getNeighborDBIDs(id).iter(); iter.valid(); iter.advance()) {
- DBID n = iter.getDBID();
- if(id.equals(n)) {
+ for(DBIDIter iter = npred.getNeighborDBIDs(iditer).iter(); iter.valid(); iter.advance()) {
+ if(DBIDUtil.equal(iditer, iter)) {
continue;
}
- localm.put((relation.get(n).doubleValue(1) - globalmv.getMean()) / globalmv.getNaiveStddev());
+ localm.put((relation.get(iter).doubleValue(0) - globalmv.getMean()) / globalmv.getNaiveStddev());
}
// if neighbors.size == 0
final double localZ;
@@ -136,7 +133,7 @@ public class CTLuMoranScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<
// Note: in the original Moran scatterplot, any object with a score < 0 would be an outlier.
final double score = Math.max(-globalZ * localZ, 0);
minmax.put(score);
- scores.putDouble(id, score);
+ scores.putDouble(iditer, score);
}
Relation<Double> scoreResult = new MaterializedRelation<Double>("MoranOutlier", "Moran Scatterplot Outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
@@ -148,16 +145,16 @@ public class CTLuMoranScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<
@Override
public TypeInformation[] getInputTypeRestriction() {
- return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), VectorFieldTypeInformation.get(NumberVector.class, 1));
+ return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), new VectorFieldTypeInformation<NumberVector<?>>(NumberVector.class, 1));
}
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
- * Parameterization class
+ * Parameterization class.
*
* @author Ahmed Hettab
*
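The hunks above show the pattern this commit applies throughout the spatial outlier package: loops no longer materialize a DBID per step, the DBIDIter cursor itself is passed as a DBIDRef, identity is tested via DBIDUtil.equal, and attribute access moves from 1-indexed to 0-indexed doubleValue. A minimal sketch of the resulting idiom, using only calls visible in the hunks; the helper name neighborhoodMean is hypothetical and not part of the commit:

    import de.lmu.ifi.dbs.elki.data.NumberVector;
    import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
    import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
    import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
    import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
    import de.lmu.ifi.dbs.elki.database.relation.Relation;
    import de.lmu.ifi.dbs.elki.math.Mean;

    // Mean of the first attribute over a neighborhood, skipping the center.
    static double neighborhoodMean(Relation<? extends NumberVector<?>> rel, DBIDs neighbors, DBIDRef center) {
      Mean m = new Mean();
      for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
        if(DBIDUtil.equal(center, iter)) {
          continue; // identity test without materializing a DBID
        }
        m.put(rel.get(iter).doubleValue(0)); // dimensions are 0-indexed now
      }
      return m.getMean();
    }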
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java
index 852c4be4..ec92afd7 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java
@@ -1,26 +1,27 @@
package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
-/*
-This file is part of ELKI:
-Environment for Developing KDD-Applications Supported by Index-Structures
-
-Copyright (C) 2012
-Ludwig-Maximilians-Universität München
-Lehr- und Forschungseinheit für Datenbanksysteme
-ELKI Development Team
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
@@ -33,7 +34,6 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
@@ -42,6 +42,8 @@ import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNHeap;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNUtil;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
@@ -51,7 +53,6 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -82,30 +83,30 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
*/
@Title("Random Walk on Exhaustive Combination")
@Description("Spatial Outlier Detection using Random Walk on Exhaustive Combination")
-@Reference(authors = "X. Liu and C.-T. Lu and F. Chen", title = "Spatial outlier detection: random walk based approaches", booktitle = "Proc. 18th SIGSPATIAL International Conference on Advances in Geographic Information Systems, 2010", url="http://dx.doi.org/10.1145/1869790.1869841")
+@Reference(authors = "X. Liu and C.-T. Lu and F. Chen", title = "Spatial outlier detection: random walk based approaches", booktitle = "Proc. 18th SIGSPATIAL International Conference on Advances in Geographic Information Systems, 2010", url = "http://dx.doi.org/10.1145/1869790.1869841")
public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<N, D, OutlierResult> implements OutlierAlgorithm {
/**
- * Logger
+ * Logger.
*/
- private static final Logging logger = Logging.getLogger(CTLuRandomWalkEC.class);
+ private static final Logging LOG = Logging.getLogger(CTLuRandomWalkEC.class);
/**
- * Parameter alpha: Attribute difference exponent
+ * Parameter alpha: Attribute difference exponent.
*/
private double alpha;
/**
- * Parameter c: damping factor
+ * Parameter c: damping factor.
*/
private double c;
/**
- * Parameter k
+ * Parameter k.
*/
private int k;
/**
- * Constructor
+ * Constructor.
*
* @param distanceFunction Distance function
* @param alpha Alpha parameter
@@ -120,13 +121,13 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac
}
/**
- * Run the algorithm
+ * Run the algorithm.
*
* @param spatial Spatial neighborhood relation
* @param relation Attribute value relation
* @return Outlier result
*/
- public OutlierResult run(Relation<N> spatial, Relation<? extends NumberVector<?, ?>> relation) {
+ public OutlierResult run(Relation<N> spatial, Relation<? extends NumberVector<?>> relation) {
DistanceQuery<N, D> distFunc = getDistanceFunction().instantiate(spatial);
WritableDataStore<Vector> similarityVectors = DataStoreUtil.makeStorage(spatial.getDBIDs(), DataStoreFactory.HINT_TEMP, Vector.class);
WritableDataStore<DBIDs> neighbors = DataStoreUtil.makeStorage(spatial.getDBIDs(), DataStoreFactory.HINT_TEMP, DBIDs.class);
@@ -136,39 +137,41 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac
// Construct the relation matrix of the EC graph
Matrix E = new Matrix(ids.size(), ids.size());
- KNNHeap<D> heap = new KNNHeap<D>(k);
- for(int i = 0; i < ids.size(); i++) {
- final DBID id = ids.get(i);
- final double val = relation.get(id).doubleValue(1);
- assert (heap.size() == 0);
- for(int j = 0; j < ids.size(); j++) {
- if(i == j) {
- continue;
- }
- final DBID n = ids.get(j);
- final double e;
- final D distance = distFunc.distance(id, n);
- heap.add(distance, n);
- double dist = distance.doubleValue();
- if(dist == 0) {
- logger.warning("Zero distances are not supported - skipping: " + id + " " + n);
- e = 0;
+ KNNHeap<D> heap = KNNUtil.newHeap(distFunc.getDistanceFactory(), k);
+ {
+ int i = 0;
+ for(DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
+ final double val = relation.get(id).doubleValue(0);
+ assert (heap.size() == 0);
+ int j = 0;
+ for(DBIDIter n = ids.iter(); n.valid(); n.advance(), j++) {
+ if(i == j) {
+ continue;
+ }
+ final double e;
+ final D distance = distFunc.distance(id, n);
+ heap.add(distance, n);
+ double dist = distance.doubleValue();
+ if(dist == 0) {
+ LOG.warning("Zero distances are not supported - skipping: " + DBIDUtil.toString(id) + " " + DBIDUtil.toString(n));
+ e = 0;
+ }
+ else {
+ double diff = Math.abs(val - relation.get(n).doubleValue(0));
+ double exp = Math.exp(Math.pow(diff, alpha));
+ // Implementation note: not inverting exp worked a lot better.
+ // Therefore we diverge from the article here.
+ e = exp / dist;
+ }
+ E.set(j, i, e);
}
- else {
- double diff = Math.abs(val - relation.get(n).doubleValue(1));
- double exp = Math.exp(Math.pow(diff, alpha));
- // Implementation note: not inverting exp worked a lot better.
- // Therefore we diverge from the article here.
- e = exp / dist;
+ // Convert kNN Heap into DBID array
+ ModifiableDBIDs nids = DBIDUtil.newArray(heap.size());
+ while(heap.size() > 0) {
+ nids.add(heap.poll());
}
- E.set(j, i, e);
- }
- // Convert kNN Heap into DBID array
- ModifiableDBIDs nids = DBIDUtil.newArray(heap.size());
- while(!heap.isEmpty()) {
- nids.add(heap.poll().getDBID());
+ neighbors.put(id, nids);
}
- neighbors.put(id, nids);
}
// Normalize the adjacency matrix
// Sum-based normalization - don't use E.normalizeColumns()
@@ -195,26 +198,26 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac
E = E.inverse().timesEquals(1 - c);
// Split the matrix into columns
- for(int i = 0; i < ids.size(); i++) {
- DBID id = ids.get(i);
- // Note: matrix times ith unit vector = ith column
- Vector sim = E.getCol(i);
- similarityVectors.put(id, sim);
+ {
+ int i = 0;
+ for(DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
+ // Note: matrix times ith unit vector = ith column
+ Vector sim = E.getCol(i);
+ similarityVectors.put(id, sim);
+ }
}
E = null;
// Compute the relevance scores between the specified object and its neighbors
DoubleMinMax minmax = new DoubleMinMax();
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(spatial.getDBIDs(), DataStoreFactory.HINT_STATIC);
- for(int i = 0; i < ids.size(); i++) {
- DBID id = ids.get(i);
+ for(DBIDIter id = ids.iter(); id.valid(); id.advance()) {
double gmean = 1.0;
int cnt = 0;
for(DBIDIter iter = neighbors.get(id).iter(); iter.valid(); iter.advance()) {
- DBID n = iter.getDBID();
- if(id.equals(n)) {
+ if(DBIDUtil.equal(id, iter)) {
continue;
}
- double sim = MathUtil.angle(similarityVectors.get(id), similarityVectors.get(n));
+ double sim = MathUtil.angle(similarityVectors.get(id), similarityVectors.get(iter));
gmean *= sim;
cnt++;
}
@@ -230,12 +233,12 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac
@Override
public TypeInformation[] getInputTypeRestriction() {
- return TypeUtil.array(getDistanceFunction().getInputTypeRestriction(), VectorFieldTypeInformation.get(NumberVector.class, 1));
+ return TypeUtil.array(getDistanceFunction().getInputTypeRestriction(), new VectorFieldTypeInformation<NumberVector<?>>(NumberVector.class, 1));
}
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -250,32 +253,32 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac
*/
public static class Parameterizer<N, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<N, D> {
/**
- * Parameter to specify the number of neighbors
+ * Parameter to specify the number of neighbors.
*/
- public static final OptionID K_ID = OptionID.getOrCreateOptionID("randomwalkec.k", "Number of nearest neighbors to use.");
+ public static final OptionID K_ID = new OptionID("randomwalkec.k", "Number of nearest neighbors to use.");
/**
- * Parameter to specify alpha
+ * Parameter to specify alpha.
*/
- public static final OptionID ALPHA_ID = OptionID.getOrCreateOptionID("randomwalkec.alpha", "Scaling exponent for value differences.");
+ public static final OptionID ALPHA_ID = new OptionID("randomwalkec.alpha", "Scaling exponent for value differences.");
/**
- * Parameter to specify the c
+ * Parameter to specify the c.
*/
- public static final OptionID C_ID = OptionID.getOrCreateOptionID("randomwalkec.c", "The damping parameter c.");
+ public static final OptionID C_ID = new OptionID("randomwalkec.c", "The damping parameter c.");
/**
- * Parameter alpha: scaling
+ * Parameter alpha: scaling.
*/
double alpha = 0.5;
/**
- * Parameter c: damping coefficient
+ * Parameter c: damping coefficient.
*/
double c = 0.9;
/**
- * Parameter for kNN
+ * Parameter for kNN.
*/
int k;
@@ -288,19 +291,20 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac
}
/**
- * Get the kNN parameter
+ * Get the kNN parameter.
*
* @param config Parameterization
*/
protected void configK(Parameterization config) {
- final IntParameter param = new IntParameter(K_ID, new GreaterEqualConstraint(1));
+ final IntParameter param = new IntParameter(K_ID);
+ param.addConstraint(new GreaterEqualConstraint(1));
if(config.grab(param)) {
k = param.getValue();
}
}
/**
- * Get the alpha parameter
+ * Get the alpha parameter.
*
* @param config Parameterization
*/
@@ -312,9 +316,9 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac
}
/**
- * get the c parameter
+ * Get the c parameter.
*
- * @param config
+ * @param config Parameterization
*/
protected void configC(Parameterization config) {
final DoubleParameter param = new DoubleParameter(C_ID);
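The kNN heap API moved as well (from utilities.datastructures.heap to distance.distanceresultlist): heaps are now obtained from KNNUtil with the distance factory, and poll() is usable as a DBIDRef directly. A sketch of the new idiom, assuming distFunc and k as in the hunk above:

    KNNHeap<D> heap = KNNUtil.newHeap(distFunc.getDistanceFactory(), k);
    // ... fill via heap.add(distance, candidate) while scanning ...
    ModifiableDBIDs nids = DBIDUtil.newArray(heap.size());
    while(heap.size() > 0) {
      nids.add(heap.poll()); // no .getDBID() unwrapping anymore
    }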
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java
index 4f11cb38..295c7414 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java
@@ -31,8 +31,8 @@ import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
@@ -78,10 +78,10 @@ public class CTLuScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<N> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(CTLuScatterplotOutlier.class);
+ private static final Logging LOG = Logging.getLogger(CTLuScatterplotOutlier.class);
/**
- * Constructor
+ * Constructor.
*
* @param npredf Neighborhood predicate
*/
@@ -90,13 +90,13 @@ public class CTLuScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<N> {
}
/**
- * Main method
+ * Main method.
*
* @param nrel Neighborhood relation
* @param relation Data relation (1d!)
* @return Outlier detection result
*/
- public OutlierResult run(Relation<N> nrel, Relation<? extends NumberVector<?, ?>> relation) {
+ public OutlierResult run(Relation<N> nrel, Relation<? extends NumberVector<?>> relation) {
final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(nrel);
WritableDoubleDataStore means = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP);
@@ -104,17 +104,15 @@ public class CTLuScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<N> {
// regression using the covariance matrix
CovarianceMatrix covm = new CovarianceMatrix(2);
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
- final double local = relation.get(id).doubleValue(1);
+ final double local = relation.get(iditer).doubleValue(0);
// Compute mean of neighbors
Mean mean = new Mean();
- DBIDs neighbors = npred.getNeighborDBIDs(id);
+ DBIDs neighbors = npred.getNeighborDBIDs(iditer);
for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
- DBID n = iter.getDBID();
- if(id.equals(n)) {
+ if(DBIDUtil.equal(iditer, iter)) {
continue;
}
- mean.put(relation.get(n).doubleValue(1));
+ mean.put(relation.get(iter).doubleValue(0));
}
final double m;
if(mean.getCount() > 0) {
@@ -125,7 +123,7 @@ public class CTLuScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<N> {
m = local;
}
// Store the mean for the score calculation
- means.putDouble(id, m);
+ means.putDouble(iditer, m);
covm.put(new double[] { local, m });
}
// Finalize covariance matrix, compute linear regression
@@ -143,11 +141,10 @@ public class CTLuScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<N> {
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
MeanVariance mv = new MeanVariance();
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
// Compute the error from the linear regression
- double y_i = relation.get(id).doubleValue(1);
- double e = means.doubleValue(id) - (slope * y_i + inter);
- scores.putDouble(id, e);
+ double y_i = relation.get(iditer).doubleValue(0);
+ double e = means.doubleValue(iditer) - (slope * y_i + inter);
+ scores.putDouble(iditer, e);
mv.put(e);
}
@@ -157,10 +154,9 @@ public class CTLuScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<N> {
final double mean = mv.getMean();
final double variance = mv.getNaiveStddev();
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
- double score = Math.abs((scores.doubleValue(id) - mean) / variance);
+ double score = Math.abs((scores.doubleValue(iditer) - mean) / variance);
minmax.put(score);
- scores.putDouble(id, score);
+ scores.putDouble(iditer, score);
}
}
// build representation
@@ -173,16 +169,16 @@ public class CTLuScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<N> {
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
@Override
public TypeInformation[] getInputTypeRestriction() {
- return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), VectorFieldTypeInformation.get(NumberVector.class, 1));
+ return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), new VectorFieldTypeInformation<NumberVector<?>>(NumberVector.class, 1));
}
/**
- * Parameterization class
+ * Parameterization class.
*
* @author Ahmed Hettab
*
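In the notation of the hunks above: the method fits a regression line through the points (y_i, m_i) of attribute value versus neighborhood mean using CovarianceMatrix, then scores each object by the standardized residual (note that the local variable named variance actually holds getNaiveStddev()):

    e_i     = m_i - (slope * y_i + inter)
    score_i = |e_i - mean(e)| / stddev(e)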
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java
index 05729481..02573a06 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java
@@ -32,8 +32,8 @@ import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
@@ -79,60 +79,57 @@ public class CTLuZTestOutlier<N> extends AbstractNeighborhoodOutlier<N> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(CTLuZTestOutlier.class);
+ private static final Logging LOG = Logging.getLogger(CTLuZTestOutlier.class);
/**
- * Constructor
+ * Constructor.
*
- * @param npredf
+ * @param npredf Neighbor predicate
*/
public CTLuZTestOutlier(NeighborSetPredicate.Factory<N> npredf) {
super(npredf);
}
/**
- * Main method
+ * Main method.
*
* @param database Database
* @param nrel Neighborhood relation
* @param relation Data relation (1d!)
* @return Outlier detection result
*/
- public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector<?, ?>> relation) {
+ public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector<?>> relation) {
final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(nrel);
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
MeanVariance zmv = new MeanVariance();
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
- DBIDs neighbors = npred.getNeighborDBIDs(id);
+ DBIDs neighbors = npred.getNeighborDBIDs(iditer);
// Compute Mean of neighborhood
Mean localmean = new Mean();
for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
- DBID n = iter.getDBID();
- if(id.equals(n)) {
+ if(DBIDUtil.equal(iditer, iter)) {
continue;
}
- localmean.put(relation.get(n).doubleValue(1));
+ localmean.put(relation.get(iter).doubleValue(0));
}
final double localdiff;
if(localmean.getCount() > 0) {
- localdiff = relation.get(id).doubleValue(1) - localmean.getMean();
+ localdiff = relation.get(iditer).doubleValue(0) - localmean.getMean();
}
else {
localdiff = 0.0;
}
- scores.putDouble(id, localdiff);
+ scores.putDouble(iditer, localdiff);
zmv.put(localdiff);
}
// Normalize scores using mean and variance
DoubleMinMax minmax = new DoubleMinMax();
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
- double score = Math.abs(scores.doubleValue(id) - zmv.getMean()) / zmv.getSampleStddev();
+ double score = Math.abs(scores.doubleValue(iditer) - zmv.getMean()) / zmv.getSampleStddev();
minmax.put(score);
- scores.putDouble(id, score);
+ scores.putDouble(iditer, score);
}
// Wrap result
@@ -145,16 +142,16 @@ public class CTLuZTestOutlier<N> extends AbstractNeighborhoodOutlier<N> {
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
@Override
public TypeInformation[] getInputTypeRestriction() {
- return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), VectorFieldTypeInformation.get(NumberVector.class, 1));
+ return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), new VectorFieldTypeInformation<NumberVector<?>>(NumberVector.class, 1));
}
/**
- * Parameterization class
+ * Parameterization class.
*
* @author Ahmed Hettab
*
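Written out, the score above is a z-test on the deviation from the local mean, with x_i the single attribute and N(i) the spatial neighbors excluding i itself:

    d_i     = x_i - mean{ x_j : j in N(i) }        (0 if N(i) is empty)
    score_i = |d_i - mean(d)| / sampleStddev(d)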
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java
index 8ae23229..720fa39f 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java
@@ -30,8 +30,8 @@ import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
@@ -74,7 +74,7 @@ public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistance
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(SLOM.class);
+ private static final Logging LOG = Logging.getLogger(SLOM.class);
/**
* Constructor.
@@ -100,29 +100,27 @@ public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistance
WritableDoubleDataStore modifiedDistance = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
// calculate D-Tilde
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
double sum = 0;
double maxDist = 0;
int cnt = 0;
- final DBIDs neighbors = npred.getNeighborDBIDs(id);
+ final DBIDs neighbors = npred.getNeighborDBIDs(iditer);
for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
- DBID neighbor = iter.getDBID();
- if(id.equals(neighbor)) {
+ if(DBIDUtil.equal(iditer, iter)) {
continue;
}
- double dist = distFunc.distance(id, neighbor).doubleValue();
+ double dist = distFunc.distance(iditer, iter).doubleValue();
sum += dist;
cnt++;
maxDist = Math.max(maxDist, dist);
}
if(cnt > 1) {
- modifiedDistance.putDouble(id, ((sum - maxDist) / (cnt - 1)));
+ modifiedDistance.putDouble(iditer, ((sum - maxDist) / (cnt - 1)));
}
else {
// Use regular distance when the d-tilde trick is undefined.
// Note: this can be 0 when there were no neighbors.
- modifiedDistance.putDouble(id, maxDist);
+ modifiedDistance.putDouble(iditer, maxDist);
}
}
@@ -131,29 +129,26 @@ public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistance
WritableDoubleDataStore sloms = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
double sum = 0;
int cnt = 0;
- final DBIDs neighbors = npred.getNeighborDBIDs(id);
+ final DBIDs neighbors = npred.getNeighborDBIDs(iditer);
for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
- DBID neighbor = iter.getDBID();
- if(neighbor.equals(id)) {
+ if(DBIDUtil.equal(iditer, iter)) {
continue;
}
- sum += modifiedDistance.doubleValue(neighbor);
+ sum += modifiedDistance.doubleValue(iter);
cnt++;
}
double slom;
if(cnt > 0) {
// With and without the object itself:
- double avgPlus = (sum + modifiedDistance.doubleValue(id)) / (cnt + 1);
+ double avgPlus = (sum + modifiedDistance.doubleValue(iditer)) / (cnt + 1);
double avg = sum / cnt;
double beta = 0;
for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
- DBID neighbor = iter.getDBID();
- final double dist = modifiedDistance.doubleValue(neighbor);
+ final double dist = modifiedDistance.doubleValue(iter);
if(dist > avgPlus) {
beta += 1;
}
@@ -162,8 +157,8 @@ public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistance
}
}
// Include object itself
- if(!neighbors.contains(id)) {
- final double dist = modifiedDistance.doubleValue(id);
+ if(!neighbors.contains(iditer)) {
+ final double dist = modifiedDistance.doubleValue(iditer);
if(dist > avgPlus) {
beta += 1;
}
@@ -182,13 +177,13 @@ public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistance
}
beta = beta / (1 + avg);
- slom = beta * modifiedDistance.doubleValue(id);
+ slom = beta * modifiedDistance.doubleValue(iditer);
}
else {
// No neighbors to compare to - no score.
slom = 0.0;
}
- sloms.putDouble(id, slom);
+ sloms.putDouble(iditer, slom);
slomminmax.put(slom);
}
@@ -201,7 +196,7 @@ public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistance
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
@Override
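The "d-tilde trick" in SLOM's first pass averages the distances to the spatial neighbors after discarding the single largest one, so a single far-off neighbor cannot dominate the modified distance:

    dtilde(o) = (sum_n d(o,n) - max_n d(o,n)) / (|N(o)| - 1)    for |N(o)| > 1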
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java
index e9987bf0..a6f39a60 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java
@@ -29,7 +29,6 @@ import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
@@ -74,7 +73,7 @@ public class SOF<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceB
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(SOF.class);
+ private static final Logging LOG = Logging.getLogger(SOF.class);
/**
* Constructor.
@@ -89,7 +88,7 @@ public class SOF<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceB
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -110,33 +109,31 @@ public class SOF<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceB
// Compute densities
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
- DBIDs neighbors = npred.getNeighborDBIDs(id);
+ DBIDs neighbors = npred.getNeighborDBIDs(iditer);
double avg = 0;
for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
- avg += distFunc.distance(id, iter.getDBID()).doubleValue();
+ avg += distFunc.distance(iditer, iter).doubleValue();
}
double lrd = 1 / (avg / neighbors.size());
if (Double.isNaN(lrd)) {
lrd = 0;
}
- lrds.putDouble(id, lrd);
+ lrds.putDouble(iditer, lrd);
}
// Compute density quotients
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
- DBIDs neighbors = npred.getNeighborDBIDs(id);
+ DBIDs neighbors = npred.getNeighborDBIDs(iditer);
double avg = 0;
for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
- avg += lrds.doubleValue(iter.getDBID());
+ avg += lrds.doubleValue(iter);
}
- final double lrd = (avg / neighbors.size()) / lrds.doubleValue(id);
+ final double lrd = (avg / neighbors.size()) / lrds.doubleValue(iditer);
if (!Double.isNaN(lrd)) {
- lofs.putDouble(id, lrd);
+ lofs.putDouble(iditer, lrd);
lofminmax.put(lrd);
} else {
- lofs.putDouble(id, 0.0);
+ lofs.putDouble(iditer, 0.0);
}
}
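SOF mirrors LOF on the spatial neighborhood: the first loop computes a local density from the average neighbor distance, the second the density quotient used as the outlier score:

    lrd(o)   = |N(o)| / sum_{n in N(o)} d(o,n)
    score(o) = mean_{n in N(o)} lrd(n) / lrd(o)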
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java
index 41022414..9aa21b66 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java
@@ -33,11 +33,11 @@ import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
@@ -45,14 +45,13 @@ import de.lmu.ifi.dbs.elki.math.Mean;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.IntervalConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.IntervalConstraint.IntervalBoundary;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
@@ -83,15 +82,15 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(TrimmedMeanApproach.class);
+ private static final Logging LOG = Logging.getLogger(TrimmedMeanApproach.class);
/**
- * the parameter p
+ * The parameter p.
*/
private double p;
/**
- * Constructor
+ * Constructor.
*
* @param p Parameter p
* @param npredf Neighborhood factory.
@@ -102,29 +101,28 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> {
}
/**
- * Run the algorithm
+ * Run the algorithm.
*
* @param database Database
* @param nrel Neighborhood relation
* @param relation Data Relation (1 dimensional!)
* @return Outlier detection result
*/
- public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector<?, ?>> relation) {
- assert (DatabaseUtil.dimensionality(relation) == 1) : "TrimmedMean can only process one-dimensional data sets.";
+ public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector<?>> relation) {
+ assert (RelationUtil.dimensionality(relation) == 1) : "TrimmedMean can only process one-dimensional data sets.";
final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(nrel);
WritableDoubleDataStore errors = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP);
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
- FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Computing trimmed means", relation.size(), logger) : null;
+ FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Computing trimmed means", relation.size(), LOG) : null;
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
- DBIDs neighbors = npred.getNeighborDBIDs(id);
+ DBIDs neighbors = npred.getNeighborDBIDs(iditer);
int num = 0;
double[] values = new double[neighbors.size()];
// calculate trimmedMean
for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
- values[num] = relation.get(iter).doubleValue(1);
+ values[num] = relation.get(iter).doubleValue(0);
num++;
}
@@ -141,21 +139,21 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> {
tm = mean.getMean();
}
else {
- tm = relation.get(id).doubleValue(1);
+ tm = relation.get(iditer).doubleValue(0);
}
// Error: deviation from trimmed mean
- errors.putDouble(id, relation.get(id).doubleValue(1) - tm);
+ errors.putDouble(iditer, relation.get(iditer).doubleValue(0) - tm);
if(progress != null) {
- progress.incrementProcessed(logger);
+ progress.incrementProcessed(LOG);
}
}
if(progress != null) {
- progress.ensureCompleted(logger);
+ progress.ensureCompleted(LOG);
}
- if(logger.isVerbose()) {
- logger.verbose("Computing median error.");
+ if(LOG.isVerbose()) {
+ LOG.verbose("Computing median error.");
}
double median_dev_from_median;
{
@@ -164,8 +162,7 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> {
{
int i = 0;
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
- ei[i] = errors.doubleValue(id);
+ ei[i] = errors.doubleValue(iditer);
i++;
}
}
@@ -178,15 +175,14 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> {
median_dev_from_median = QuickSelect.median(ei);
}
- if(logger.isVerbose()) {
- logger.verbose("Normalizing scores.");
+ if(LOG.isVerbose()) {
+ LOG.verbose("Normalizing scores.");
}
// calculate score
DoubleMinMax minmax = new DoubleMinMax();
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
- double score = Math.abs(errors.doubleValue(id)) * 0.6745 / median_dev_from_median;
- scores.putDouble(id, score);
+ double score = Math.abs(errors.doubleValue(iditer)) * 0.6745 / median_dev_from_median;
+ scores.putDouble(iditer, score);
minmax.put(score);
}
//
@@ -199,17 +195,17 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> {
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
@Override
public TypeInformation[] getInputTypeRestriction() {
// Get the one-dimensional attribute for analysis.
- return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), VectorFieldTypeInformation.get(NumberVector.class, 1));
+ return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), new VectorFieldTypeInformation<NumberVector<?>>(NumberVector.class, 1));
}
/**
- * Parameterizer
+ * Parameterizer.
*
* @author Ahmed Hettab
*
@@ -219,19 +215,21 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> {
*/
public static class Parameterizer<N> extends AbstractNeighborhoodOutlier.Parameterizer<N> {
/**
- * Parameter for the percentile value p
+ * Parameter for the percentile value p.
*/
- public static final OptionID P_ID = OptionID.getOrCreateOptionID("tma.p", "the percentile parameter");
+ public static final OptionID P_ID = new OptionID("tma.p", "the percentile parameter");
/**
- * Percentile parameter p
+ * Percentile parameter p.
*/
protected double p = 0.2;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- DoubleParameter pP = new DoubleParameter(P_ID, new IntervalConstraint(0.0, IntervalBoundary.OPEN, 0.5, IntervalBoundary.OPEN));
+ DoubleParameter pP = new DoubleParameter(P_ID);
+ pP.addConstraint(new GreaterConstraint(0.0));
+ pP.addConstraint(new LessConstraint(0.5));
if(config.grab(pP)) {
p = pP.getValue();
}
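The constant 0.6745 in the scoring loop is the 0.75 quantile of the standard normal; dividing a median absolute deviation by it gives a consistent estimate of the standard deviation, so the score is a robust z-score of the regression error. The computation of median_dev_from_median as median |e_j - median(e)| falls between the hunks shown and is inferred from its name:

    score_i = |e_i| * 0.6745 / median_dev_from_median  ~=  |e_i| / sigma_hat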
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java
index 5898b053..2c706ce0 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java
@@ -24,7 +24,8 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood;
*/
import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -50,7 +51,7 @@ public abstract class AbstractPrecomputedNeighborhood implements NeighborSetPred
}
@Override
- public DBIDs getNeighborDBIDs(DBID reference) {
+ public DBIDs getNeighborDBIDs(DBIDRef reference) {
DBIDs neighbors = store.get(reference);
if(neighbors != null) {
return neighbors;
@@ -60,7 +61,7 @@ public abstract class AbstractPrecomputedNeighborhood implements NeighborSetPred
if(getLogger().isDebugging()) {
getLogger().warning("No neighbors for object " + reference);
}
- return reference;
+ return DBIDUtil.deref(reference);
}
}
@@ -69,7 +70,7 @@ public abstract class AbstractPrecomputedNeighborhood implements NeighborSetPred
*
* @return Logger
*/
- abstract protected Logging getLogger();
+ protected abstract Logging getLogger();
/**
* Factory class.
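Widening the signature from DBID to DBIDRef lets callers pass a live iterator, but a reference is only valid at the cursor's current position; anything stored or returned must be copied out first, which is exactly what the new fallback does:

    DBID stable = DBIDUtil.deref(reference); // materialize the current id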
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java
index 7a2fda52..4aa96b25 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java
@@ -28,7 +28,6 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
@@ -54,7 +53,7 @@ public class ExtendedNeighborhood extends AbstractPrecomputedNeighborhood {
/**
* The logger to use.
*/
- static final Logging logger = Logging.getLogger(ExtendedNeighborhood.class);
+ private static final Logging LOG = Logging.getLogger(ExtendedNeighborhood.class);
/**
* Constructor.
@@ -67,7 +66,7 @@ public class ExtendedNeighborhood extends AbstractPrecomputedNeighborhood {
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
@Override
@@ -132,23 +131,22 @@ public class ExtendedNeighborhood extends AbstractPrecomputedNeighborhood {
final WritableDataStore<DBIDs> store = DataStoreUtil.makeStorage(database.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC | DataStoreFactory.HINT_TEMP, DBIDs.class);
// Expand multiple steps
- FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Expanding neighborhoods", database.size(), logger) : null;
+ FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Expanding neighborhoods", database.size(), LOG) : null;
for(DBIDIter iter = database.iterDBIDs(); iter.valid(); iter.advance()) {
- DBID id = iter.getDBID();
- HashSetModifiableDBIDs res = DBIDUtil.newHashSet(id);
- DBIDs todo = id;
+ HashSetModifiableDBIDs res = DBIDUtil.newHashSet();
+ res.add(iter);
+ DBIDs todo = DBIDUtil.deref(iter);
for(int i = 0; i < steps; i++) {
ModifiableDBIDs ntodo = DBIDUtil.newHashSet();
for(DBIDIter iter2 = todo.iter(); iter2.valid(); iter2.advance()) {
- DBIDs add = innerinst.getNeighborDBIDs(iter2.getDBID());
+ DBIDs add = innerinst.getNeighborDBIDs(iter2);
if(add != null) {
- for(DBIDIter iter3 = add.iter(); iter.valid(); iter.advance()) {
- DBID nid = iter3.getDBID();
- if(res.contains(nid)) {
+ for(DBIDIter iter3 = add.iter(); iter3.valid(); iter3.advance()) {
+ if(res.contains(iter3)) {
continue;
}
- ntodo.add(nid);
- res.add(nid);
+ ntodo.add(iter3);
+ res.add(iter3);
}
}
}
@@ -157,13 +155,13 @@ public class ExtendedNeighborhood extends AbstractPrecomputedNeighborhood {
}
todo = ntodo;
}
- store.put(id, res);
+ store.put(iter, res);
if(progress != null) {
- progress.incrementProcessed(logger);
+ progress.incrementProcessed(LOG);
}
}
if(progress != null) {
- progress.ensureCompleted(logger);
+ progress.ensureCompleted(LOG);
}
return store;
@@ -180,12 +178,12 @@ public class ExtendedNeighborhood extends AbstractPrecomputedNeighborhood {
/**
* Parameter to specify the neighborhood predicate to use.
*/
- public static final OptionID NEIGHBORHOOD_ID = OptionID.getOrCreateOptionID("extendedneighbors.neighborhood", "The inner neighborhood predicate to use.");
+ public static final OptionID NEIGHBORHOOD_ID = new OptionID("extendedneighbors.neighborhood", "The inner neighborhood predicate to use.");
/**
* Parameter to specify the number of steps allowed
*/
- public static final OptionID STEPS_ID = OptionID.getOrCreateOptionID("extendedneighbors.steps", "The number of steps allowed in the neighborhood graph.");
+ public static final OptionID STEPS_ID = new OptionID("extendedneighbors.steps", "The number of steps allowed in the neighborhood graph.");
/**
* The number of steps to do.
@@ -225,7 +223,8 @@ public class ExtendedNeighborhood extends AbstractPrecomputedNeighborhood {
* @return number of steps, default 1
*/
public static int getParameterSteps(Parameterization config) {
- final IntParameter param = new IntParameter(STEPS_ID, new GreaterEqualConstraint(1));
+ final IntParameter param = new IntParameter(STEPS_ID);
+ param.addConstraint(new GreaterEqualConstraint(1));
if(config.grab(param)) {
return param.getValue();
}
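Besides the API migration, this hunk fixes a latent bug: the old inner loop tested and advanced the outer cursor (iter.valid(); iter.advance()) instead of the iterator it had just created. The corrected header drives the right cursor:

    for(DBIDIter iter3 = add.iter(); iter3.valid(); iter3.advance()) {
      // expand res and ntodo from iter3, as above
    }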
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java
index 74e5bbcf..01052c1f 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java
@@ -63,12 +63,12 @@ public class ExternalNeighborhood extends AbstractPrecomputedNeighborhood {
/**
* Logger
*/
- static final Logging logger = Logging.getLogger(ExternalNeighborhood.class);
+ private static final Logging LOG = Logging.getLogger(ExternalNeighborhood.class);
/**
* Parameter to specify the neighborhood file
*/
- public static final OptionID NEIGHBORHOOD_FILE_ID = OptionID.getOrCreateOptionID("externalneighbors.file", "The file listing the neighbors.");
+ public static final OptionID NEIGHBORHOOD_FILE_ID = new OptionID("externalneighbors.file", "The file listing the neighbors.");
/**
* Constructor.
@@ -91,7 +91,7 @@ public class ExternalNeighborhood extends AbstractPrecomputedNeighborhood {
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -136,33 +136,32 @@ public class ExternalNeighborhood extends AbstractPrecomputedNeighborhood {
private DataStore<DBIDs> loadNeighbors(Relation<?> database) {
final WritableDataStore<DBIDs> store = DataStoreUtil.makeStorage(database.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC | DataStoreFactory.HINT_TEMP, DBIDs.class);
- if(logger.isVerbose()) {
- logger.verbose("Loading external neighborhoods.");
+ if(LOG.isVerbose()) {
+ LOG.verbose("Loading external neighborhoods.");
}
- if(logger.isDebugging()) {
- logger.verbose("Building reverse label index...");
+ if(LOG.isDebugging()) {
+ LOG.verbose("Building reverse label index...");
}
// Build a map label/ExternalId -> DBID
// (i.e. a reverse index!)
// TODO: move this into the database layer to share?
- Map<String, DBID> lblmap = new HashMap<String, DBID>(database.size() * 2);
+ Map<String, DBID> lblmap = new HashMap<String, DBID>(database.size() << 1);
{
Relation<LabelList> olq = database.getDatabase().getRelation(TypeUtil.LABELLIST);
Relation<ExternalID> eidq = database.getDatabase().getRelation(TypeUtil.EXTERNALID);
for(DBIDIter iditer = database.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
if(eidq != null) {
- ExternalID eid = eidq.get(id);
+ ExternalID eid = eidq.get(iditer);
if(eid != null) {
- lblmap.put(eid.toString(), id);
+ lblmap.put(eid.toString(), DBIDUtil.deref(iditer));
}
}
if(olq != null) {
- LabelList label = olq.get(id);
+ LabelList label = olq.get(iditer);
if(label != null) {
for(String lbl : label) {
- lblmap.put(lbl, id);
+ lblmap.put(lbl, DBIDUtil.deref(iditer));
}
}
}
@@ -170,8 +169,8 @@ public class ExternalNeighborhood extends AbstractPrecomputedNeighborhood {
}
try {
- if(logger.isDebugging()) {
- logger.verbose("Loading neighborhood file.");
+ if(LOG.isDebugging()) {
+ LOG.verbose("Loading neighborhood file.");
}
InputStream in = new FileInputStream(file);
in = FileUtil.tryGzipInput(in);
@@ -187,16 +186,16 @@ public class ExternalNeighborhood extends AbstractPrecomputedNeighborhood {
neighbours.add(neigh);
}
else {
- if(logger.isDebugging()) {
- logger.debug("No object found for label " + entries[i]);
+ if(LOG.isDebugging()) {
+ LOG.debug("No object found for label " + entries[i]);
}
}
}
store.put(id, neighbours);
}
else {
- if(logger.isDebugging()) {
- logger.warning("No object found for label " + entries[0]);
+ if(LOG.isDebugging()) {
+ LOG.warning("No object found for label " + entries[0]);
}
}
}
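The reverse index must own stable label-to-DBID entries; storing the cursor itself would alias one moving object, hence the deref per put. The shifted capacity is plain doubling, sized for two entries (external id and label) per object:

    Map<String, DBID> lblmap = new HashMap<String, DBID>(database.size() << 1);
    lblmap.put(eid.toString(), DBIDUtil.deref(iditer)); // copy, don't alias the cursor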
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java
index 3a6d0e28..b52f8e91 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java
@@ -24,7 +24,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood;
*/
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.result.Result;
@@ -42,7 +42,7 @@ public interface NeighborSetPredicate extends Result {
* @param reference Reference object
* @return Neighborhood
*/
- public DBIDs getNeighborDBIDs(DBID reference);
+ public DBIDs getNeighborDBIDs(DBIDRef reference);
/**
* Factory interface to produce instances.
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java
index 9dd2dee1..f6000ef0 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java
@@ -29,15 +29,13 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
@@ -57,7 +55,7 @@ public class PrecomputedKNearestNeighborNeighborhood<D extends Distance<D>> exte
/**
* Logger
*/
- private static final Logging logger = Logging.getLogger(PrecomputedKNearestNeighborNeighborhood.class);
+ private static final Logging LOG = Logging.getLogger(PrecomputedKNearestNeighborNeighborhood.class);
/**
* Constructor.
@@ -80,7 +78,7 @@ public class PrecomputedKNearestNeighborNeighborhood<D extends Distance<D>> exte
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -121,13 +119,12 @@ public class PrecomputedKNearestNeighborNeighborhood<D extends Distance<D>> exte
// TODO: use bulk?
WritableDataStore<DBIDs> s = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, DBIDs.class);
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
- KNNResult<D> neighbors = knnQuery.getKNNForDBID(id, k);
+ KNNResult<D> neighbors = knnQuery.getKNNForDBID(iditer, k);
ArrayModifiableDBIDs neighbours = DBIDUtil.newArray(neighbors.size());
- for(DistanceResultPair<D> dpair : neighbors) {
- neighbours.add(dpair.getDBID());
+ for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ neighbours.add(neighbor);
}
- s.put(id, neighbours);
+ s.put(iditer, neighbours);
}
return new PrecomputedKNearestNeighborNeighborhood<D>(s);
}
@@ -151,12 +148,12 @@ public class PrecomputedKNearestNeighborNeighborhood<D extends Distance<D>> exte
/**
* Parameter k
*/
- public static final OptionID K_ID = OptionID.getOrCreateOptionID("neighborhood.k", "the number of neighbors");
+ public static final OptionID K_ID = new OptionID("neighborhood.k", "the number of neighbors");
/**
* Parameter to specify the distance function to use
*/
- public static final OptionID DISTANCEFUNCTION_ID = OptionID.getOrCreateOptionID("neighborhood.distancefunction", "the distance function to use");
+ public static final OptionID DISTANCEFUNCTION_ID = new OptionID("neighborhood.distancefunction", "the distance function to use");
/**
* Parameter k
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java
index d170571f..f1c68577 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java
@@ -29,10 +29,11 @@ import java.util.List;
import de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
@@ -41,7 +42,6 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualCons
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
-import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair;
/**
* Neighborhood obtained by computing the k-fold closure of an existing
@@ -87,29 +87,27 @@ public class LinearWeightedExtendedNeighborhood implements WeightedNeighborSetPr
}
@Override
- public Collection<DoubleObjPair<DBID>> getWeightedNeighbors(DBID reference) {
+ public Collection<DoubleDBIDPair> getWeightedNeighbors(DBIDRef reference) {
ModifiableDBIDs seen = DBIDUtil.newHashSet();
- List<DoubleObjPair<DBID>> result = new ArrayList<DoubleObjPair<DBID>>();
+ List<DoubleDBIDPair> result = new ArrayList<DoubleDBIDPair>();
// Add starting object
- result.add(new DoubleObjPair<DBID>(computeWeight(0), reference));
+ result.add(DBIDUtil.newPair(computeWeight(0), reference));
seen.add(reference);
// Extend.
- DBIDs cur = reference;
+ DBIDs cur = DBIDUtil.deref(reference);
for(int i = 1; i <= steps; i++) {
final double weight = computeWeight(i);
// Collect newly discovered IDs
ModifiableDBIDs add = DBIDUtil.newHashSet();
for(DBIDIter iter = cur.iter(); iter.valid(); iter.advance()) {
- DBID id = iter.getDBID();
- for(DBIDIter iter2 = inner.getNeighborDBIDs(id).iter(); iter2.valid(); iter2.advance()) {
- DBID nid = iter2.getDBID();
+ for(DBIDIter iter2 = inner.getNeighborDBIDs(iter).iter(); iter2.valid(); iter2.advance()) {
// Seen before?
- if(seen.contains(nid)) {
+ if(seen.contains(iter2)) {
continue;
}
- add.add(nid);
- result.add(new DoubleObjPair<DBID>(weight, nid));
+ add.add(iter2);
+ result.add(DBIDUtil.newPair(weight, iter2));
}
}
if(add.size() == 0) {
@@ -172,12 +170,12 @@ public class LinearWeightedExtendedNeighborhood implements WeightedNeighborSetPr
/**
* Parameter to specify the neighborhood predicate to use.
*/
- public static final OptionID NEIGHBORHOOD_ID = OptionID.getOrCreateOptionID("extendedneighbors.neighborhood", "The inner neighborhood predicate to use.");
+ public static final OptionID NEIGHBORHOOD_ID = new OptionID("extendedneighbors.neighborhood", "The inner neighborhood predicate to use.");
/**
* Parameter to specify the number of steps allowed
*/
- public static final OptionID STEPS_ID = OptionID.getOrCreateOptionID("extendedneighbors.steps", "The number of steps allowed in the neighborhood graph.");
+ public static final OptionID STEPS_ID = new OptionID("extendedneighbors.steps", "The number of steps allowed in the neighborhood graph.");
/**
* The number of steps to do.
@@ -217,7 +215,8 @@ public class LinearWeightedExtendedNeighborhood implements WeightedNeighborSetPr
* @return number of steps, default 1
*/
public static int getParameterSteps(Parameterization config) {
- final IntParameter param = new IntParameter(STEPS_ID, new GreaterEqualConstraint(1));
+ final IntParameter param = new IntParameter(STEPS_ID);
+ param.addConstraint(new GreaterEqualConstraint(1));
if(config.grab(param)) {
return param.getValue();
}
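Parameter constraints are no longer constructor arguments but are attached afterwards, which also permits several constraints on one parameter (as the TrimmedMeanApproach hunk does with a Greater and a Less constraint):

    final IntParameter param = new IntParameter(STEPS_ID);
    param.addConstraint(new GreaterEqualConstraint(1)); // replaces the constructor form
    if(config.grab(param)) {
      return param.getValue();
    }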
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java
index ce0666df..c179d81f 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java
@@ -28,15 +28,16 @@ import java.util.Collection;
import de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
-import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair;
/**
* Adapter to use unweighted neighborhoods in an algorithm that requires
@@ -61,12 +62,11 @@ public class UnweightedNeighborhoodAdapter implements WeightedNeighborSetPredica
}
@Override
- public Collection<DoubleObjPair<DBID>> getWeightedNeighbors(DBID reference) {
+ public Collection<DoubleDBIDPair> getWeightedNeighbors(DBIDRef reference) {
DBIDs neighbors = inner.getNeighborDBIDs(reference);
- ArrayList<DoubleObjPair<DBID>> adapted = new ArrayList<DoubleObjPair<DBID>>(neighbors.size());
+ ArrayList<DoubleDBIDPair> adapted = new ArrayList<DoubleDBIDPair>(neighbors.size());
for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
- DBID id = iter.getDBID();
- adapted.add(new DoubleObjPair<DBID>(1.0, id));
+ adapted.add(DBIDUtil.newPair(1.0, iter));
}
return adapted;
}
@@ -120,7 +120,7 @@ public class UnweightedNeighborhoodAdapter implements WeightedNeighborSetPredica
/**
* The parameter to give the non-weighted neighborhood to use.
*/
- public static final OptionID INNER_ID = OptionID.getOrCreateOptionID("neighborhood.inner", "Parameter for the non-weighted neighborhood to use.");
+ public static final OptionID INNER_ID = new OptionID("neighborhood.inner", "Parameter for the non-weighted neighborhood to use.");
/**
* The actual predicate.
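
This file also shows the parameterization cleanup applied throughout the commit: OptionIDs are constructed directly instead of via OptionID.getOrCreateOptionID, and value constraints are attached with addConstraint rather than passed to parameter constructors. A minimal, self-contained illustration of why the latter composes better; IntOption and its methods are hypothetical, not ELKI API.

    import java.util.ArrayList;
    import java.util.List;

    // Hypothetical sketch: constraints attached after construction can be
    // combined freely, with no constructor overload per combination.
    class IntOption {
      interface Constraint {
        boolean holds(int v);
      }

      final String name;
      final List<Constraint> constraints = new ArrayList<Constraint>();
      Integer value;

      IntOption(String name) {
        this.name = name;
      }

      IntOption addConstraint(Constraint c) {
        constraints.add(c); // stack as many constraints as needed
        return this;
      }

      void set(int v) {
        for (Constraint c : constraints) {
          if (!c.holds(v)) {
            throw new IllegalArgumentException(name + ": constraint violated by " + v);
          }
        }
        value = v;
      }
    }

Replacing a single IntervalConstraint with a GreaterConstraint plus a LessEqualConstraint, as done for the sampling parameter further below, is exactly this kind of composition.
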
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java
index b147935a..16d37587 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java
@@ -26,10 +26,10 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.weighted;
import java.util.Collection;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable;
-import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair;
/**
* Neighbor predicate with weight support.
@@ -43,7 +43,7 @@ public interface WeightedNeighborSetPredicate {
* @param reference Reference object
* @return Weighted Neighborhood
*/
- public Collection<DoubleObjPair<DBID>> getWeightedNeighbors(DBID reference);
+ public Collection<DoubleDBIDPair> getWeightedNeighbors(DBIDRef reference);
/**
* Factory interface to produce instances.
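
Callers of the narrowed interface aggregate a per-object statistic over the returned (weight, id) pairs. A minimal, ELKI-free sketch of that consumption pattern; WeightedNeighbor is a hypothetical stand-in for DoubleDBIDPair.

    import java.util.List;

    // Hypothetical pair type: a weight plus an object id.
    class WeightedNeighbor {
      final double weight;
      final int id;

      WeightedNeighbor(double weight, int id) {
        this.weight = weight;
        this.id = id;
      }
    }

    class NeighborhoodMean {
      // Weighted mean of a per-object value over the neighborhood, the typical
      // way spatial outlier algorithms consume getWeightedNeighbors results.
      static double weightedMean(List<WeightedNeighbor> neighbors, double[] values) {
        double sum = 0., wsum = 0.;
        for (WeightedNeighbor n : neighbors) {
          sum += n.weight * values[n.id];
          wsum += n.weight;
        }
        return wsum > 0 ? sum / wsum : Double.NaN; // empty neighborhood: undefined
      }
    }
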
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java
index 573233a7..1965914d 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java
@@ -23,10 +23,8 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.subspace;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
-import java.util.List;
import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
@@ -37,16 +35,20 @@ import de.lmu.ifi.dbs.elki.database.QueryUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
-import de.lmu.ifi.dbs.elki.database.query.DoubleDistanceResultPair;
+import de.lmu.ifi.dbs.elki.database.ids.DistanceDBIDPair;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDistanceDBIDPair;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDList;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDResultIter;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
@@ -58,7 +60,6 @@ import de.lmu.ifi.dbs.elki.math.statistics.distribution.GammaDistribution;
import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
@@ -89,11 +90,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
* @param <V> vector type
*/
@Reference(authors = "E. Müller, M. Schiffer, T. Seidl", title = "Adaptive outlierness for subspace outlier ranking", booktitle = "Proc. 19th ACM International Conference on Information and knowledge management")
-public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(OUTRES.class);
+ private static final Logging LOG = Logging.getLogger(OUTRES.class);
/**
* The epsilon (in 2d) parameter
@@ -128,7 +129,7 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl
KernelDensityEstimator kernel = new KernelDensityEstimator(relation);
BitSet subspace = new BitSet(kernel.dim);
- FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("OutRank scores", relation.size(), logger) : null;
+ FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("OUTRES scores", relation.size(), LOG) : null;
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
subspace.clear();
@@ -136,11 +137,11 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl
ranks.putDouble(iditer, score);
minmax.put(score);
if(progress != null) {
- progress.incrementProcessed(logger);
+ progress.incrementProcessed(LOG);
}
}
if(progress != null) {
- progress.ensureCompleted(logger);
+ progress.ensureCompleted(LOG);
}
OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0., 1., 1.);
@@ -159,33 +160,34 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl
*/
public double outresScore(final int s, BitSet subspace, DBIDRef id, KernelDensityEstimator kernel) {
double score = 1.0; // Initial score is 1.0
+ final SubspaceEuclideanDistanceFunction df = new SubspaceEuclideanDistanceFunction(subspace);
+ MeanVariance meanv = new MeanVariance();
for(int i = s; i < kernel.dim; i++) {
if(subspace.get(i)) { // TODO: needed? Or should we always start with i=0?
continue;
}
subspace.set(i);
- final SubspaceEuclideanDistanceFunction df = new SubspaceEuclideanDistanceFunction(subspace);
+ df.setSelectedDimensions(subspace);
final double adjustedEps = kernel.adjustedEps(kernel.dim);
// Query with a larger window, to also get neighbors of neighbors
// Subspace euclidean is metric!
- final DoubleDistance range = new DoubleDistance(adjustedEps * 2);
+ final DoubleDistance range = new DoubleDistance(adjustedEps * 2.);
RangeQuery<V, DoubleDistance> rq = QueryUtil.getRangeQuery(kernel.relation, df, range);
- List<DistanceResultPair<DoubleDistance>> neighc = rq.getRangeForDBID(id, range);
- List<DoubleDistanceResultPair> neigh = refineRange(neighc, adjustedEps);
+ DistanceDBIDResult<DoubleDistance> neighc = rq.getRangeForDBID(id, range);
+ DoubleDistanceDBIDList neigh = refineRange(neighc, adjustedEps);
if(neigh.size() > 2) {
// Relevance test
if(relevantSubspace(subspace, neigh, kernel)) {
final double density = kernel.subspaceDensity(subspace, neigh);
- final double deviation;
// Compute mean and standard deviation for densities of neighbors.
- MeanVariance meanv = new MeanVariance();
- for(DoubleDistanceResultPair pair : neigh) {
- List<DoubleDistanceResultPair> n2 = subsetNeighborhoodQuery(neighc, pair.getDBID(), df, adjustedEps, kernel);
+ meanv.reset();
+ for (DoubleDistanceDBIDResultIter neighbor = neigh.iter(); neighbor.valid(); neighbor.advance()) {
+ DoubleDistanceDBIDList n2 = subsetNeighborhoodQuery(neighc, neighbor, df, adjustedEps, kernel);
meanv.put(kernel.subspaceDensity(subspace, n2));
}
- deviation = (meanv.getMean() - density) / (2. * meanv.getSampleStddev());
+ final double deviation = (meanv.getMean() - density) / (2. * meanv.getSampleStddev());
// High deviation:
if(deviation >= 1) {
score *= (density / deviation);
@@ -206,19 +208,20 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl
* @param adjustedEps New epsilon
* @return refined list
*/
- private List<DoubleDistanceResultPair> refineRange(List<DistanceResultPair<DoubleDistance>> neighc, double adjustedEps) {
- List<DoubleDistanceResultPair> n = new ArrayList<DoubleDistanceResultPair>(neighc.size());
+ private DoubleDistanceDBIDList refineRange(DistanceDBIDResult<DoubleDistance> neighc, double adjustedEps) {
+ DoubleDistanceDBIDList n = new DoubleDistanceDBIDList(neighc.size());
// We don't have a guarantee for this list to be sorted
- for(DistanceResultPair<DoubleDistance> p : neighc) {
- if(p instanceof DoubleDistanceResultPair) {
- if(((DoubleDistanceResultPair) p).getDoubleDistance() <= adjustedEps) {
- n.add((DoubleDistanceResultPair) p);
+ for (DistanceDBIDResultIter<DoubleDistance> neighbor = neighc.iter(); neighbor.valid(); neighbor.advance()) {
+ DistanceDBIDPair<DoubleDistance> p = neighbor.getDistancePair();
+ if(p instanceof DoubleDistanceDBIDPair) {
+ if(((DoubleDistanceDBIDPair) p).doubleDistance() <= adjustedEps) {
+ n.add((DoubleDistanceDBIDPair) p);
}
}
else {
double dist = p.getDistance().doubleValue();
if(dist <= adjustedEps) {
- n.add(new DoubleDistanceResultPair(dist, p.getDBID()));
+ n.add(dist, p);
}
}
}
@@ -235,13 +238,14 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl
* @param kernel Kernel
* @return Neighbors of neighbor object
*/
- private List<DoubleDistanceResultPair> subsetNeighborhoodQuery(List<DistanceResultPair<DoubleDistance>> neighc, DBID dbid, PrimitiveDoubleDistanceFunction<? super V> df, double adjustedEps, KernelDensityEstimator kernel) {
- List<DoubleDistanceResultPair> n = new ArrayList<DoubleDistanceResultPair>(neighc.size());
+ private DoubleDistanceDBIDList subsetNeighborhoodQuery(DistanceDBIDResult<DoubleDistance> neighc, DBIDRef dbid, PrimitiveDoubleDistanceFunction<? super V> df, double adjustedEps, KernelDensityEstimator kernel) {
+ DoubleDistanceDBIDList n = new DoubleDistanceDBIDList(neighc.size());
V query = kernel.relation.get(dbid);
- for(DistanceResultPair<DoubleDistance> p : neighc) {
+ for (DistanceDBIDResultIter<DoubleDistance> neighbor = neighc.iter(); neighbor.valid(); neighbor.advance()) {
+ DistanceDBIDPair<DoubleDistance> p = neighbor.getDistancePair();
double dist = df.doubleDistance(query, kernel.relation.get(p));
if(dist <= adjustedEps) {
- n.add(new DoubleDistanceResultPair(dist, p.getDBID()));
+ n.add(dist, p);
}
}
return n;
@@ -255,7 +259,7 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl
* @param kernel Kernel density estimator
* @return relevance test result
*/
- protected boolean relevantSubspace(BitSet subspace, List<DoubleDistanceResultPair> neigh, KernelDensityEstimator kernel) {
+ protected boolean relevantSubspace(BitSet subspace, DoubleDistanceDBIDList neigh, KernelDensityEstimator kernel) {
Relation<V> relation = kernel.relation;
final double crit = K_S_CRITICAL001 / Math.sqrt(neigh.size());
@@ -264,9 +268,9 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl
double[] data = new double[neigh.size()];
{
int count = 0;
- for(DoubleDistanceResultPair object : neigh) {
- V vector = relation.get(object.getDBID());
- data[count] = vector.doubleValue(dim + 1);
+ for (DBIDIter neighbor = neigh.iter(); neighbor.valid(); neighbor.advance()) {
+ V vector = relation.get(neighbor);
+ data[count] = vector.doubleValue(dim);
count++;
}
assert (count == neigh.size());
@@ -278,7 +282,7 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl
// Kolmogorov-Smirnov test against uniform distribution:
for(int j = 1; j < data.length - 2; j++) {
- double delta = (j / (data.length - 1)) - ((data[j] - min) / norm);
+ double delta = (j / (data.length - 1.)) - ((data[j] - min) / norm);
if(Math.abs(delta) > crit) {
return false;
}
@@ -326,7 +330,7 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl
public KernelDensityEstimator(Relation<V> relation) {
super();
this.relation = relation;
- dim = DatabaseUtil.dimensionality(relation);
+ dim = RelationUtil.dimensionality(relation);
hopttwo = optimalBandwidth(2);
epsilons = new double[dim + 1];
Arrays.fill(epsilons, Double.NEGATIVE_INFINITY);
@@ -337,15 +341,15 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl
* Compute density in the given subspace.
*
* @param subspace Subspace
- * @param neighbours Neighbor distance list
+ * @param neighbors Neighbor distance list
* @return Density
*/
- protected double subspaceDensity(BitSet subspace, List<DoubleDistanceResultPair> neighbours) {
+ protected double subspaceDensity(BitSet subspace, DoubleDistanceDBIDList neighbors) {
final double bandwidth = optimalBandwidth(subspace.cardinality());
double density = 0;
- for(DoubleDistanceResultPair pair : neighbours) {
- double v = pair.getDoubleDistance() / bandwidth;
+ for (DoubleDistanceDBIDResultIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ double v = neighbor.doubleDistance() / bandwidth;
if(v < 1) {
density += 1 - (v * v);
}
@@ -363,7 +367,7 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl
protected double optimalBandwidth(int dim) {
// Pi in the publication is redundant and cancels out!
double hopt = 8 * GammaDistribution.gamma(dim / 2.0 + 1) * (dim + 4) * Math.pow(2, dim);
- return hopt * Math.pow(relation.size(), (-1 / (dim + 4)));
+ return hopt * Math.pow(relation.size(), (-1. / (dim + 4)));
}
/**
@@ -385,7 +389,7 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
@Override
@@ -400,11 +404,11 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl
*
* @apiviz.exclude
*/
- public static class Parameterizer<O extends NumberVector<O, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<O extends NumberVector<?>> extends AbstractParameterizer {
/**
* Option ID for Epsilon parameter
*/
- public static final OptionID D_ID = OptionID.getOrCreateOptionID("outres.epsilon", "Range value for OUTRES in 2 dimensions.");
+ public static final OptionID D_ID = new OptionID("outres.epsilon", "Range value for OUTRES in 2 dimensions.");
/**
* Query radius
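
Among the OUTRES changes above, the Kolmogorov-Smirnov loop fix is the substantive one: with integer division, j / (data.length - 1) evaluates to 0 for every j < data.length - 1, so the empirical CDF term silently vanished. A self-contained sketch of the corrected uniformity check; the critical value crit is taken as given, and at least two distinct values are assumed so the normalization is nonzero.

    import java.util.Arrays;

    // Self-contained sketch of the uniformity test fixed above.
    class UniformityCheck {
      static boolean looksUniform(double[] data, double crit) {
        Arrays.sort(data);
        double min = data[0];
        double norm = data[data.length - 1] - min; // assumed nonzero
        // Same loop bounds as the code above: the last two entries are skipped.
        for (int j = 1; j < data.length - 2; j++) {
          // data.length - 1. forces floating-point division, the actual fix.
          double delta = (j / (data.length - 1.)) - ((data[j] - min) / norm);
          if (Math.abs(delta) > crit) {
            return false; // deviation from the uniform CDF exceeds the critical value
          }
        }
        return true;
      }
    }

The same integer-division pitfall is fixed in optimalBandwidth above, where -1 / (dim + 4) would always round to 0 and make the exponent vanish.
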
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java
index e370d2bf..79243213 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java
@@ -78,7 +78,7 @@ public class OutRankS1 extends AbstractAlgorithm<OutlierResult> implements Outli
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(OutRankS1.class);
+ private static final Logging LOG = Logging.getLogger(OutRankS1.class);
/**
* Clustering algorithm to run.
@@ -110,23 +110,23 @@ public class OutRankS1 extends AbstractAlgorithm<OutlierResult> implements Outli
Clustering<? extends SubspaceModel<?>> clustering = clusteralg.run(database);
WritableDoubleDataStore score = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT);
- for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
score.putDouble(iter, 0);
}
int maxdim = 0, maxsize = 0;
// Find maximum dimensionality and cluster size
- for(Cluster<? extends SubspaceModel<?>> cluster : clustering.getAllClusters()) {
+ for (Cluster<? extends SubspaceModel<?>> cluster : clustering.getAllClusters()) {
maxsize = Math.max(maxsize, cluster.size());
maxdim = Math.max(maxdim, cluster.getModel().getDimensions().cardinality());
}
// Iterate over all clusters:
DoubleMinMax minmax = new DoubleMinMax();
- for(Cluster<? extends SubspaceModel<?>> cluster : clustering.getAllClusters()) {
+ for (Cluster<? extends SubspaceModel<?>> cluster : clustering.getAllClusters()) {
double relsize = cluster.size() / (double) maxsize;
double reldim = cluster.getModel().getDimensions().cardinality() / (double) maxdim;
// Process objects in the cluster
- for(DBIDIter iter = cluster.getIDs().iter(); iter.valid(); iter.advance()) {
+ for (DBIDIter iter = cluster.getIDs().iter(); iter.valid(); iter.advance()) {
double newscore = score.doubleValue(iter) + alpha * relsize + (1 - alpha) * reldim;
score.putDouble(iter, newscore);
minmax.put(newscore);
@@ -147,7 +147,7 @@ public class OutRankS1 extends AbstractAlgorithm<OutlierResult> implements Outli
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -161,12 +161,12 @@ public class OutRankS1 extends AbstractAlgorithm<OutlierResult> implements Outli
/**
* Clustering algorithm to use.
*/
- public static final OptionID ALGORITHM_ID = OptionID.getOrCreateOptionID("outrank.algorithm", "Subspace clustering algorithm to use.");
+ public static final OptionID ALGORITHM_ID = new OptionID("outrank.algorithm", "Subspace clustering algorithm to use.");
/**
* Alpha parameter for S1
*/
- public static final OptionID ALPHA_ID = OptionID.getOrCreateOptionID("outrank.s1.alpha", "Alpha parameter for S1 score.");
+ public static final OptionID ALPHA_ID = new OptionID("outrank.s1.alpha", "Alpha parameter for S1 score.");
/**
* Clustering algorithm to run.
@@ -182,12 +182,13 @@ public class OutRankS1 extends AbstractAlgorithm<OutlierResult> implements Outli
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
ObjectParameter<SubspaceClusteringAlgorithm<? extends SubspaceModel<?>>> algP = new ObjectParameter<SubspaceClusteringAlgorithm<? extends SubspaceModel<?>>>(ALGORITHM_ID, SubspaceClusteringAlgorithm.class);
- if(config.grab(algP)) {
+ if (config.grab(algP)) {
algorithm = algP.instantiateClass(config);
}
- DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, new GreaterConstraint(0), 0.25);
- if(config.grab(alphaP)) {
- alpha = alphaP.getValue();
+ DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, 0.25);
+ alphaP.addConstraint(new GreaterConstraint(0));
+ if (config.grab(alphaP)) {
+ alpha = alphaP.doubleValue();
}
}
@@ -196,4 +197,4 @@ public class OutRankS1 extends AbstractAlgorithm<OutlierResult> implements Outli
return new OutRankS1(algorithm, alpha);
}
}
-} \ No newline at end of file
+}
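
The score accumulated in the OutRankS1 loop above is a convex combination of relative cluster size and relative subspace dimensionality, summed over all clusters containing the object: larger values mean the object sits in large, high-dimensional clusters, i.e. inliers. A self-contained sketch, assuming at least one non-empty cluster and 0 <= alpha <= 1.

    // Self-contained sketch of the S1 scoring above.
    class OutRankS1Sketch {
      // sizes[c] and dims[c] describe cluster c; members[c] lists its object ids.
      static double[] scores(int n, int[] sizes, int[] dims, int[][] members, double alpha) {
        int maxsize = 0, maxdim = 0;
        for (int c = 0; c < sizes.length; c++) {
          maxsize = Math.max(maxsize, sizes[c]);
          maxdim = Math.max(maxdim, dims[c]);
        }
        double[] score = new double[n]; // starts at 0, like the putDouble(iter, 0) loop
        for (int c = 0; c < sizes.length; c++) {
          double relsize = sizes[c] / (double) maxsize;
          double reldim = dims[c] / (double) maxdim;
          for (int id : members[c]) {
            score[id] += alpha * relsize + (1 - alpha) * reldim;
          }
        }
        return score;
      }
    }
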
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java
index 7fef95e0..35a780cd 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java
@@ -36,14 +36,15 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair;
import de.lmu.ifi.dbs.elki.database.query.similarity.SimilarityQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.distance.similarityfunction.SharedNearestNeighborSimilarityFunction;
@@ -57,7 +58,6 @@ import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.textwriter.TextWriteable;
import de.lmu.ifi.dbs.elki.result.textwriter.TextWriterStream;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TiedTopBoundedHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
@@ -70,10 +70,10 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
-import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair;
/**
- * Subspace Outlier Degree. Outlier detection method for axis-parallel subspaces.
+ * Subspace Outlier Degree. Outlier detection method for axis-parallel
+ * subspaces.
*
* Reference:
* <p>
@@ -89,34 +89,35 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair;
* @apiviz.has SharedNearestNeighborSimilarityFunction
*
* @param <V> the type of NumberVector handled by this Algorithm
+ * @param <D> distance type
*/
// todo arthur comment
@Title("SOD: Subspace outlier degree")
@Description("Outlier Detection in Axis-Parallel Subspaces of High Dimensional Data")
@Reference(authors = "H.-P. Kriegel, P. Kröger, E. Schubert, A. Zimek", title = "Outlier Detection in Axis-Parallel Subspaces of High Dimensional Data", booktitle = "Proceedings of the 13th Pacific-Asia Conference on Knowledge Discovery and Data Mining (PAKDD), Bangkok, Thailand, 2009", url = "http://dx.doi.org/10.1007/978-3-642-01307-2")
-public class SOD<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(SOD.class);
+ private static final Logging LOG = Logging.getLogger(SOD.class);
/**
* Parameter to specify the number of shared nearest neighbors to be
* considered for learning the subspace properties; must be an integer
* greater than 0.
*/
- public static final OptionID KNN_ID = OptionID.getOrCreateOptionID("sod.knn", "The number of most snn-similar objects to use as reference set for learning the subspace properties.");
+ public static final OptionID KNN_ID = new OptionID("sod.knn", "The number of most snn-similar objects to use as reference set for learning the subspace properties.");
/**
* Parameter to indicate the multiplier for the discriminance value for
* discerning small from large variances.
*/
- public static final OptionID ALPHA_ID = OptionID.getOrCreateOptionID("sod.alpha", "The multiplier for the discriminance value for discerning small from large variances.");
+ public static final OptionID ALPHA_ID = new OptionID("sod.alpha", "The multiplier for the discriminance value for discerning small from large variances.");
/**
* Parameter for the similarity function.
*/
- public static final OptionID SIM_ID = OptionID.getOrCreateOptionID("sod.similarity", "The similarity function used for the neighborhood set.");
+ public static final OptionID SIM_ID = new OptionID("sod.similarity", "The similarity function used for the neighborhood set.");
/**
* Holds the value of {@link #KNN_ID}.
@@ -155,20 +156,20 @@ public class SOD<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> e
*/
public OutlierResult run(Relation<V> relation) {
SimilarityQuery<V, D> snnInstance = similarityFunction.instantiate(relation);
- FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Assigning Subspace Outlier Degree", relation.size(), logger) : null;
+ FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Assigning Subspace Outlier Degree", relation.size(), LOG) : null;
WritableDataStore<SODModel<?>> sod_models = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, SODModel.class);
DoubleMinMax minmax = new DoubleMinMax();
- for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
- if(progress != null) {
- progress.incrementProcessed(logger);
+ for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
+ if (progress != null) {
+ progress.incrementProcessed(LOG);
}
DBIDs knnList = getNearestNeighbors(relation, snnInstance, iter);
SODModel<V> model = new SODModel<V>(relation, knnList, alpha, relation.get(iter));
sod_models.put(iter, model);
minmax.put(model.getSod());
}
- if(progress != null) {
- progress.ensureCompleted(logger);
+ if (progress != null) {
+ progress.ensureCompleted(LOG);
}
// combine results.
Relation<SODModel<?>> models = new MaterializedRelation<SODModel<?>>("Subspace Outlier Model", "sod-outlier", new SimpleTypeInformation<SODModel<?>>(SODModel.class), sod_models, relation.getDBIDs());
@@ -193,20 +194,19 @@ public class SOD<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> e
*/
private DBIDs getNearestNeighbors(Relation<V> relation, SimilarityQuery<V, D> simQ, DBIDRef queryObject) {
// similarityFunction.getPreprocessor().getParameters();
- Heap<DoubleObjPair<DBID>> nearestNeighbors = new TiedTopBoundedHeap<DoubleObjPair<DBID>>(knn);
- for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
- if(!iter.sameDBID(queryObject)) {
+ Heap<DoubleDBIDPair> nearestNeighbors = new TiedTopBoundedHeap<DoubleDBIDPair>(knn);
+ for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
+ if (!DBIDUtil.equal(iter, queryObject)) {
double sim = simQ.similarity(queryObject, iter).doubleValue();
- if(sim > 0) {
- nearestNeighbors.add(new DoubleObjPair<DBID>(sim, iter.getDBID()));
+ if (sim > 0) {
+ nearestNeighbors.add(DBIDUtil.newPair(sim, iter));
}
}
}
// Collect DBIDs
ArrayModifiableDBIDs dbids = DBIDUtil.newArray(nearestNeighbors.size());
- while(nearestNeighbors.size() > 0) {
- final DoubleObjPair<DBID> next = nearestNeighbors.poll();
- dbids.add(next.second);
+ while (nearestNeighbors.size() > 0) {
+ dbids.add(nearestNeighbors.poll());
}
return dbids;
}
@@ -218,17 +218,17 @@ public class SOD<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> e
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
- *
+ * SOD Model class
*
* @author Arthur Zimek
* @param <V> the type of DatabaseObjects handled by this Result
*/
// TODO: arthur comment
- public static class SODModel<V extends NumberVector<V, ?>> implements TextWriteable, Comparable<SODModel<?>> {
+ public static class SODModel<V extends NumberVector<?>> implements TextWriteable, Comparable<SODModel<?>> {
private double[] centerValues;
private V center;
@@ -250,61 +250,60 @@ public class SOD<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> e
* @param queryObject Query object
*/
public SODModel(Relation<V> relation, DBIDs neighborhood, double alpha, V queryObject) {
- if(neighborhood.size() > 0) {
+ if (neighborhood.size() > 0) {
// TODO: store database link?
- centerValues = new double[DatabaseUtil.dimensionality(relation)];
+ centerValues = new double[RelationUtil.dimensionality(relation)];
variances = new double[centerValues.length];
- for(DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) {
+ for (DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) {
V databaseObject = relation.get(iter);
- for(int d = 0; d < centerValues.length; d++) {
- centerValues[d] += databaseObject.doubleValue(d + 1);
+ for (int d = 0; d < centerValues.length; d++) {
+ centerValues[d] += databaseObject.doubleValue(d);
}
}
- for(int d = 0; d < centerValues.length; d++) {
+ for (int d = 0; d < centerValues.length; d++) {
centerValues[d] /= neighborhood.size();
}
- for(DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) {
+ for (DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) {
V databaseObject = relation.get(iter);
- for(int d = 0; d < centerValues.length; d++) {
+ for (int d = 0; d < centerValues.length; d++) {
// distance
- double distance = centerValues[d] - databaseObject.doubleValue(d + 1);
+ double distance = centerValues[d] - databaseObject.doubleValue(d);
// variance
variances[d] += distance * distance;
}
}
expectationOfVariance = 0;
- for(int d = 0; d < variances.length; d++) {
+ for (int d = 0; d < variances.length; d++) {
variances[d] /= neighborhood.size();
expectationOfVariance += variances[d];
}
expectationOfVariance /= variances.length;
weightVector = new BitSet(variances.length);
- for(int d = 0; d < variances.length; d++) {
- if(variances[d] < alpha * expectationOfVariance) {
+ for (int d = 0; d < variances.length; d++) {
+ if (variances[d] < alpha * expectationOfVariance) {
weightVector.set(d, true);
}
}
- center = DatabaseUtil.assumeVectorField(relation).getFactory().newNumberVector(centerValues);
+ center = RelationUtil.getNumberVectorFactory(relation).newNumberVector(centerValues);
sod = subspaceOutlierDegree(queryObject, center, weightVector);
- }
- else {
+ } else {
center = queryObject;
sod = 0.0;
}
}
/**
- * Compute SOD score
+ * Compute SOD score.
*
- * @param queryObject
- * @param center
- * @param weightVector
- * @return sod value
+ * @param queryObject Query object
+ * @param center Center vector
+ * @param weightVector Weight vector
+ * @return sod score
*/
private double subspaceOutlierDegree(V queryObject, V center, BitSet weightVector) {
final SubspaceEuclideanDistanceFunction df = new SubspaceEuclideanDistanceFunction(weightVector);
final int card = weightVector.cardinality();
- if(card == 0) {
+ if (card == 0) {
return 0;
}
double distance = df.distance(queryObject, center).doubleValue();
@@ -352,7 +351,7 @@ public class SOD<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> e
Relation<SODModel<?>> models;
/**
- * The IDs we are defined for
+ * The IDs we are defined for.
*/
DBIDs dbids;
@@ -436,7 +435,7 @@ public class SOD<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> e
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractParameterizer {
/**
* Holds the value of {@link #KNN_ID}.
*/
@@ -456,18 +455,20 @@ public class SOD<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> e
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
final ObjectParameter<SimilarityFunction<V, D>> simP = new ObjectParameter<SimilarityFunction<V, D>>(SIM_ID, SimilarityFunction.class, SharedNearestNeighborSimilarityFunction.class);
- if(config.grab(simP)) {
+ if (config.grab(simP)) {
similarityFunction = simP.instantiateClass(config);
}
- final IntParameter knnP = new IntParameter(KNN_ID, new GreaterConstraint(0));
- if(config.grab(knnP)) {
+ final IntParameter knnP = new IntParameter(KNN_ID);
+ knnP.addConstraint(new GreaterConstraint(0));
+ if (config.grab(knnP)) {
knn = knnP.getValue();
}
- final DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, new GreaterConstraint(0), 1.1);
- if(config.grab(alphaP)) {
- alpha = alphaP.getValue();
+ final DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, 1.1);
+ alphaP.addConstraint(new GreaterConstraint(0));
+ if (config.grab(alphaP)) {
+ alpha = alphaP.doubleValue();
}
}
@@ -476,4 +477,4 @@ public class SOD<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> e
return new SOD<V, D>(knn, alpha, similarityFunction);
}
}
-} \ No newline at end of file
+}
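
The SODModel constructor above estimates per-dimension variances over the reference set, keeps the dimensions whose variance falls below alpha times the expected variance, and scores the query by its distance to the neighborhood center in that subspace. A self-contained sketch of the computation; normalizing by the subspace cardinality follows the card handling above, but since the tail of subspaceOutlierDegree is elided in this hunk, treat the exact normalization as an assumption.

    // Self-contained sketch of the SOD model built above.
    class SodSketch {
      static double subspaceOutlierDegree(double[][] neighborhood, double[] query, double alpha) {
        final int dim = query.length, n = neighborhood.length; // n > 0 assumed
        double[] center = new double[dim], var = new double[dim];
        for (double[] v : neighborhood) {
          for (int d = 0; d < dim; d++) {
            center[d] += v[d];
          }
        }
        for (int d = 0; d < dim; d++) {
          center[d] /= n;
        }
        double expectation = 0.;
        for (double[] v : neighborhood) {
          for (int d = 0; d < dim; d++) {
            final double diff = center[d] - v[d];
            var[d] += diff * diff;
          }
        }
        for (int d = 0; d < dim; d++) {
          var[d] /= n;
          expectation += var[d];
        }
        expectation /= dim;
        double dist = 0.;
        int card = 0;
        for (int d = 0; d < dim; d++) {
          if (var[d] < alpha * expectation) { // low variance: relevant subspace dimension
            final double diff = query[d] - center[d];
            dist += diff * diff;
            card++;
          }
        }
        return card == 0 ? 0. : Math.sqrt(dist) / card; // normalization assumed
      }
    }
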
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java
index 66a89cf5..ae95abfa 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java
@@ -57,7 +57,7 @@ public class ByLabelOutlier extends AbstractAlgorithm<OutlierResult> implements
/**
* Our logger.
*/
- private static final Logging logger = Logging.getLogger(ByLabelOutlier.class);
+ private static final Logging LOG = Logging.getLogger(ByLabelOutlier.class);
/**
* The default pattern to use.
@@ -124,7 +124,7 @@ public class ByLabelOutlier extends AbstractAlgorithm<OutlierResult> implements
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -145,7 +145,7 @@ public class ByLabelOutlier extends AbstractAlgorithm<OutlierResult> implements
* Key: {@code -outlier.pattern}
* </p>
*/
- public static final OptionID OUTLIER_PATTERN_ID = OptionID.getOrCreateOptionID("outlier.pattern", "Label pattern to match outliers.");
+ public static final OptionID OUTLIER_PATTERN_ID = new OptionID("outlier.pattern", "Label pattern to match outliers.");
/**
* Stores the "outlier" class.
*/
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java
index b50226f1..35a85d51 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java
@@ -48,7 +48,7 @@ public class TrivialAllOutlier extends AbstractAlgorithm<OutlierResult> implemen
/**
* Our logger.
*/
- private static final Logging logger = Logging.getLogger(TrivialAllOutlier.class);
+ private static final Logging LOG = Logging.getLogger(TrivialAllOutlier.class);
/**
* Constructor.
@@ -80,6 +80,6 @@ public class TrivialAllOutlier extends AbstractAlgorithm<OutlierResult> implemen
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java
index d1c2e076..e4c3861f 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java
@@ -65,12 +65,12 @@ public class TrivialGeneratedOutlier extends AbstractAlgorithm<OutlierResult> im
/**
* Class logger
*/
- private static final Logging logger = Logging.getLogger(TrivialGeneratedOutlier.class);
+ private static final Logging LOG = Logging.getLogger(TrivialGeneratedOutlier.class);
/**
* Expected share of outliers
*/
- public static final OptionID EXPECT_ID = OptionID.getOrCreateOptionID("modeloutlier.expect", "Expected amount of outliers, for making the scores more intuitive.");
+ public static final OptionID EXPECT_ID = new OptionID("modeloutlier.expect", "Expected amount of outliers, for making the scores more intuitive.");
/**
* Expected share of outliers.
@@ -101,7 +101,7 @@ public class TrivialGeneratedOutlier extends AbstractAlgorithm<OutlierResult> im
@Override
public OutlierResult run(Database database) {
- Relation<NumberVector<?, ?>> vecs = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
+ Relation<NumberVector<?>> vecs = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
Relation<Model> models = database.getRelation(new SimpleTypeInformation<Model>(Model.class));
// Prefer a true class label
try {
@@ -122,7 +122,7 @@ public class TrivialGeneratedOutlier extends AbstractAlgorithm<OutlierResult> im
* @param labels Label relation
* @return Outlier result
*/
- public OutlierResult run(Relation<Model> models, Relation<NumberVector<?, ?>> vecs, Relation<?> labels) {
+ public OutlierResult run(Relation<Model> models, Relation<NumberVector<?>> vecs, Relation<?> labels) {
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(models.getDBIDs(), DataStoreFactory.HINT_HOT);
// Adjustment constant
@@ -136,7 +136,7 @@ public class TrivialGeneratedOutlier extends AbstractAlgorithm<OutlierResult> im
}
}
if(generators.size() == 0) {
- logger.warning("No generator models found for dataset - all points will be considered outliers.");
+ LOG.warning("No generator models found for dataset - all points will be considered outliers.");
}
for(DBIDIter iditer = models.iterDBIDs(); iditer.valid(); iditer.advance()) {
@@ -179,7 +179,7 @@ public class TrivialGeneratedOutlier extends AbstractAlgorithm<OutlierResult> im
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java
index 6d8e9f46..695ff112 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java
@@ -48,7 +48,7 @@ public class TrivialNoOutlier extends AbstractAlgorithm<OutlierResult> implement
/**
* Our logger.
*/
- private static final Logging logger = Logging.getLogger(TrivialNoOutlier.class);
+ private static final Logging LOG = Logging.getLogger(TrivialNoOutlier.class);
/**
* Constructor.
@@ -80,6 +80,6 @@ public class TrivialNoOutlier extends AbstractAlgorithm<OutlierResult> implement
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/package-info.java
index c18579f0..44eb2aba 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/package-info.java
@@ -5,6 +5,9 @@
* the {@link de.lmu.ifi.dbs.elki.algorithm.Algorithm}-Interface.
* Basic functions are already provided within {@link de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm},
* see there for basic instructions of how to implement an algorithm suitable to the framework.
+ *
+ * @apiviz.exclude workflow.AlgorithmStep
+ * @apiviz.exclude database.query.knn.KNNQuery
*/
/*
This file is part of ELKI:
@@ -28,4 +31,4 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-package de.lmu.ifi.dbs.elki.algorithm; \ No newline at end of file
+package de.lmu.ifi.dbs.elki.algorithm;
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/statistics/AddSingleScale.java b/src/de/lmu/ifi/dbs/elki/algorithm/statistics/AddSingleScale.java
index 481261b3..159fb691 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/statistics/AddSingleScale.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/statistics/AddSingleScale.java
@@ -27,30 +27,45 @@ import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.math.scales.LinearScale;
import de.lmu.ifi.dbs.elki.result.Result;
import de.lmu.ifi.dbs.elki.result.ResultUtil;
import de.lmu.ifi.dbs.elki.result.ScalesResult;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayLikeUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ListSizeConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleListParameter;
/**
- * Pseudo "algorith" that computes the global min/max for a relation across all
+ * Pseudo "algorithm" that computes the global min/max for a relation across all
* attributes.
*
+ * FIXME: this should become part of relation metadata.
+ *
* @author Erich Schubert
*/
@Description("Setup a scaling so that all dimensions are scaled equally in visualization.")
public class AddSingleScale implements Algorithm {
/**
+ * Minimum and maximum to use.
+ */
+ double[] minmax = null;
+
+ /**
* Constructor.
+ *
+ * @param minmax Minimum and maximum values
*/
- public AddSingleScale() {
+ public AddSingleScale(double[] minmax) {
super();
+ this.minmax = minmax;
}
@SuppressWarnings("unchecked")
@@ -58,7 +73,7 @@ public class AddSingleScale implements Algorithm {
public Result run(Database database) {
for(Relation<?> rel : database.getRelations()) {
if(TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(rel.getDataTypeInformation())) {
- ScalesResult res = run((Relation<? extends NumberVector<?, ?>>) rel);
+ ScalesResult res = run((Relation<? extends NumberVector<?>>) rel);
ResultUtil.addChildResult(rel, res);
}
}
@@ -71,20 +86,28 @@ public class AddSingleScale implements Algorithm {
* @param rel Relation
* @return Scales
*/
- private ScalesResult run(Relation<? extends NumberVector<?, ?>> rel) {
- final int dim = DatabaseUtil.dimensionality(rel);
- DoubleMinMax minmax = new DoubleMinMax();
- for(DBIDIter iditer = rel.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
- NumberVector<?, ?> vec = rel.get(id);
- for(int d = 1; d <= dim; d++) {
- minmax.put(vec.doubleValue(d));
+ private ScalesResult run(Relation<? extends NumberVector<?>> rel) {
+ final int dim = RelationUtil.dimensionality(rel);
+ LinearScale[] scales = new LinearScale[dim];
+ if(minmax == null) {
+ DoubleMinMax mm = new DoubleMinMax();
+ for(DBIDIter iditer = rel.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ NumberVector<?> vec = rel.get(iditer);
+ for(int d = 0; d < dim; d++) {
+ mm.put(vec.doubleValue(d));
+ }
+ }
+ LinearScale scale = new LinearScale(mm.getMin(), mm.getMax());
+ for(int i = 0; i < dim; i++) {
+ scales[i] = scale;
}
}
- LinearScale scale = new LinearScale(minmax.getMin(), minmax.getMax());
- LinearScale[] scales = new LinearScale[dim];
- for(int i = 0; i < dim; i++) {
- scales[i] = scale;
+ else {
+ // Use predefined.
+ LinearScale scale = new LinearScale(minmax[0], minmax[1]);
+ for(int i = 0; i < dim; i++) {
+ scales[i] = scale;
+ }
}
ScalesResult res = new ScalesResult(scales);
return res;
@@ -94,4 +117,39 @@ public class AddSingleScale implements Algorithm {
public TypeInformation[] getInputTypeRestriction() {
return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
}
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ /**
+ * Minimum and maximum to use.
+ */
+ double[] minmax = null;
+
+ /**
+ * Minimum and maximum values.
+ */
+ public static final OptionID MINMAX_ID = new OptionID("scales.minmax", "Forcibly set the scales to the given range.");
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ DoubleListParameter minmaxP = new DoubleListParameter(MINMAX_ID);
+ minmaxP.setOptional(true);
+ minmaxP.addConstraint(new ListSizeConstraint(2));
+ if(config.grab(minmaxP)) {
+ minmax = ArrayLikeUtil.toPrimitiveDoubleArray(minmaxP.getValue());
+ }
+ }
+
+ @Override
+ protected AddSingleScale makeInstance() {
+ return new AddSingleScale(minmax);
+ }
+ }
} \ No newline at end of file
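
The rewritten AddSingleScale shares one LinearScale across all dimensions, taken either from the new optional scales.minmax parameter or from the global value range of the data, so visualizations scale every axis uniformly. A self-contained sketch of that selection; LinearScale here is a hypothetical stand-in for the ELKI class of the same name.

    import java.util.Arrays;

    // Self-contained sketch of the scale selection added above.
    class SingleScaleSketch {
      static class LinearScale {
        final double min, max;

        LinearScale(double min, double max) {
          this.min = min;
          this.max = max;
        }
      }

      // minmax may be null, mirroring the optional parameter above.
      static LinearScale[] scales(double[][] data, double[] minmax) {
        int dim = data[0].length;
        double min, max;
        if (minmax == null) {
          // Derive one global range over all dimensions.
          min = Double.POSITIVE_INFINITY;
          max = Double.NEGATIVE_INFINITY;
          for (double[] row : data) {
            for (double v : row) {
              min = Math.min(min, v);
              max = Math.max(max, v);
            }
          }
        } else {
          // Use the predefined range.
          min = minmax[0];
          max = minmax[1];
        }
        LinearScale[] scales = new LinearScale[dim];
        Arrays.fill(scales, new LinearScale(min, max)); // same scale for every dimension
        return scales;
      }
    }
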
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/statistics/AveragePrecisionAtK.java b/src/de/lmu/ifi/dbs/elki/algorithm/statistics/AveragePrecisionAtK.java
index f6f1d16f..e8165afc 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/statistics/AveragePrecisionAtK.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/statistics/AveragePrecisionAtK.java
@@ -25,7 +25,6 @@ package de.lmu.ifi.dbs.elki.algorithm.statistics;
import java.util.ArrayList;
import java.util.Collection;
-import java.util.Iterator;
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.data.DoubleVector;
@@ -33,16 +32,14 @@ import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
@@ -50,9 +47,9 @@ import de.lmu.ifi.dbs.elki.math.MeanVariance;
import de.lmu.ifi.dbs.elki.result.CollectionResult;
import de.lmu.ifi.dbs.elki.result.HistogramResult;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.IntervalConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.IntervalConstraint.IntervalBoundary;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessEqualConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -70,7 +67,7 @@ public class AveragePrecisionAtK<V extends Object, D extends NumberDistance<D, ?
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(AveragePrecisionAtK.class);
+ private static final Logging LOG = Logging.getLogger(AveragePrecisionAtK.class);
/**
* The parameter k - the number of neighbors to retrieve.
@@ -91,6 +88,7 @@ public class AveragePrecisionAtK<V extends Object, D extends NumberDistance<D, ?
* Constructor.
*
* @param distanceFunction Distance function
+ * @param k K parameter
* @param sampling Sampling rate
* @param seed Random sampling seed (may be null)
*/
@@ -118,21 +116,18 @@ public class AveragePrecisionAtK<V extends Object, D extends NumberDistance<D, ?
ids = relation.getDBIDs();
}
- if(logger.isVerbose()) {
- logger.verbose("Processing points...");
+ if(LOG.isVerbose()) {
+ LOG.verbose("Processing points...");
}
- FiniteProgress objloop = logger.isVerbose() ? new FiniteProgress("Computing nearest neighbors", ids.size(), logger) : null;
+ FiniteProgress objloop = LOG.isVerbose() ? new FiniteProgress("Computing nearest neighbors", ids.size(), LOG) : null;
// sort neighbors
for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
- DBID id = iter.getDBID();
- KNNResult<D> knn = knnQuery.getKNNForDBID(id, k);
- Object label = lrelation.get(id);
-
- int positive = 0;
- Iterator<DistanceResultPair<D>> ri = knn.iterator();
- for(int i = 0; i < k && ri.hasNext(); i++) {
- DBID nid = ri.next().getDBID();
- Object olabel = lrelation.get(nid);
+ KNNResult<D> knn = knnQuery.getKNNForDBID(iter, k);
+ Object label = lrelation.get(iter);
+
+ int positive = 0, i = 0;
+ for (DBIDIter ri = knn.iter(); i < k && ri.valid(); ri.advance(), i++) {
+ Object olabel = lrelation.get(ri);
if(label == null) {
if(olabel == null) {
positive += 1;
@@ -147,11 +142,11 @@ public class AveragePrecisionAtK<V extends Object, D extends NumberDistance<D, ?
mvs[i].put(precision);
}
if(objloop != null) {
- objloop.incrementProcessed(logger);
+ objloop.incrementProcessed(LOG);
}
}
if(objloop != null) {
- objloop.ensureCompleted(logger);
+ objloop.ensureCompleted(LOG);
}
// Collections.sort(results);
@@ -171,7 +166,7 @@ public class AveragePrecisionAtK<V extends Object, D extends NumberDistance<D, ?
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -181,29 +176,29 @@ public class AveragePrecisionAtK<V extends Object, D extends NumberDistance<D, ?
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<V, D> {
+ public static class Parameterizer<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<V, D> {
/**
* Parameter k to compute the average precision at.
*/
- private static final OptionID K_ID = OptionID.getOrCreateOptionID("avep.k", "K to compute the average precision at.");
+ private static final OptionID K_ID = new OptionID("avep.k", "K to compute the average precision at.");
/**
- * Parameter to enable sampling
+ * Parameter to enable sampling.
*/
- public static final OptionID SAMPLING_ID = OptionID.getOrCreateOptionID("avep.sampling", "Relative amount of object to sample.");
+ public static final OptionID SAMPLING_ID = new OptionID("avep.sampling", "Relative amount of objects to sample.");
/**
- * Parameter to control the sampling random seed
+ * Parameter to control the sampling random seed.
*/
- public static final OptionID SEED_ID = OptionID.getOrCreateOptionID("avep.sampling-seed", "Random seed for deterministic sampling.");
+ public static final OptionID SEED_ID = new OptionID("avep.sampling-seed", "Random seed for deterministic sampling.");
/**
- * Neighborhood size
+ * Neighborhood size.
*/
protected int k = 20;
/**
- * Relative amount of data to sample
+ * Relative amount of data to sample.
*/
protected double sampling = 1.0;
@@ -215,17 +210,22 @@ public class AveragePrecisionAtK<V extends Object, D extends NumberDistance<D, ?
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter kP = new IntParameter(K_ID, new GreaterEqualConstraint(2));
+ final IntParameter kP = new IntParameter(K_ID);
+ kP.addConstraint(new GreaterEqualConstraint(2));
if(config.grab(kP)) {
k = kP.getValue();
}
- final DoubleParameter samplingP = new DoubleParameter(SAMPLING_ID, new IntervalConstraint(0.0, IntervalBoundary.OPEN, 1.0, IntervalBoundary.CLOSE), true);
+ final DoubleParameter samplingP = new DoubleParameter(SAMPLING_ID);
+ samplingP.addConstraint(new GreaterConstraint(0.0));
+ samplingP.addConstraint(new LessEqualConstraint(1.0));
+ samplingP.setOptional(true);
if (config.grab(samplingP)) {
sampling = samplingP.getValue();
}
- final LongParameter seedP = new LongParameter(SEED_ID, true);
- if (config.grab(seedP)) {
- seed = seedP.getValue();
+ final LongParameter rndP = new LongParameter(SEED_ID);
+ rndP.setOptional(true);
+ if (config.grab(rndP)) {
+ seed = rndP.getValue();
}
}
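
The rewritten loop above measures precision at k by label agreement among each query's k nearest neighbors. A self-contained sketch of the inner computation; the null-safe label comparison mirrors the code above, and note that ELKI kNN results conventionally include the query object itself.

    // Self-contained sketch: fraction of the first k neighbor labels
    // matching the query label.
    class PrecisionAtK {
      static double precisionAtK(Object queryLabel, Object[] neighborLabels, int k) {
        int positive = 0, i = 0;
        for (; i < k && i < neighborLabels.length; i++) {
          Object olabel = neighborLabels[i];
          // Null-safe equality, as in the label handling above.
          if (queryLabel == null ? olabel == null : queryLabel.equals(olabel)) {
            positive++;
          }
        }
        return i > 0 ? positive / (double) i : 0.; // guard against k == 0 or empty results
      }
    }
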
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/statistics/DistanceStatisticsWithClasses.java b/src/de/lmu/ifi/dbs/elki/algorithm/statistics/DistanceStatisticsWithClasses.java
index d6ce6a15..ebf588b6 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/statistics/DistanceStatisticsWithClasses.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/statistics/DistanceStatisticsWithClasses.java
@@ -42,6 +42,7 @@ import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
@@ -52,10 +53,11 @@ import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.math.MeanVariance;
-import de.lmu.ifi.dbs.elki.math.histograms.AggregatingHistogram;
-import de.lmu.ifi.dbs.elki.math.histograms.FlexiHistogram;
import de.lmu.ifi.dbs.elki.result.CollectionResult;
import de.lmu.ifi.dbs.elki.result.HistogramResult;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.histogram.AbstractObjDynamicHistogram;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.histogram.LongArrayStaticHistogram;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.histogram.ObjHistogram;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages;
@@ -66,14 +68,13 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Parameter;
-import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair;
-import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
/**
* Algorithm to gather statistics over the distance distribution in the data
* set.
*
* @author Erich Schubert
+ *
* @param <O> Object type
* @param <D> Distance type
*/
@@ -84,22 +85,22 @@ public class DistanceStatisticsWithClasses<O, D extends NumberDistance<D, ?>> ex
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(DistanceStatisticsWithClasses.class);
+ private static final Logging LOG = Logging.getLogger(DistanceStatisticsWithClasses.class);
/**
* Flag to compute exact value range for binning.
*/
- public static final OptionID EXACT_ID = OptionID.getOrCreateOptionID("diststat.exact", "In a first pass, compute the exact minimum and maximum, at the cost of O(2*n*n) instead of O(n*n). The number of resulting bins is guaranteed to be as requested.");
+ public static final OptionID EXACT_ID = new OptionID("diststat.exact", "In a first pass, compute the exact minimum and maximum, at the cost of O(2*n*n) instead of O(n*n). The number of resulting bins is guaranteed to be as requested.");
/**
- * Flag to enable sampling
+ * Flag to enable sampling.
*/
- public static final OptionID SAMPLING_ID = OptionID.getOrCreateOptionID("diststat.sampling", "Enable sampling of O(n) size to determine the minimum and maximum distances approximately. The resulting number of bins can be larger than the given n.");
+ public static final OptionID SAMPLING_ID = new OptionID("diststat.sampling", "Enable sampling of O(n) size to determine the minimum and maximum distances approximately. The resulting number of bins can be larger than the given n.");
/**
* Option to configure the number of bins to use.
*/
- public static final OptionID HISTOGRAM_BINS_ID = OptionID.getOrCreateOptionID("diststat.bins", "Number of bins to use in the histogram. By default, it is only guaranteed to be within 1*n and 2*n of the given number.");
+ public static final OptionID HISTOGRAM_BINS_ID = new OptionID("diststat.bins", "Number of bins to use in the histogram. By default, it is only guaranteed to be within 1*n and 2*n of the given number.");
/**
* Number of bins to use in sampling.
@@ -107,12 +108,12 @@ public class DistanceStatisticsWithClasses<O, D extends NumberDistance<D, ?>> ex
private int numbin;
/**
- * Sampling
+ * Sampling flag.
*/
private boolean sampling = false;
/**
- * Sampling
+ * Compute exactly (slower).
*/
private boolean exact = false;
@@ -136,7 +137,7 @@ public class DistanceStatisticsWithClasses<O, D extends NumberDistance<D, ?>> ex
final Relation<O> relation = database.getRelation(getInputTypeRestriction()[0]);
final DistanceQuery<O, D> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
- final StepProgress stepprog = logger.isVerbose() ? new StepProgress("Distance statistics", 2) : null;
+ final StepProgress stepprog = LOG.isVerbose() ? new StepProgress("Distance statistics", 2) : null;
// determine binning ranges.
DoubleMinMax gminmax = new DoubleMinMax();
@@ -157,43 +158,71 @@ public class DistanceStatisticsWithClasses<O, D extends NumberDistance<D, ?>> ex
MeanVariance momax = new MeanVariance();
MeanVariance modif = new MeanVariance();
// Histogram
- final AggregatingHistogram<Pair<Long, Long>, Pair<Long, Long>> histogram;
- if(stepprog != null) {
- stepprog.beginStep(1, "Prepare histogram.", logger);
+ final ObjHistogram<long[]> histogram;
+ if (stepprog != null) {
+ stepprog.beginStep(1, "Prepare histogram.", LOG);
}
- if(exact) {
+ if (exact) {
gminmax = exactMinMax(relation, distFunc);
- histogram = AggregatingHistogram.LongSumLongSumHistogram(numbin, gminmax.getMin(), gminmax.getMax());
- }
- else if(sampling) {
+ histogram = new LongArrayStaticHistogram(numbin, gminmax.getMin(), gminmax.getMax(), 2);
+ } else if (sampling) {
gminmax = sampleMinMax(relation, distFunc);
- histogram = AggregatingHistogram.LongSumLongSumHistogram(numbin, gminmax.getMin(), gminmax.getMax());
- }
- else {
- histogram = FlexiHistogram.LongSumLongSumHistogram(numbin);
+ histogram = new LongArrayStaticHistogram(numbin, gminmax.getMin(), gminmax.getMax(), 2);
+ } else {
+ histogram = new AbstractObjDynamicHistogram<long[]>(numbin) {
+ @Override
+ protected long[] downsample(Object[] data, int start, int end, int size) {
+ long[] ret = new long[2];
+ for (int i = start; i < end; i++) {
+ long[] existing = (long[]) data[i];
+ if (existing != null) {
+ for (int c = 0; c < 2; c++) {
+ ret[c] += existing[c];
+ }
+ }
+ }
+ return ret;
+ }
+
+ @Override
+ protected long[] aggregate(long[] first, long[] second) {
+ for (int c = 0; c < 2; c++) {
+ first[c] += second[c];
+ }
+ return first;
+ }
+
+ @Override
+ protected long[] cloneForCache(long[] data) {
+ return data.clone();
+ }
+
+ @Override
+ protected long[] makeObject() {
+ return new long[2];
+ }
+ };
}
- if(stepprog != null) {
- stepprog.beginStep(2, "Build histogram.", logger);
+ if (stepprog != null) {
+ stepprog.beginStep(2, "Build histogram.", LOG);
}
- final FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Distance computations", relation.size(), logger) : null;
+ final FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Distance computations", relation.size(), LOG) : null;
// iterate per cluster
- final Pair<Long, Long> incFirst = new Pair<Long, Long>(1L, 0L);
- final Pair<Long, Long> incSecond = new Pair<Long, Long>(0L, 1L);
- for(Cluster<?> c1 : split) {
- for(DBIDIter iter = c1.getIDs().iter(); iter.valid(); iter.advance()) {
- DBID id1 = iter.getDBID();
+ final long[] incFirst = new long[] { 1L, 0L };
+ final long[] incSecond = new long[] { 0L, 1L };
+ for (Cluster<?> c1 : split) {
+ for (DBIDIter id1 = c1.getIDs().iter(); id1.valid(); id1.advance()) {
// in-cluster distances
DoubleMinMax iminmax = new DoubleMinMax();
- for(DBIDIter iter2 = c1.getIDs().iter(); iter2.valid(); iter2.advance()) {
- DBID id2 = iter2.getDBID();
+ for (DBIDIter iter2 = c1.getIDs().iter(); iter2.valid(); iter2.advance()) {
// skip the point itself.
- if(id1.sameDBID(id2)) {
+ if (DBIDUtil.equal(id1, iter2)) {
continue;
}
- double d = distFunc.distance(id1, id2).doubleValue();
+ double d = distFunc.distance(id1, iter2).doubleValue();
- histogram.aggregate(d, incFirst);
+ histogram.putData(d, incFirst);
iminmax.put(d);
}
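The rewrite above replaces boxed Pair<Long, Long> bins with primitive long[] bins: each bin holds {in-cluster, other-cluster} counts, putData() merges an increment array into the matching bin, and the anonymous AbstractObjDynamicHistogram supplies the merge logic when the value range is not known up front. A brief usage sketch of the static variant, with the bin count and range assumed:

    import de.lmu.ifi.dbs.elki.utilities.datastructures.histogram.LongArrayStaticHistogram;
    import de.lmu.ifi.dbs.elki.utilities.datastructures.histogram.ObjHistogram;

    // 20 bins over [0, 1]; each bin is a long[2] of {in-cluster, other-cluster} counts.
    ObjHistogram<long[]> histogram = new LongArrayStaticHistogram(20, 0., 1., 2);
    histogram.putData(.42, new long[] { 1L, 0L }); // one in-cluster distance
    histogram.putData(.42, new long[] { 0L, 1L }); // one other-cluster distance
    for (ObjHistogram.Iter<long[]> it = histogram.iter(); it.valid(); it.advance()) {
      long inCluster = it.getValue()[0], otherCluster = it.getValue()[1];
      double center = it.getCenter(); // bin center, as used for the output rows below
    }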
@@ -207,19 +236,18 @@ public class DistanceStatisticsWithClasses<O, D extends NumberDistance<D, ?>> ex
// other-cluster distances
DoubleMinMax ominmax = new DoubleMinMax();
- for(Cluster<?> c2 : split) {
- if(c2 == c1) {
+ for (Cluster<?> c2 : split) {
+ if (c2 == c1) {
continue;
}
- for(DBIDIter iter2 = c2.getIDs().iter(); iter2.valid(); iter2.advance()) {
- DBID id2 = iter2.getDBID();
+ for (DBIDIter iter2 = c2.getIDs().iter(); iter2.valid(); iter2.advance()) {
// skip the point itself (shouldn't happen though)
- if(id1.sameDBID(id2)) {
+ if (DBIDUtil.equal(id1, iter2)) {
continue;
}
- double d = distFunc.distance(id1, id2).doubleValue();
+ double d = distFunc.distance(id1, iter2).doubleValue();
- histogram.aggregate(d, incSecond);
+ histogram.putData(d, incSecond);
ominmax.put(d);
}
@@ -231,38 +259,39 @@ public class DistanceStatisticsWithClasses<O, D extends NumberDistance<D, ?>> ex
// min/max
gominmax.put(ominmax.getMin());
gominmax.put(ominmax.getMax());
- if(progress != null) {
- progress.incrementProcessed(logger);
+ if (progress != null) {
+ progress.incrementProcessed(LOG);
}
}
}
- if(progress != null) {
- progress.ensureCompleted(logger);
+ if (progress != null) {
+ progress.ensureCompleted(LOG);
}
// Update values (only needed for sampling case).
gminmax.setFirst(Math.min(giminmax.getMin(), gominmax.getMin()));
gminmax.setSecond(Math.max(giminmax.getMax(), gominmax.getMax()));
- if(stepprog != null) {
- stepprog.setCompleted(logger);
+ if (stepprog != null) {
+ stepprog.setCompleted(LOG);
}
// count the number of samples we have in the data
long inum = 0;
long onum = 0;
- for(DoubleObjPair<Pair<Long, Long>> ppair : histogram) {
- inum += ppair.getSecond().getFirst();
- onum += ppair.getSecond().getSecond();
+ for (ObjHistogram.Iter<long[]> iter = histogram.iter(); iter.valid(); iter.advance()) {
+ inum += iter.getValue()[0];
+ onum += iter.getValue()[1];
}
long bnum = inum + onum;
Collection<DoubleVector> binstat = new ArrayList<DoubleVector>(numbin);
- for(DoubleObjPair<Pair<Long, Long>> ppair : histogram) {
- final double icof = (inum == 0) ? 0 : ((double) ppair.getSecond().getFirst()) / inum / histogram.getBinsize();
- final double icaf = ((double) ppair.getSecond().getFirst()) / bnum / histogram.getBinsize();
- final double ocof = (onum == 0) ? 0 : ((double) ppair.getSecond().getSecond()) / onum / histogram.getBinsize();
- final double ocaf = ((double) ppair.getSecond().getSecond()) / bnum / histogram.getBinsize();
- DoubleVector row = new DoubleVector(new double[] { ppair.first, icof, icaf, ocof, ocaf });
+ for (ObjHistogram.Iter<long[]> iter = histogram.iter(); iter.valid(); iter.advance()) {
+ final long[] value = iter.getValue();
+ final double icof = (inum == 0) ? 0 : ((double) value[0]) / inum / histogram.getBinsize();
+ final double icaf = ((double) value[0]) / bnum / histogram.getBinsize();
+ final double ocof = (onum == 0) ? 0 : ((double) value[1]) / onum / histogram.getBinsize();
+ final double ocaf = ((double) value[1]) / bnum / histogram.getBinsize();
+ DoubleVector row = new DoubleVector(new double[] { iter.getCenter(), icof, icaf, ocof, ocaf });
binstat.add(row);
}
HistogramResult<DoubleVector> result = new HistogramResult<DoubleVector>("Distance Histogram", "distance-histogram", binstat);
@@ -278,111 +307,121 @@ public class DistanceStatisticsWithClasses<O, D extends NumberDistance<D, ?>> ex
return result;
}
- private DoubleMinMax sampleMinMax(Relation<O> database, DistanceQuery<O, D> distFunc) {
- int size = database.size();
+ /**
+ * Estimate minimum and maximum via sampling.
+ *
+ * @param relation Relation to process
+ * @param distFunc Distance function to use
+ * @return Minimum and maximum
+ */
+ private DoubleMinMax sampleMinMax(Relation<O> relation, DistanceQuery<O, D> distFunc) {
+ int size = relation.size();
Random rnd = new Random();
// estimate minimum and maximum.
- int k = (int) Math.max(25, Math.pow(database.size(), 0.2));
- TreeSet<DoubleObjPair<DBID>> minhotset = new TreeSet<DoubleObjPair<DBID>>();
- TreeSet<DoubleObjPair<DBID>> maxhotset = new TreeSet<DoubleObjPair<DBID>>(Collections.reverseOrder());
+ int k = (int) Math.max(25, Math.pow(relation.size(), 0.2));
+ TreeSet<DoubleDBIDPair> minhotset = new TreeSet<DoubleDBIDPair>();
+ TreeSet<DoubleDBIDPair> maxhotset = new TreeSet<DoubleDBIDPair>(Collections.reverseOrder());
- int randomsize = (int) Math.max(25, Math.pow(database.size(), 0.2));
+ int randomsize = (int) Math.max(25, Math.pow(relation.size(), 0.2));
double rprob = ((double) randomsize) / size;
ArrayModifiableDBIDs randomset = DBIDUtil.newArray(randomsize);
- DBIDIter iter = database.iterDBIDs();
- if(!iter.valid()) {
+ DBIDIter iter = relation.iterDBIDs();
+ if (!iter.valid()) {
throw new IllegalStateException(ExceptionMessages.DATABASE_EMPTY);
}
- DBID firstid = iter.getDBID();
+ DBID firstid = DBIDUtil.deref(iter);
iter.advance();
- minhotset.add(new DoubleObjPair<DBID>(Double.MAX_VALUE, firstid));
- maxhotset.add(new DoubleObjPair<DBID>(Double.MIN_VALUE, firstid));
- while(iter.valid()) {
- DBID id1 = iter.getDBID();
- iter.advance();
+ minhotset.add(DBIDUtil.newPair(Double.MAX_VALUE, firstid));
+ maxhotset.add(DBIDUtil.newPair(Double.MIN_VALUE, firstid));
+ for (; iter.valid(); iter.advance()) {
// generate candidates for min distance.
- ArrayList<DoubleObjPair<DBID>> np = new ArrayList<DoubleObjPair<DBID>>(k * 2 + randomsize * 2);
- for(DoubleObjPair<DBID> pair : minhotset) {
- DBID id2 = pair.getSecond();
+ ArrayList<DoubleDBIDPair> np = new ArrayList<DoubleDBIDPair>(k * 2 + randomsize * 2);
+ for (DoubleDBIDPair pair : minhotset) {
// skip the object itself
- if(id1.compareTo(id2) == 0) {
+ if (DBIDUtil.equal(iter, pair)) {
continue;
}
- double d = distFunc.distance(id1, id2).doubleValue();
- np.add(new DoubleObjPair<DBID>(d, id1));
- np.add(new DoubleObjPair<DBID>(d, id2));
+ double d = distFunc.distance(iter, pair).doubleValue();
+ np.add(DBIDUtil.newPair(d, iter));
+ np.add(DBIDUtil.newPair(d, pair));
}
- for(DBIDIter iter2 = randomset.iter(); iter2.valid(); iter2.advance()) {
- DBID id2 = iter2.getDBID();
- double d = distFunc.distance(id1, id2).doubleValue();
- np.add(new DoubleObjPair<DBID>(d, id1));
- np.add(new DoubleObjPair<DBID>(d, id2));
+ for (DBIDIter iter2 = randomset.iter(); iter2.valid(); iter2.advance()) {
+ double d = distFunc.distance(iter, iter2).doubleValue();
+ np.add(DBIDUtil.newPair(d, iter));
+ np.add(DBIDUtil.newPair(d, iter2));
}
minhotset.addAll(np);
shrinkHeap(minhotset, k);
// generate candidates for max distance.
- ArrayList<DoubleObjPair<DBID>> np2 = new ArrayList<DoubleObjPair<DBID>>(k * 2 + randomsize * 2);
- for(DoubleObjPair<DBID> pair : minhotset) {
- DBID id2 = pair.getSecond();
+ ArrayList<DoubleDBIDPair> np2 = new ArrayList<DoubleDBIDPair>(k * 2 + randomsize * 2);
+ for (DoubleDBIDPair pair : minhotset) {
// skip the object itself
- if(id1.compareTo(id2) == 0) {
+ if (DBIDUtil.equal(iter, pair)) {
continue;
}
- double d = distFunc.distance(id1, id2).doubleValue();
- np2.add(new DoubleObjPair<DBID>(d, id1));
- np2.add(new DoubleObjPair<DBID>(d, id2));
+ double d = distFunc.distance(iter, pair).doubleValue();
+ np2.add(DBIDUtil.newPair(d, iter));
+ np2.add(DBIDUtil.newPair(d, pair));
}
- for(DBIDIter iter2 = randomset.iter(); iter2.valid(); iter2.advance()) {
- DBID id2 = iter2.getDBID();
- double d = distFunc.distance(id1, id2).doubleValue();
- np.add(new DoubleObjPair<DBID>(d, id1));
- np.add(new DoubleObjPair<DBID>(d, id2));
+ for (DBIDIter iter2 = randomset.iter(); iter2.valid(); iter2.advance()) {
+ double d = distFunc.distance(iter, iter2).doubleValue();
+ np2.add(DBIDUtil.newPair(d, iter));
+ np2.add(DBIDUtil.newPair(d, iter2));
}
maxhotset.addAll(np2);
shrinkHeap(maxhotset, k);
// update random set
- if(randomset.size() < randomsize) {
- randomset.add(id1);
- }
- else if(rnd.nextDouble() < rprob) {
- randomset.set((int) Math.floor(rnd.nextDouble() * randomsize), id1);
+ if (randomset.size() < randomsize) {
+ randomset.add(iter);
+ } else if (rnd.nextDouble() < rprob) {
+ randomset.set((int) Math.floor(rnd.nextDouble() * randomsize), iter);
}
}
- return new DoubleMinMax(minhotset.first().first, maxhotset.first().first);
+ return new DoubleMinMax(minhotset.first().doubleValue(), maxhotset.first().doubleValue());
}
- private DoubleMinMax exactMinMax(Relation<O> database, DistanceQuery<O, D> distFunc) {
+ /**
+ * Compute the exact maximum and minimum.
+ *
+ * @param relation Relation to process
+ * @param distFunc Distance function
+ * @return Exact maximum and minimum
+ */
+ private DoubleMinMax exactMinMax(Relation<O> relation, DistanceQuery<O, D> distFunc) {
DoubleMinMax minmax = new DoubleMinMax();
// find exact minimum and maximum first.
- for(DBIDIter iditer = database.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id1 = iditer.getDBID();
- for(DBIDIter iditer2 = database.iterDBIDs(); iditer2.valid(); iditer2.advance()) {
- DBID id2 = iditer2.getDBID();
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for (DBIDIter iditer2 = relation.iterDBIDs(); iditer2.valid(); iditer2.advance()) {
// skip the point itself.
- if(id1.compareTo(id2) == 0) {
+ if (DBIDUtil.equal(iditer, iditer2)) {
continue;
}
- double d = distFunc.distance(id1, id2).doubleValue();
+ double d = distFunc.distance(iditer, iditer2).doubleValue();
minmax.put(d);
}
}
return minmax;
}
- private void shrinkHeap(TreeSet<DoubleObjPair<DBID>> hotset, int k) {
+ /**
+ * Shrink the heap of "hot" (extreme) items.
+ *
+ * @param hotset Set of hot items
+ * @param k Target size
+ */
+ private static void shrinkHeap(TreeSet<DoubleDBIDPair> hotset, int k) {
// drop duplicates
ModifiableDBIDs seenids = DBIDUtil.newHashSet(2 * k);
int cnt = 0;
- for(Iterator<DoubleObjPair<DBID>> i = hotset.iterator(); i.hasNext();) {
- DoubleObjPair<DBID> p = i.next();
- if(cnt > k || seenids.contains(p.getSecond())) {
+ for (Iterator<DoubleDBIDPair> i = hotset.iterator(); i.hasNext();) {
+ DoubleDBIDPair p = i.next();
+ if (cnt > k || seenids.contains(p)) {
i.remove();
- }
- else {
- seenids.add(p.getSecond());
+ } else {
+ seenids.add(p);
cnt++;
}
}
@@ -395,7 +434,7 @@ public class DistanceStatisticsWithClasses<O, D extends NumberDistance<D, ?>> ex
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -412,36 +451,37 @@ public class DistanceStatisticsWithClasses<O, D extends NumberDistance<D, ?>> ex
private int numbin = 20;
/**
- * Sampling
+ * Sampling flag.
*/
private boolean sampling = false;
/**
- * Sampling
+ * Exactness flag.
*/
private boolean exact = false;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter numbinP = new IntParameter(HISTOGRAM_BINS_ID, new GreaterEqualConstraint(2), 20);
- if(config.grab(numbinP)) {
+ final IntParameter numbinP = new IntParameter(HISTOGRAM_BINS_ID, 20);
+ numbinP.addConstraint(new GreaterEqualConstraint(2));
+ if (config.grab(numbinP)) {
numbin = numbinP.getValue();
}
- final Flag EXACT_FLAG = new Flag(EXACT_ID);
- if(config.grab(EXACT_FLAG)) {
- exact = EXACT_FLAG.getValue();
+ final Flag exactF = new Flag(EXACT_ID);
+ if (config.grab(exactF)) {
+ exact = exactF.getValue();
}
- final Flag SAMPLING_FLAG = new Flag(SAMPLING_ID);
- if(config.grab(SAMPLING_FLAG)) {
- sampling = SAMPLING_FLAG.getValue();
+ final Flag samplingF = new Flag(SAMPLING_ID);
+ if (config.grab(samplingF)) {
+ sampling = samplingF.getValue();
}
- ArrayList<Parameter<?, ?>> exclusive = new ArrayList<Parameter<?, ?>>();
- exclusive.add(EXACT_FLAG);
- exclusive.add(SAMPLING_FLAG);
+ ArrayList<Parameter<?>> exclusive = new ArrayList<Parameter<?>>();
+ exclusive.add(exactF);
+ exclusive.add(samplingF);
config.checkConstraint(new OnlyOneIsAllowedToBeSetGlobalConstraint(exclusive));
}
@@ -450,4 +490,4 @@ public class DistanceStatisticsWithClasses<O, D extends NumberDistance<D, ?>> ex
return new DistanceStatisticsWithClasses<O, D>(distanceFunction, numbin, exact, sampling);
}
}
-} \ No newline at end of file
+}
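Most of the mechanical edits in the file above follow the DBID API migration: a DBIDIter is now passed around as a reference itself instead of materializing DBID objects via getDBID(), equality moves from id1.sameDBID(id2) to DBIDUtil.equal(), and pairs are built with DBIDUtil.newPair(). A condensed sketch of the pattern, with relation and distFunc assumed from the surrounding method:

    import de.lmu.ifi.dbs.elki.database.ids.DBID;
    import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
    import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;

    for (DBIDIter i1 = relation.iterDBIDs(); i1.valid(); i1.advance()) {
      for (DBIDIter i2 = relation.iterDBIDs(); i2.valid(); i2.advance()) {
        // Old: DBID id1 = i1.getDBID(); if (id1.sameDBID(id2)) { continue; }
        if (DBIDUtil.equal(i1, i2)) { // compare the references directly
          continue;
        }
        double d = distFunc.distance(i1, i2).doubleValue();
      }
    }
    // Only when an id must outlive its iterator, dereference explicitly:
    DBID first = DBIDUtil.deref(relation.iterDBIDs());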
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/statistics/EvaluateRankingQuality.java b/src/de/lmu/ifi/dbs/elki/algorithm/statistics/EvaluateRankingQuality.java
index 353c1b02..1643d378 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/statistics/EvaluateRankingQuality.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/statistics/EvaluateRankingQuality.java
@@ -38,33 +38,31 @@ import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.evaluation.roc.ROC;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.MathUtil;
-import de.lmu.ifi.dbs.elki.math.MeanVariance;
-import de.lmu.ifi.dbs.elki.math.histograms.AggregatingHistogram;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
import de.lmu.ifi.dbs.elki.result.CollectionResult;
import de.lmu.ifi.dbs.elki.result.HistogramResult;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.histogram.MeanVarianceStaticHistogram;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair;
-import de.lmu.ifi.dbs.elki.utilities.pairs.FCPair;
/**
* Evaluate a distance function with respect to kNN queries. For each point, the
@@ -88,22 +86,22 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.FCPair;
*/
@Title("Evaluate Ranking Quality")
@Description("Evaluates the effectiveness of a distance function via the obtained rankings.")
-public class EvaluateRankingQuality<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<V, D, CollectionResult<DoubleVector>> {
+public class EvaluateRankingQuality<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<V, D, CollectionResult<DoubleVector>> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(EvaluateRankingQuality.class);
+ private static final Logging LOG = Logging.getLogger(EvaluateRankingQuality.class);
/**
* Option to configure the number of bins to use.
*/
- public static final OptionID HISTOGRAM_BINS_ID = OptionID.getOrCreateOptionID("rankqual.bins", "Number of bins to use in the histogram");
+ public static final OptionID HISTOGRAM_BINS_ID = new OptionID("rankqual.bins", "Number of bins to use in the histogram");
/**
* Constructor.
*
- * @param distanceFunction
- * @param numbins
+ * @param distanceFunction Distance function
+ * @param numbins Number of bins
*/
public EvaluateRankingQuality(DistanceFunction<? super V, D> distanceFunction, int numbins) {
super(distanceFunction);
@@ -121,61 +119,60 @@ public class EvaluateRankingQuality<V extends NumberVector<V, ?>, D extends Numb
final DistanceQuery<V, D> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
final KNNQuery<V, D> knnQuery = database.getKNNQuery(distQuery, relation.size());
- if(logger.isVerbose()) {
- logger.verbose("Preprocessing clusters...");
+ if (LOG.isVerbose()) {
+ LOG.verbose("Preprocessing clusters...");
}
// Cluster by labels
Collection<Cluster<Model>> split = (new ByLabelOrAllInOneClustering()).run(database).getAllClusters();
// Compute cluster averages and covariance matrix
- HashMap<Cluster<?>, V> averages = new HashMap<Cluster<?>, V>(split.size());
+ HashMap<Cluster<?>, Vector> averages = new HashMap<Cluster<?>, Vector>(split.size());
HashMap<Cluster<?>, Matrix> covmats = new HashMap<Cluster<?>, Matrix>(split.size());
- for(Cluster<?> clus : split) {
- averages.put(clus, DatabaseUtil.centroid(relation, clus.getIDs()));
- covmats.put(clus, DatabaseUtil.covarianceMatrix(relation, clus.getIDs()));
+ for (Cluster<?> clus : split) {
+ CovarianceMatrix covmat = CovarianceMatrix.make(relation, clus.getIDs());
+ averages.put(clus, covmat.getMeanVector());
+ covmats.put(clus, covmat.destroyToNaiveMatrix());
}
- AggregatingHistogram<MeanVariance, Double> hist = AggregatingHistogram.MeanVarianceHistogram(numbins, 0.0, 1.0);
+ MeanVarianceStaticHistogram hist = new MeanVarianceStaticHistogram(numbins, 0.0, 1.0);
- if(logger.isVerbose()) {
- logger.verbose("Processing points...");
+ if (LOG.isVerbose()) {
+ LOG.verbose("Processing points...");
}
- FiniteProgress rocloop = logger.isVerbose() ? new FiniteProgress("Computing ROC AUC values", relation.size(), logger) : null;
+ FiniteProgress rocloop = LOG.isVerbose() ? new FiniteProgress("Computing ROC AUC values", relation.size(), LOG) : null;
// sort neighbors
- for(Cluster<?> clus : split) {
- ArrayList<FCPair<Double, DBID>> cmem = new ArrayList<FCPair<Double, DBID>>(clus.size());
- Vector av = averages.get(clus).getColumnVector();
+ for (Cluster<?> clus : split) {
+ ArrayList<DoubleDBIDPair> cmem = new ArrayList<DoubleDBIDPair>(clus.size());
+ Vector av = averages.get(clus);
Matrix covm = covmats.get(clus);
- for(DBIDIter iter = clus.getIDs().iter(); iter.valid(); iter.advance()) {
- DBID i1 = iter.getDBID();
- Double d = MathUtil.mahalanobisDistance(covm, av.minus(relation.get(i1).getColumnVector()));
- cmem.add(new FCPair<Double, DBID>(d, i1));
+ for (DBIDIter iter = clus.getIDs().iter(); iter.valid(); iter.advance()) {
+ double d = MathUtil.mahalanobisDistance(covm, relation.get(iter).getColumnVector().minusEquals(av));
+ cmem.add(DBIDUtil.newPair(d, iter));
}
Collections.sort(cmem);
- for(int ind = 0; ind < cmem.size(); ind++) {
- DBID i1 = cmem.get(ind).getSecond();
- KNNResult<D> knn = knnQuery.getKNNForDBID(i1, relation.size());
+ for (int ind = 0; ind < cmem.size(); ind++) {
+ KNNResult<D> knn = knnQuery.getKNNForDBID(cmem.get(ind), relation.size());
double result = ROC.computeROCAUCDistanceResult(relation.size(), clus, knn);
- hist.aggregate(((double) ind) / clus.size(), result);
+ hist.put(((double) ind) / clus.size(), result);
- if(rocloop != null) {
- rocloop.incrementProcessed(logger);
+ if (rocloop != null) {
+ rocloop.incrementProcessed(LOG);
}
}
}
- if(rocloop != null) {
- rocloop.ensureCompleted(logger);
+ if (rocloop != null) {
+ rocloop.ensureCompleted(LOG);
}
// Collections.sort(results);
// Transform Histogram into a Double Vector array.
Collection<DoubleVector> res = new ArrayList<DoubleVector>(relation.size());
- for(DoubleObjPair<MeanVariance> pair : hist) {
- DoubleVector row = new DoubleVector(new double[] { pair.first, pair.getSecond().getCount(), pair.getSecond().getMean(), pair.getSecond().getSampleVariance() });
+ for (MeanVarianceStaticHistogram.Iter iter = hist.iter(); iter.valid(); iter.advance()) {
+ DoubleVector row = new DoubleVector(new double[] { iter.getCenter(), iter.getValue().getCount(), iter.getValue().getMean(), iter.getValue().getSampleVariance() });
res.add(row);
}
return new HistogramResult<DoubleVector>("Ranking Quality Histogram", "ranking-histogram", res);
@@ -188,7 +185,7 @@ public class EvaluateRankingQuality<V extends NumberVector<V, ?>, D extends Numb
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -198,14 +195,18 @@ public class EvaluateRankingQuality<V extends NumberVector<V, ?>, D extends Numb
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<V, D> {
+ public static class Parameterizer<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<V, D> {
+ /**
+ * Number of bins to use.
+ */
protected int numbins = 20;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter param = new IntParameter(HISTOGRAM_BINS_ID, new GreaterEqualConstraint(2), 20);
- if(config.grab(param)) {
+ final IntParameter param = new IntParameter(HISTOGRAM_BINS_ID, 20);
+ param.addConstraint(new GreaterEqualConstraint(2));
+ if (config.grab(param)) {
numbins = param.getValue();
}
}
@@ -215,4 +216,4 @@ public class EvaluateRankingQuality<V extends NumberVector<V, ?>, D extends Numb
return new EvaluateRankingQuality<V, D>(distanceFunction, numbins);
}
}
-} \ No newline at end of file
+}
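Two details in the diff above deserve a note. First, the Mahalanobis difference vector is now computed in place with minusEquals() and with the operand order reversed; this is safe because getColumnVector() returns a fresh copy and the Mahalanobis form is invariant to the sign of the difference. Second, the two DatabaseUtil passes (centroid, then covariance matrix) collapse into one CovarianceMatrix builder. A minimal sketch of the latter, with relation and clus assumed:

    import de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix;
    import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
    import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;

    // One pass over the cluster members accumulates mean and scatter together.
    CovarianceMatrix covmat = CovarianceMatrix.make(relation, clus.getIDs());
    Vector mean = covmat.getMeanVector();
    // destroyToNaiveMatrix() recycles the builder's buffer into the result,
    // so the mean is read first, as in the hunk above.
    Matrix cov = covmat.destroyToNaiveMatrix();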
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/statistics/RankingQualityHistogram.java b/src/de/lmu/ifi/dbs/elki/algorithm/statistics/RankingQualityHistogram.java
index 4305bbca..6d8167a5 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/statistics/RankingQualityHistogram.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/statistics/RankingQualityHistogram.java
@@ -34,28 +34,26 @@ import de.lmu.ifi.dbs.elki.data.model.Model;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.evaluation.roc.ROC;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.MeanVariance;
-import de.lmu.ifi.dbs.elki.math.histograms.AggregatingHistogram;
import de.lmu.ifi.dbs.elki.result.CollectionResult;
import de.lmu.ifi.dbs.elki.result.HistogramResult;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.histogram.DoubleStaticHistogram;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair;
/**
* Evaluate a distance function with respect to kNN queries. For each point, the
@@ -77,12 +75,12 @@ public class RankingQualityHistogram<O, D extends NumberDistance<D, ?>> extends
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(RankingQualityHistogram.class);
+ private static final Logging LOG = Logging.getLogger(RankingQualityHistogram.class);
/**
* Option to configure the number of bins to use.
*/
- public static final OptionID HISTOGRAM_BINS_ID = OptionID.getOrCreateOptionID("rankqual.bins", "Number of bins to use in the histogram");
+ public static final OptionID HISTOGRAM_BINS_ID = new OptionID("rankqual.bins", "Number of bins to use in the histogram");
/**
* Number of bins to use.
@@ -100,47 +98,53 @@ public class RankingQualityHistogram<O, D extends NumberDistance<D, ?>> extends
this.numbins = numbins;
}
+ /**
+ * Process a database.
+ *
+ * @param database Database to process
+ * @param relation Relation to process
+ * @return Histogram of ranking qualities
+ */
public HistogramResult<DoubleVector> run(Database database, Relation<O> relation) {
final DistanceQuery<O, D> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
final KNNQuery<O, D> knnQuery = database.getKNNQuery(distanceQuery, relation.size());
- if(logger.isVerbose()) {
- logger.verbose("Preprocessing clusters...");
+ if (LOG.isVerbose()) {
+ LOG.verbose("Preprocessing clusters...");
}
// Cluster by labels
Collection<Cluster<Model>> split = (new ByLabelOrAllInOneClustering()).run(database).getAllClusters();
- AggregatingHistogram<Double, Double> hist = AggregatingHistogram.DoubleSumHistogram(numbins, 0.0, 1.0);
+ DoubleStaticHistogram hist = new DoubleStaticHistogram(numbins, 0.0, 1.0);
- if(logger.isVerbose()) {
- logger.verbose("Processing points...");
+ if (LOG.isVerbose()) {
+ LOG.verbose("Processing points...");
}
- FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Computing ROC AUC values", relation.size(), logger) : null;
+ FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Computing ROC AUC values", relation.size(), LOG) : null;
MeanVariance mv = new MeanVariance();
// sort neighbors
- for(Cluster<?> clus : split) {
- for(DBIDIter iter = clus.getIDs().iter(); iter.valid(); iter.advance()) {
- DBID i1 = iter.getDBID();
- KNNResult<D> knn = knnQuery.getKNNForDBID(i1, relation.size());
+ for (Cluster<?> clus : split) {
+ for (DBIDIter iter = clus.getIDs().iter(); iter.valid(); iter.advance()) {
+ KNNResult<D> knn = knnQuery.getKNNForDBID(iter, relation.size());
double result = ROC.computeROCAUCDistanceResult(relation.size(), clus, knn);
mv.put(result);
- hist.aggregate(result, 1. / relation.size());
+ hist.increment(result, 1. / relation.size());
- if(progress != null) {
- progress.incrementProcessed(logger);
+ if (progress != null) {
+ progress.incrementProcessed(LOG);
}
}
}
- if(progress != null) {
- progress.ensureCompleted(logger);
+ if (progress != null) {
+ progress.ensureCompleted(LOG);
}
// Transform Histogram into a Double Vector array.
Collection<DoubleVector> res = new ArrayList<DoubleVector>(relation.size());
- for(DoubleObjPair<Double> pair : hist) {
- DoubleVector row = new DoubleVector(new double[] { pair.first, pair.getSecond() });
+ for (DoubleStaticHistogram.Iter iter = hist.iter(); iter.valid(); iter.advance()) {
+ DoubleVector row = new DoubleVector(new double[] { iter.getCenter(), iter.getValue() });
res.add(row);
}
HistogramResult<DoubleVector> result = new HistogramResult<DoubleVector>("Ranking Quality Histogram", "ranking-histogram", res);
@@ -152,10 +156,10 @@ public class RankingQualityHistogram<O, D extends NumberDistance<D, ?>> extends
public TypeInformation[] getInputTypeRestriction() {
return TypeUtil.array(getDistanceFunction().getInputTypeRestriction());
}
-
+
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -166,13 +170,17 @@ public class RankingQualityHistogram<O, D extends NumberDistance<D, ?>> extends
* @apiviz.exclude
*/
public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ /**
+ * Number of bins.
+ */
protected int numbins = 20;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter param = new IntParameter(HISTOGRAM_BINS_ID, new GreaterEqualConstraint(2), 100);
- if(config.grab(param)) {
+ final IntParameter param = new IntParameter(HISTOGRAM_BINS_ID, 100);
+ param.addConstraint(new GreaterEqualConstraint(2));
+ if (config.grab(param)) {
numbins = param.getValue();
}
}
@@ -182,4 +190,4 @@ public class RankingQualityHistogram<O, D extends NumberDistance<D, ?>> extends
return new RankingQualityHistogram<O, D>(distanceFunction, numbins);
}
}
-} \ No newline at end of file
+}
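The same histogram migration appears here in its simplest form: the double-summing AggregatingHistogram becomes a primitive DoubleStaticHistogram, aggregate(x, inc) becomes increment(x, inc), and the pair-based for-each loop becomes a cursor iterator. A short sketch, with the bin count and mass values assumed:

    import de.lmu.ifi.dbs.elki.utilities.datastructures.histogram.DoubleStaticHistogram;

    DoubleStaticHistogram hist = new DoubleStaticHistogram(100, 0.0, 1.0);
    hist.increment(0.87, 1. / 1000); // add mass to the bin containing 0.87
    for (DoubleStaticHistogram.Iter it = hist.iter(); it.valid(); it.advance()) {
      double center = it.getCenter(); // bin center (the x coordinate)
      double mass = it.getValue();    // accumulated value in this bin
    }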